jsocket.cpp 176 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. // New IPv6 Version - IN PROGRESS
  14. /*
  15. TBD IPv6 connect
  16. multicast
  17. look at loopback
  18. */
  19. #include "platform.h"
  20. #ifdef _VER_C5
  21. #include <clwclib.h>
  22. #else
  23. #include "platform.h"
  24. #include <stdio.h>
  25. #endif
  26. #include <algorithm>
  27. #ifdef _WIN32
  28. #define WIN32_LEAN_AND_MEAN
  29. #include <windows.h>
  30. #include <winsock2.h>
  31. #include <ws2tcpip.h>
  32. #include <signal.h>
  33. #else
  34. #include <sys/types.h>
  35. #include <sys/socket.h>
  36. #include <netinet/tcp.h>
  37. #include <netinet/in.h>
  38. #include <arpa/inet.h>
  39. #include <stddef.h>
  40. #include <errno.h>
  41. #include <net/if.h>
  42. #endif
  43. #include <limits.h>
  44. #include "jmutex.hpp"
  45. #include "jsocket.hpp"
  46. #include "jexcept.hpp"
  47. #include "jio.hpp"
  48. #include "jmisc.hpp"
  49. #include "jthread.hpp"
  50. #include "jqueue.tpp"
  51. #include "jtime.hpp"
  52. #include "jprop.hpp"
  53. #include "jregexp.hpp"
  54. #include "jdebug.hpp"
  55. #include "build-config.h"
  56. // epoll is only used on Linux: _HAS_EPOLL_SUPPORT is defined there and undefined on every other platform
  57. #ifndef __linux__
  58. # undef _HAS_EPOLL_SUPPORT
  59. #else
  60. # define _HAS_EPOLL_SUPPORT
  61. # ifdef _HAS_EPOLL_SUPPORT // always true at this point - the macro was defined on the previous line
  62. # include <unistd.h>
  63. # include <sys/epoll.h>
  64. //# define EPOLLTRACE
  65. # endif
  66. #endif
  67. // various options (compile-time switches and tuning constants used by this file)
  68. #define CONNECT_TIMEOUT_REFUSED_WAIT 1000 // maximum to sleep on connect_timeout (presumably milliseconds - TODO confirm)
  69. #define TRACE_SLOW_BLOCK_TRANSFER
  70. #define DEFAULT_CONNECT_TIME (100*1000) // for connect_wait (presumably milliseconds - TODO confirm)
  71. #ifndef _WIN32
  72. #define BLOCK_POLLED_SINGLE_CONNECTS // NB this is much slower in windows
  73. #define CENTRAL_NODE_RANDOM_DELAY
  74. #else
  75. #define USERECVSEM // to singlethread BF_SYNC_TRANSFER_PUSH
  76. #endif
  77. #ifdef _DEBUG
  78. //#define SOCKTRACE
  79. # ifdef _HAS_EPOLL_SUPPORT
  // NOTE(review): as written, EPOLLTRACE ends up defined in every _DEBUG build
  // with epoll support, whether or not SOCKTRACE is set, because the second
  // operand is true whenever EPOLLTRACE is not yet defined. Confirm that '||'
  // (rather than '&&') is what was intended.
  80. # if defined(SOCKTRACE) || !defined(EPOLLTRACE)
  81. # define EPOLLTRACE
  82. # endif
  83. # endif
  84. #endif
  // _TESTING builds switch on _TRACE, which selects the message-carrying
  // variants of the THROWJSOCKEXCEPTION / LOGERR macros.
  85. #ifdef _TESTING
  86. #define _TRACE
  87. #endif
  // Exception-throwing and error-logging helper macros.
  //  - With _TRACE defined: the thrown SocketException carries a message that
  //    includes __FILE__ and __LINE__, and LOGERR/LOGERR2 forward to LogErr().
  //  - Without _TRACE: the exception is created with the code only and
  //    LOGERR/LOGERR2 expand to nothing.
  // CAUTION: the THROWJSOCKEXCEPTION* macros expand to a bare { ... } block, not
  // do { ... } while (0), so writing `if (c) THROWJSOCKEXCEPTION(e); else ...`
  // without braces does not compile (the ';' terminates the if statement).
  88. #ifdef _TRACE
  // Throws a heap-allocated SocketException for error code `exc`.
  // Needs an identifier named `tracename` in scope at the point of use.
  89. #define THROWJSOCKEXCEPTION(exc) \
  90. { StringBuffer msg; \
  91. msg.appendf("Target: %s, Raised in: %s, line %d",tracename ,__FILE__, __LINE__); \
  92. IJSOCK_Exception *e = new SocketException(exc,msg.str());\
  93. throw e; }
  // Same as THROWJSOCKEXCEPTION but does not reference `tracename`.
  94. #define THROWJSOCKEXCEPTION2(exc) \
  95. { StringBuffer msg; \
  96. msg.appendf("Raised in: %s, line %d",__FILE__, __LINE__); \
  97. IJSOCK_Exception *e = new SocketException(exc,msg.str());\
  98. throw e; }
  // LOGERR logs without a trace name; LOGERR2 passes the in-scope `tracename`.
  99. #define LOGERR(err,ref,info) LogErr(err,ref,info,__LINE__,NULL)
  100. #define LOGERR2(err,ref,info) LogErr(err,ref,info,__LINE__,tracename)
  101. #else
  // Non-trace build: throw a SocketException that has an error code but no message.
  102. #define THROWJSOCKEXCEPTION(exc) \
  103. { IJSOCK_Exception *e = new SocketException(exc);\
  104. throw e; }
  105. #define THROWJSOCKEXCEPTION2(exc) THROWJSOCKEXCEPTION(exc)
  // Logging is compiled out: the arguments are discarded and never evaluated.
  106. #define LOGERR(err,ref,info)
  107. #define LOGERR2(err,ref,info)
  108. #endif
  // File-scope state for the socket layer.
  109. JSocketStatistics STATS; // not static, so it has external linkage
  110. static bool IP4only=false; // slightly faster if we know no IPv6
  111. static bool IP6preferred=false; // e.g. for DNS and socket create
  112. IpSubNet PreferredSubnet(NULL,NULL); // set this if you prefer a particular subnet for debugging etc
  113. // e.g. PreferredSubnet("192.168.16.0", "255.255.255.0")
  114. static atomic_t pre_conn_unreach_cnt = ATOMIC_INIT(0); // global count of pre_connect() ENETUNREACH errors
  // presumably a marker value written ahead of serialized IPv6 addresses - TODO confirm against its users
  115. #define IPV6_SERIALIZE_PREFIX (0x00ff00ff)
  116. inline void LogErr(unsigned err,unsigned ref,const char *info,unsigned lineno,const char *tracename)
  117. {
  118. if (err)
  119. PROGLOG("jsocket(%d,%d)%s%s err = %d%s%s",ref,lineno,
  120. (info&&*info)?" ":"",(info&&*info)?info:"",err,
  121. (tracename&&*tracename)?" : ":"",(tracename&&*tracename)?tracename:"");
  122. }
  123. class jlib_thrown_decl SocketException: public CInterface, public IJSOCK_Exception
  124. {
  125. public:
  126. IMPLEMENT_IINTERFACE;
  127. SocketException(int code,const char *_msg=NULL) : errcode(code)
  128. {
  129. if (_msg)
  130. msg = strdup(_msg);
  131. else
  132. msg = NULL;
  133. };
  134. ~SocketException() { free(msg); }
  135. int errorCode() const { return errcode; }
  136. static StringBuffer & geterrormessage(int err,StringBuffer &str)
  137. {
  138. switch (err) {
  139. case JSOCKERR_ok: return str.append("ok");
  140. case JSOCKERR_not_opened: return str.append("socket not opened");
  141. case JSOCKERR_bad_address: return str.append("bad address");
  142. case JSOCKERR_connection_failed: return str.append("connection failed");
  143. case JSOCKERR_broken_pipe: return str.append("connection is broken");
  144. case JSOCKERR_graceful_close: return str.append("connection closed other end");
  145. case JSOCKERR_invalid_access_mode: return str.append("invalid access mode");
  146. case JSOCKERR_timeout_expired: return str.append("timeout expired");
  147. case JSOCKERR_port_in_use: return str.append("port in use");
  148. case JSOCKERR_cancel_accept: return str.append("cancel accept");
  149. case JSOCKERR_connectionless_socket: return str.append("connectionless socket");
  150. case JSOCKERR_handle_too_large: return str.append("handle too large");
  151. case JSOCKERR_bad_netaddr: return str.append("bad net addr");
  152. case JSOCKERR_ipv6_not_implemented: return str.append("IPv6 not implemented");
  153. // OS errors
  154. #ifdef _WIN32
  155. case WSAEINTR: return str.append("WSAEINTR(10004) - Interrupted system call.");
  156. case WSAEBADF: return str.append("WSAEBADF(10009) - Bad file number.");
  157. case WSAEACCES: return str.append("WSAEACCES(10013) - Permission denied.");
  158. case WSAEFAULT: return str.append("WSAEFAULT(10014) - Bad address.");
  159. case WSAEINVAL: return str.append("WSAEINVAL(10022) - Invalid argument.");
  160. case WSAEMFILE: return str.append("WSAEMFILE(10024) - Too many open files.");
  161. case WSAEWOULDBLOCK: return str.append("WSAEWOULDBLOCK(10035) - Operation would block.");
  162. case WSAEINPROGRESS: return str.append("WSAEINPROGRESS(10036) - Operation now in progress.");
  163. case WSAEALREADY: return str.append("WSAEALREADY(10037) - Operation already in progress.");
  164. case WSAENOTSOCK: return str.append("WSAENOTSOCK(10038) - Socket operation on nonsocket.");
  165. case WSAEDESTADDRREQ: return str.append("WSAEDESTADDRREQ(10039) - Destination address required.");
  166. case WSAEMSGSIZE: return str.append("WSAEMSGSIZE(10040) - Message too long.");
  167. case WSAEPROTOTYPE: return str.append("WSAEPROTOTYPE(10041) - Protocol wrong type for socket.");
  168. case WSAENOPROTOOPT: return str.append("WSAENOPROTOOPT(10042) - Protocol not available.");
  169. case WSAEPROTONOSUPPORT: return str.append("WSAEPROTONOSUPPORT(10043) - Protocol not supported.");
  170. case WSAESOCKTNOSUPPORT: return str.append("WSAESOCKTNOSUPPORT(10044) - Socket type not supported.");
  171. case WSAEOPNOTSUPP: return str.append("WSAEOPNOTSUPP(10045) - Operation not supported on socket.");
  172. case WSAEPFNOSUPPORT: return str.append("WSAEPFNOSUPPORT(10046) - Protocol family not supported.");
  173. case WSAEAFNOSUPPORT: return str.append("WSAEAFNOSUPPORT(10047) - Address family not supported by protocol family.");
  174. case WSAEADDRINUSE: return str.append("WSAEADDRINUSE(10048) - Address already in use.");
  175. case WSAEADDRNOTAVAIL: return str.append("WSAEADDRNOTAVAIL(10049) - Cannot assign requested address.");
  176. case WSAENETDOWN: return str.append("WSAENETDOWN(10050) - Network is down.");
  177. case WSAENETUNREACH: return str.append("WSAENETUNREACH(10051) - Network is unreachable.");
  178. case WSAENETRESET: return str.append("WSAENETRESET(10052) - Network dropped connection on reset.");
  179. case WSAECONNABORTED: return str.append("WSAECONNABORTED(10053) - Software caused connection abort.");
  180. case WSAECONNRESET: return str.append("WSAECONNRESET(10054) - Connection reset by peer.");
  181. case WSAENOBUFS: return str.append("WSAENOBUFS(10055) - No buffer space available.");
  182. case WSAEISCONN: return str.append("WSAEISCONN(10056) - Socket is already connected.");
  183. case WSAENOTCONN: return str.append("WSAENOTCONN(10057) - Socket is not connected.");
  184. case WSAESHUTDOWN: return str.append("WSAESHUTDOWN(10058) - Cannot send after socket shutdown.");
  185. case WSAETOOMANYREFS: return str.append("WSAETOOMANYREFS(10059) - Too many references: cannot splice.");
  186. case WSAETIMEDOUT: return str.append("WSAETIMEDOUT(10060) - Connection timed out.");
  187. case WSAECONNREFUSED: return str.append("WSAECONNREFUSED(10061) - Connection refused.");
  188. case WSAELOOP: return str.append("WSAELOOP(10062) - Too many levels of symbolic links.");
  189. case WSAENAMETOOLONG: return str.append("WSAENAMETOOLONG(10063) - File name too long.");
  190. case WSAEHOSTDOWN: return str.append("WSAEHOSTDOWN(10064) - Host is down.");
  191. case WSAEHOSTUNREACH: return str.append("WSAEHOSTUNREACH(10065) - No route to host.");
  192. case WSASYSNOTREADY: return str.append("WSASYSNOTREADY(10091) - The network subsystem is unusable.");
  193. case WSAVERNOTSUPPORTED: return str.append("WSAVERNOTSUPPORTED(10092) - The Windows Sockets DLL cannot support this application.");
  194. case WSANOTINITIALISED: return str.append("WSANOTINITIALISED(10093) - Winsock not initialized.");
  195. case WSAEDISCON: return str.append("WSAEDISCON(10101) - Disconnect.");
  196. case WSAHOST_NOT_FOUND: return str.append("WSAHOST_NOT_FOUND(11001) - Host not found.");
  197. case WSATRY_AGAIN: return str.append("WSATRY_AGAIN(11002) - Nonauthoritative host not found.");
  198. case WSANO_RECOVERY: return str.append("WSANO_RECOVERY(11003) - Nonrecoverable error.");
  199. case WSANO_DATA: return str.append("WSANO_DATA(11004) - Valid name, no data record of requested type.");
  200. #else
  201. case ENOTSOCK: return str.append("ENOTSOCK - Socket operation on non-socket ");
  202. case EDESTADDRREQ: return str.append("EDESTADDRREQ - Destination address required ");
  203. case EMSGSIZE: return str.append("EMSGSIZE - Message too long ");
  204. case EPROTOTYPE: return str.append("EPROTOTYPE - Protocol wrong type for socket ");
  205. case ENOPROTOOPT: return str.append("ENOPROTOOPT - Protocol not available ");
  206. case EPROTONOSUPPORT: return str.append("EPROTONOSUPPORT - Protocol not supported ");
  207. case ESOCKTNOSUPPORT: return str.append("ESOCKTNOSUPPORT - Socket type not supported ");
  208. case EOPNOTSUPP: return str.append("EOPNOTSUPP - Operation not supported on socket ");
  209. case EPFNOSUPPORT: return str.append("EPFNOSUPPORT - Protocol family not supported ");
  210. case EAFNOSUPPORT: return str.append("EAFNOSUPPORT - Address family not supported by protocol family ");
  211. case EADDRINUSE: return str.append("EADDRINUSE - Address already in use ");
  212. case EADDRNOTAVAIL: return str.append("EADDRNOTAVAIL - Can't assign requested address ");
  213. case ENETDOWN: return str.append("ENETDOWN - Network is down ");
  214. case ENETUNREACH: return str.append("ENETUNREACH - Network is unreachable ");
  215. case ENETRESET: return str.append("ENETRESET - Network dropped connection because of reset ");
  216. case ECONNABORTED: return str.append("ECONNABORTED - Software caused connection abort ");
  217. case ECONNRESET: return str.append("ECONNRESET - Connection reset by peer ");
  218. case ENOBUFS: return str.append("ENOBUFS - No buffer space available ");
  219. case EISCONN: return str.append("EISCONN - Socket is already connected ");
  220. case ENOTCONN: return str.append("ENOTCONN - Socket is not connected ");
  221. case ESHUTDOWN: return str.append("ESHUTDOWN - Can't send after socket shutdown ");
  222. case ETOOMANYREFS: return str.append("ETOOMANYREFS - Too many references: can't splice ");
  223. case ETIMEDOUT: return str.append("ETIMEDOUT - Connection timed out ");
  224. case ECONNREFUSED: return str.append("ECONNREFUSED - Connection refused ");
  225. case EHOSTDOWN: return str.append("EHOSTDOWN - Host is down ");
  226. case EHOSTUNREACH: return str.append("EHOSTUNREACH - No route to host ");
  227. case EWOULDBLOCK: return str.append("EWOULDBLOCK - operation already in progress");
  228. case EINPROGRESS: return str.append("EINPROGRESS - operation now in progress ");
  229. #endif
  230. }
  231. IException *ose = MakeOsException(err);
  232. ose->errorMessage(str);
  233. ose->Release();
  234. return str;
  235. }
  236. StringBuffer & errorMessage(StringBuffer &str) const
  237. {
  238. if (msg)
  239. return geterrormessage(errcode,str).append('\n').append(msg);
  240. return geterrormessage(errcode,str);
  241. }
  242. MessageAudience errorAudience() const
  243. {
  244. switch (errcode) {
  245. case JSOCKERR_port_in_use:
  246. return MSGAUD_operator;
  247. }
  248. return MSGAUD_user;
  249. }
  250. private:
  251. int errcode;
  252. char *msg;
  253. };
  254. IJSOCK_Exception *IPv6NotImplementedException(const char *filename,unsigned lineno)
  255. {
  256. StringBuffer msg;
  257. msg.appendf("%s(%d)",filename,lineno);
  258. return new SocketException(JSOCKERR_ipv6_not_implemented,msg.str());
  259. }
  // Multicast membership request: the group address is parsed from the dotted
  // text passed to the constructor and the interface is left as INADDR_ANY.
  struct MCASTREQ
  {
      struct in_addr imr_multiaddr;   /* multicast group to join */
      struct in_addr imr_interface;   /* interface to join on */

      MCASTREQ(const char *mcip)
      {
          imr_interface.s_addr = htonl(INADDR_ANY);
          imr_multiaddr.s_addr = inet_addr(mcip);
      }
  };
  // Apple builds may not provide MSG_NOSIGNAL; supply a value so SEND_FLAGS compiles.
  #ifdef __APPLE__
  #ifndef MSG_NOSIGNAL
  #define MSG_NOSIGNAL 0x4000
  #endif
  #endif

  // Per-platform socket abstractions used by the rest of this file:
  //   T_SOCKET          native socket handle type
  //   T_FD_SET          descriptor-set type handed to select()
  //   XFD_SETSIZE       number of descriptors a T_FD_SET can hold
  //   XFD_ZERO(s)       clears a T_FD_SET
  //   SEND_FLAGS        flags passed to send()
  //   BADSOCKERR(err)   true when err means "not a valid socket"
  //   CHECKSOCKRANGE(s) throws JSOCKERR_handle_too_large when s does not fit
  //                     in a T_FD_SET (a no-op where the system fd_set is used)
  #if defined( _WIN32)
  #define T_SOCKET SOCKET
  #define T_FD_SET fd_set
  #define XFD_SETSIZE FD_SETSIZE
  #define ETIMEDOUT WSAETIMEDOUT
  #define ECONNREFUSED WSAECONNREFUSED
  #define XFD_ZERO(s) FD_ZERO(s)
  #define SEND_FLAGS 0
  #define BADSOCKERR(err) (((err)==WSAEBADF)||((err)==WSAENOTSOCK))
  #define CHECKSOCKRANGE(s)
  #elif defined(__FreeBSD__) || defined(__APPLE__)
  #define XFD_SETSIZE FD_SETSIZE
  #define T_FD_SET fd_set
  #define XFD_ZERO(s) FD_ZERO(s)
  #define T_SOCKET int
  #define SEND_FLAGS (MSG_NOSIGNAL)
  #define BADSOCKERR(err) (((err)==EBADF)||((err)==ENOTSOCK))
  #define CHECKSOCKRANGE(s)
  #else
  #define XFD_SETSIZE 32768
  struct xfd_set { __fd_mask fds_bits[XFD_SETSIZE / __NFDBITS]; }; // define our own
  // linux 64 bit
  #ifdef __linux__
  #ifdef __x86_64__
  // Replace the library's __FD* helpers with plain index arithmetic
  // (presumably so they can be applied to the larger xfd_set - TODO confirm).
  #undef __FDMASK
  #define __FDMASK(d) (1UL << ((d) % __NFDBITS))
  #undef __FDELT
  #define __FDELT(d) ((d) / __NFDBITS)
  #undef __FD_SET
  #define __FD_SET(d, s) (__FDS_BITS (s)[__FDELT(d)] |= __FDMASK(d))
  #undef __FD_ISSET
  #define __FD_ISSET(d, s) ((__FDS_BITS (s)[__FDELT(d)] & __FDMASK(d)) != 0)
  #endif
  #endif
  // end 64 bit
  // Defined for every platform that uses xfd_set (previously only under
  // __linux__, which left the macro undefined on other Unix builds).
  #define CHECKSOCKRANGE(s) { if ((s)>=XFD_SETSIZE) THROWJSOCKEXCEPTION2(JSOCKERR_handle_too_large); }
  #define T_FD_SET xfd_set
  #define XFD_ZERO(s) memset(s,0,sizeof(xfd_set))
  #define T_SOCKET int
  #define SEND_FLAGS (MSG_NOSIGNAL)
  #define BADSOCKERR(err) (((err)==EBADF)||((err)==ENOTSOCK))
  #endif
  #ifdef CENTRAL_NODE_RANDOM_DELAY
  // Endpoints that connect_wait() compares the target address against before
  // sleeping for a random delay.
  static SocketEndpointArray CentralNodeArray;
  #endif

  // Role a CSocket is created in (transport plus client/server side).
  enum SOCKETMODE
  {
      sm_tcp_server,
      sm_tcp,
      sm_udp_server,
      sm_udp,
      sm_multicast_server,
      sm_multicast
  };
  321. class CSocket: public CInterface, public ISocket
  322. {
  323. public:
  324. IMPLEMENT_IINTERFACE;
  325. static CriticalSection crit;
  326. protected:
  327. friend class CSocketConnectWait;
  328. enum { ss_open, ss_shutdown, ss_close, ss_pre_open } state;
  329. T_SOCKET sock;
  330. char* hostname; // host address
  331. unsigned short hostport; // host port
  332. SOCKETMODE sockmode;
  333. IpAddress targetip;
  334. SocketEndpoint returnep; // set by set_return_addr
  335. MCASTREQ * mcastreq;
  336. size32_t nextblocksize;
  337. unsigned blockflags;
  338. unsigned blocktimeoutms;
  339. bool owned;
  340. enum {accept_not_cancelled, accept_cancel_pending, accept_cancelled} accept_cancel_state;
  341. bool in_accept;
  342. bool nonblocking;
  343. bool nagling;
  344. static unsigned connectingcount;
  345. #ifdef USERECVSEM
  346. static Semaphore receiveblocksem;
  347. bool receiveblocksemowned; // owned by this socket
  348. #endif
  349. #ifdef _TRACE
  350. char * tracename;
  351. #endif
  352. public:
  353. void open(int listen_queue_size,bool reuseports=false);
  354. bool connect_timeout( unsigned timeout, bool noexception);
  355. void connect_wait( unsigned timems);
  356. void udpconnect();
  357. void read(void* buf, size32_t min_size, size32_t max_size, size32_t &size_read,unsigned timeoutsecs);
  358. void readtms(void* buf, size32_t min_size, size32_t max_size, size32_t &size_read, unsigned timedelaysecs);
  359. void read(void* buf, size32_t size);
  360. size32_t write(void const* buf, size32_t size);
  361. size32_t write_multiple(unsigned num,void const**buf, size32_t *size);
  362. size32_t udp_write_to(SocketEndpoint &ep,void const* buf, size32_t size);
  363. void close();
  364. void errclose();
  365. bool connectionless() { return (sockmode!=sm_tcp)&&(sockmode!=sm_tcp_server); }
  366. void shutdown(unsigned mode);
  367. ISocket* accept(bool allowcancel);
  368. int wait_read(unsigned timeout);
  369. int wait_write(unsigned timeout);
  370. int name(char *name,size32_t namemax);
  371. int peer_name(char *name,size32_t namemax);
  372. SocketEndpoint &getPeerEndpoint(SocketEndpoint &ep);
  373. IpAddress & getPeerAddress(IpAddress &addr);
  374. void set_return_addr(int port,const char *name); // sets returnep
  375. void cancel_accept();
  376. size32_t get_max_send_size();
  377. bool set_nonblock(bool on=true);
  378. bool set_nagle(bool on);
  379. void set_linger(int lingersecs);
  380. void set_keep_alive(bool set);
  381. virtual void set_inherit(bool inherit=false);
  382. virtual bool check_connection();
  383. // Block functions
  384. void set_block_mode(unsigned flags,size32_t recsize=0,unsigned timeoutms=0);
  385. bool send_block(const void *blk,size32_t sz);
  386. size32_t receive_block_size();
  387. size32_t receive_block(void *blk,size32_t sz);
  388. size32_t get_send_buffer_size();
  389. void set_send_buffer_size(size32_t sz);
  390. bool join_multicast_group(SocketEndpoint &ep); // for udp multicast
  391. bool leave_multicast_group(SocketEndpoint &ep); // for udp multicast
  392. size32_t get_receive_buffer_size();
  393. void set_receive_buffer_size(size32_t sz);
  394. size32_t avail_read();
  395. int pre_connect(bool block);
  396. int post_connect();
  397. CSocket(const SocketEndpoint &_ep,SOCKETMODE smode,const char *name);
  398. CSocket(T_SOCKET new_sock,SOCKETMODE smode,bool _owned);
  399. virtual ~CSocket();
  400. unsigned OShandle()
  401. {
  402. return (unsigned)sock;
  403. }
  404. private:
  405. int closesock()
  406. {
  407. if (sock!=INVALID_SOCKET) {
  408. T_SOCKET s = sock;
  409. sock = INVALID_SOCKET;
  410. STATS.activesockets--;
  411. #ifdef SOCKTRACE
  412. PROGLOG("SOCKTRACE: Closing socket %x %d (%x)", s, s, this);
  413. #endif
  414. #ifdef _WIN32
  415. return ::closesocket(s);
  416. #else
  417. return ::close(s);
  418. #endif
  419. }
  420. else
  421. return 0;
  422. }
  423. };
  424. CriticalSection CSocket::crit;
  425. unsigned CSocket::connectingcount=0;
  426. #ifdef USERECVSEM
  427. Semaphore CSocket::receiveblocksem(2);
  428. #endif
  429. #ifdef _WIN32
  430. class win_socket_library
  431. {
  432. static bool initdone; // to prevent dependancy probs very early on (e.g. jlog)
  433. public:
  434. win_socket_library() { init(); }
  435. bool init()
  436. {
  437. if (initdone)
  438. return true;
  439. WSADATA wsa;
  440. if (WSAStartup(MAKEWORD(2, 2), &wsa) != 0) {
  441. if (WSAStartup(MAKEWORD(1, 1), &wsa) != 0) {
  442. MessageBox(NULL,"Failed to initialize windows sockets","JLib Socket Error",MB_OK);
  443. return false;
  444. }
  445. }
  446. initdone = true;
  447. return true;
  448. }
  449. ~win_socket_library()
  450. {
  451. WSACleanup();
  452. }
  453. };
  454. bool win_socket_library::initdone = false;
  455. static win_socket_library ws32_lib;
  456. #define ERRNO() WSAGetLastError()
  457. #define EADDRINUSE WSAEADDRINUSE
  458. #define EINTRCALL WSAEINTR
  459. #define ECONNRESET WSAECONNRESET
  460. #define ECONNABORTED WSAECONNABORTED
  461. #define ENOTCONN WSAENOTCONN
  462. #define EWOULDBLOCK WSAEWOULDBLOCK
  463. #define EINPROGRESS WSAEINPROGRESS
  464. #define ENETUNREACH WSAENETUNREACH
  465. #define ENOTSOCK WSAENOTSOCK
  466. struct j_sockaddr_in6 {
  467. short sin6_family; /* AF_INET6 */
  468. u_short sin6_port; /* Transport level port number */
  469. u_long sin6_flowinfo; /* IPv6 flow information */
  470. struct in_addr6 sin6_addr; /* IPv6 address */
  471. u_long sin6_scope_id; /* set of interfaces for a scope */
  472. };
  473. typedef union {
  474. struct sockaddr sa;
  475. struct j_sockaddr_in6 sin6;
  476. struct sockaddr_in sin;
  477. } J_SOCKADDR;
  478. #define DEFINE_SOCKADDR(name) J_SOCKADDR name; memset(&name,0,sizeof(J_SOCKADDR))
  479. static int _inet_pton(int af, const char* src, void* dst)
  480. {
  481. DEFINE_SOCKADDR(u);
  482. int address_length;
  483. switch (af) {
  484. case AF_INET:
  485. u.sin.sin_family = AF_INET;
  486. address_length = sizeof (u.sin);
  487. break;
  488. case AF_INET6:
  489. u.sin6.sin6_family = AF_INET6;
  490. address_length = sizeof (u.sin6);
  491. break;
  492. default:
  493. #ifdef EAFNOSUPPORT
  494. errno = EAFNOSUPPORT;
  495. #else
  496. errno = 52;
  497. #endif
  498. return -1;
  499. }
  500. ws32_lib.init();
  501. int ret = WSAStringToAddress ((LPTSTR) src, af, NULL, &u.sa, &address_length);
  502. if (ret == 0) {
  503. switch (af) {
  504. case AF_INET:
  505. memcpy (dst, &u.sin.sin_addr, sizeof (struct in_addr));
  506. break;
  507. case AF_INET6:
  508. memcpy (dst, &u.sin6.sin6_addr, sizeof (u.sin6.sin6_addr));
  509. break;
  510. }
  511. return 1;
  512. }
  513. errno = WSAGetLastError();
  514. // PROGLOG("errno = %d",errno);
  515. return 0;
  516. }
  517. static const char * _inet_ntop (int af, const void *src, char *dst, socklen_t cnt)
  518. {
  519. /* struct sockaddr can't accomodate struct sockaddr_in6. */
  520. DEFINE_SOCKADDR(u);
  521. DWORD dstlen = cnt;
  522. size_t srcsize;
  523. memset(&u,0,sizeof(u));
  524. switch (af) {
  525. case AF_INET:
  526. u.sin.sin_family = AF_INET;
  527. u.sin.sin_addr = *(struct in_addr *) src;
  528. srcsize = sizeof (u.sin);
  529. break;
  530. case AF_INET6:
  531. u.sin6.sin6_family = AF_INET6;
  532. memcpy(&u.sin6.sin6_addr,src,sizeof(in_addr6));
  533. srcsize = sizeof (u.sin6);
  534. break;
  535. default:
  536. return NULL;
  537. }
  538. ws32_lib.init();
  539. if (WSAAddressToString (&u.sa, srcsize, NULL, dst, &dstlen) != 0) {
  540. errno = WSAGetLastError();
  541. return NULL;
  542. }
  543. return (const char *) dst;
  544. }
  545. int inet_aton (const char *name, struct in_addr *addr)
  546. {
  547. addr->s_addr = inet_addr (name);
  548. return (addr->s_addr == (u_long)-1)?1:0; // 255.255.255.255 has had it here
  549. }
  550. #else
  // Non-Windows: the system conversion routines and IPv6 types are used directly.
  #define _inet_ntop  inet_ntop
  #define _inet_pton  inet_pton
  #define in_addr6    in6_addr

  // One buffer that can be viewed as a generic, IPv4 or IPv6 socket address.
  typedef union {
      struct sockaddr         sa;
      struct sockaddr_in6     sin6;
      struct sockaddr_in      sin;
  } J_SOCKADDR;

  // Declares a J_SOCKADDR called `name` and zero-fills it.
  #define DEFINE_SOCKADDR(name) J_SOCKADDR name; memset(&name,0,sizeof(J_SOCKADDR))

  #define EINTRCALL   EINTR
  #define ERRNO()     (errno)
  #ifndef INADDR_NONE
  #define INADDR_NONE (-1)
  #endif
  565. #endif
  566. #ifndef INET6_ADDRSTRLEN
  567. #define INET6_ADDRSTRLEN 65
  568. #endif
  569. inline socklen_t setSockAddr(J_SOCKADDR &u, const IpAddress &ip,unsigned short port)
  570. {
  571. if (!IP6preferred) {
  572. if (ip.getNetAddress(sizeof(in_addr),&u.sin.sin_addr)==sizeof(in_addr)) {
  573. u.sin.sin_family = AF_INET;
  574. u.sin.sin_port = htons(port);
  575. return sizeof(u.sin);
  576. }
  577. }
  578. if (IP4only)
  579. IPV6_NOT_IMPLEMENTED();
  580. ip.getNetAddress(sizeof(in_addr6),&u.sin6.sin6_addr);
  581. u.sin6.sin6_family = AF_INET6;
  582. u.sin6.sin6_port = htons(port);
  583. return sizeof(u.sin6);
  584. }
  585. inline socklen_t setSockAddrAny(J_SOCKADDR &u, unsigned short port)
  586. {
  587. if (IP6preferred) {
  588. #ifdef _WIN32
  589. IN6ADDR_SETANY((PSOCKADDR_IN6)&u.sin6.sin6_addr);
  590. #else
  591. memcpy(&u.sin6.sin6_addr,&in6addr_any,sizeof(in_addr6));
  592. #endif
  593. u.sin6.sin6_family= AF_INET6;
  594. u.sin6.sin6_port = htons(port);
  595. return sizeof(u.sin6);
  596. }
  597. u.sin.sin_addr.s_addr = htonl(INADDR_ANY);
  598. u.sin.sin_family= AF_INET;
  599. u.sin.sin_port = htons(port);
  600. return sizeof(u.sin);
  601. }
  602. inline void getSockAddrEndpoint(const J_SOCKADDR &u, socklen_t ul, SocketEndpoint &ep)
  603. {
  604. if (ul==sizeof(u.sin)) {
  605. ep.setNetAddress(sizeof(in_addr),&u.sin.sin_addr);
  606. ep.port = htons(u.sin.sin_port);
  607. }
  608. else {
  609. ep.setNetAddress(sizeof(in_addr6),&u.sin6.sin6_addr);
  610. ep.port = htons(u.sin6.sin6_port);
  611. }
  612. }
  613. /* might need fcntl(F_SETFL), or ioctl(FIONBIO) */
  614. /* Posix.1g says fcntl */
  615. #if defined(O_NONBLOCK)
  616. bool CSocket::set_nonblock(bool on)
  617. {
  618. int flags = fcntl(sock, F_GETFL, 0);
  619. if (flags == -1)
  620. return nonblocking;
  621. if (on)
  622. flags |= O_NONBLOCK;
  623. else
  624. flags &= ~O_NONBLOCK;
  625. if (fcntl(sock, F_SETFL, flags)==0) {
  626. bool wasNonBlocking = nonblocking;
  627. nonblocking = on;
  628. return wasNonBlocking;
  629. }
  630. return nonblocking;
  631. }
  632. #else
  633. bool CSocket::set_nonblock(bool on)
  634. {
  635. #ifdef _WIN32
  636. u_long yes = on?1:0;
  637. if (ioctlsocket(sock, FIONBIO, &yes)==0) {
  638. #else
  639. int yes = on?1:0;
  640. if (ioctl(sock, FIONBIO, &yes)==0) {
  641. #endif
  642. bool wasNonBlocking = nonblocking;
  643. nonblocking = on;
  644. return wasNonBlocking;
  645. }
  646. return nonblocking;
  647. }
  648. #endif
  649. bool CSocket::set_nagle(bool on)
  650. {
  651. bool ret = nagling;
  652. nagling = on;
  653. int enabled = !on;
  654. if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char*)&enabled, sizeof(enabled)) != 0) {
  655. nagling = !on;
  656. }
  657. return ret;
  658. }
  659. void CSocket::set_inherit(bool inherit)
  660. {
  661. #ifndef _WIN32
  662. long flag = fcntl(sock, F_GETFD);
  663. if(inherit)
  664. flag &= ~FD_CLOEXEC;
  665. else
  666. flag |= FD_CLOEXEC;
  667. fcntl(sock, F_SETFD, flag);
  668. #endif
  669. }
  670. size32_t CSocket::avail_read()
  671. {
  672. #ifdef _WIN32
  673. u_long avail;
  674. if (ioctlsocket(sock, FIONREAD, &avail)==0)
  675. #else
  676. int avail;
  677. if (ioctl(sock, FIONREAD, &avail)==0)
  678. #endif
  679. return (size32_t)avail;
  680. int err = ERRNO();
  681. LOGERR2(err,1,"avail_read");
  682. return 0;
  683. }
  684. #define PRE_CONN_UNREACH_ELIM 100
  685. int CSocket::pre_connect (bool block)
  686. {
  687. assertex(hostname);
  688. DEFINE_SOCKADDR(u);
  689. if (targetip.isNull()) {
  690. set_return_addr(hostport,hostname);
  691. targetip.ipset(returnep);
  692. }
  693. socklen_t ul = setSockAddr(u,targetip,hostport);
  694. sock = ::socket(u.sa.sa_family, SOCK_STREAM, targetip.isIp4()?0:PF_INET6);
  695. owned = true;
  696. state = ss_pre_open; // will be set to open by post_connect
  697. if (sock == INVALID_SOCKET) {
  698. int err = ERRNO();
  699. THROWJSOCKEXCEPTION(err);
  700. }
  701. STATS.activesockets++;
  702. int err = 0;
  703. set_nonblock(!block);
  704. int rc = ::connect(sock, &u.sa, ul);
  705. if (rc==SOCKET_ERROR) {
  706. err = ERRNO();
  707. if ((err != EINPROGRESS)&&(err != EWOULDBLOCK)&&(err != ETIMEDOUT)&&(err!=ECONNREFUSED)) { // handled by caller
  708. if (err != ENETUNREACH) {
  709. atomic_set(&pre_conn_unreach_cnt, 0);
  710. LOGERR2(err,1,"pre_connect");
  711. } else {
  712. int ecnt = atomic_read(&pre_conn_unreach_cnt);
  713. if (ecnt <= PRE_CONN_UNREACH_ELIM) {
  714. atomic_inc(&pre_conn_unreach_cnt);
  715. LOGERR2(err,1,"pre_connect network unreachable");
  716. }
  717. }
  718. } else
  719. atomic_set(&pre_conn_unreach_cnt, 0);
  720. } else
  721. atomic_set(&pre_conn_unreach_cnt, 0);
  722. #ifdef SOCKTRACE
  723. PROGLOG("SOCKTRACE: pre-connected socket%s %x %d (%x) err=%d", block?"(block)":"", sock, sock, (int)this, err);
  724. #endif
  725. return err;
  726. }
  727. int CSocket::post_connect ()
  728. {
  729. set_nonblock(false);
  730. int err = 0;
  731. socklen_t errlen = sizeof(err);
  732. int rc = getsockopt(sock, SOL_SOCKET, SO_ERROR, (char *)&err, &errlen); // check for error
  733. if ((rc!=0)&&!err)
  734. err = ERRNO(); // some implementations of getsockopt duff
  735. if (err==0) {
  736. nagling = true;
  737. set_nagle(false);
  738. state = ss_open;
  739. }
  740. else if ((err!=ETIMEDOUT)&&(err!=ECONNREFUSED)) // handled by caller
  741. LOGERR2(err,1,"post_connect");
  742. return err;
  743. }
  744. void CSocket::open(int listen_queue_size,bool reuseports)
  745. {
  746. if (IP6preferred)
  747. sock = ::socket(AF_INET6, connectionless()?SOCK_DGRAM:SOCK_STREAM, PF_INET6);
  748. else
  749. sock = ::socket(AF_INET, connectionless()?SOCK_DGRAM:SOCK_STREAM, 0);
  750. if (sock == INVALID_SOCKET) {
  751. THROWJSOCKEXCEPTION(ERRNO());
  752. }
  753. STATS.activesockets++;
  754. #ifdef SOCKTRACE
  755. PROGLOG("SOCKTRACE: opened socket %x %d (%x)", sock,sock,this);
  756. #endif
  757. if ((hostport==0)&&(sockmode==sm_udp)) {
  758. state = ss_open;
  759. #ifdef SOCKTRACE
  760. PROGLOG("SOCKTRACE: opened socket return udp");
  761. #endif
  762. set_inherit(false);
  763. return;
  764. }
  765. #ifndef _WIN32
  766. reuseports = true; // for some reason linux requires reuse ports
  767. #endif
  768. if (reuseports) {
  769. int on = 1;
  770. setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on, sizeof(on));
  771. }
  772. DEFINE_SOCKADDR(u);
  773. socklen_t ul;
  774. if (hostname) {
  775. if (targetip.isNull()) {
  776. set_return_addr(hostport,hostname);
  777. targetip.ipset(returnep);
  778. }
  779. ul = setSockAddr(u,targetip,hostport);
  780. }
  781. else
  782. ul = setSockAddrAny(u,hostport);
  783. int saverr;
  784. if (::bind(sock, &u.sa, ul) != 0) {
  785. saverr = ERRNO();
  786. if (saverr==EADDRINUSE) { // don't log as error (some usages probe ports)
  787. ErrPortInUse:
  788. closesock();
  789. char msg[1024];
  790. sprintf(msg,"Target: %s, port = %d, Raised in: %s, line %d",tracename,(int)hostport,__FILE__, __LINE__);
  791. IJSOCK_Exception *e = new SocketException(JSOCKERR_port_in_use,msg);
  792. throw e;
  793. }
  794. else {
  795. closesock();
  796. THROWJSOCKEXCEPTION(saverr);
  797. }
  798. }
  799. if (!connectionless()) {
  800. if (::listen(sock, listen_queue_size) != 0) {
  801. saverr = ERRNO();
  802. if (saverr==EADDRINUSE)
  803. goto ErrPortInUse;
  804. closesock();
  805. THROWJSOCKEXCEPTION(saverr);
  806. }
  807. }
  808. if (mcastreq) {
  809. if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,(char*)mcastreq, sizeof(*mcastreq))!=0) {
  810. saverr = ERRNO();
  811. closesock();
  812. THROWJSOCKEXCEPTION(saverr);
  813. }
  814. }
  815. state = ss_open;
  816. #ifdef SOCKTRACE
  817. PROGLOG("SOCKTRACE: opened socket return");
  818. #endif
  819. set_inherit(false);
  820. }
  821. ISocket* CSocket::accept(bool allowcancel)
  822. {
  823. if ((accept_cancel_state!=accept_not_cancelled) && allowcancel) {
  824. accept_cancel_state=accept_cancelled;
  825. return NULL;
  826. }
  827. if (state != ss_open) {
  828. ERRLOG("invalid accept, state = %d",(int)state);
  829. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  830. }
  831. if (connectionless()) {
  832. THROWJSOCKEXCEPTION(JSOCKERR_connectionless_socket);
  833. }
  834. T_SOCKET newsock;
  835. loop {
  836. in_accept = true;
  837. newsock = (sock!=INVALID_SOCKET)?::accept(sock, NULL, NULL):INVALID_SOCKET;
  838. in_accept = false;
  839. #ifdef SOCKTRACE
  840. PROGLOG("SOCKTRACE: accept created socket %x %d (%x)", newsock,newsock,this);
  841. #endif
  842. if (newsock!=INVALID_SOCKET) {
  843. if ((sock==INVALID_SOCKET)||(accept_cancel_state==accept_cancel_pending)) {
  844. ::close(newsock);
  845. newsock=INVALID_SOCKET;
  846. }
  847. else {
  848. accept_cancel_state = accept_not_cancelled;
  849. break;
  850. }
  851. }
  852. int saverr;
  853. saverr = ERRNO();
  854. if ((sock==INVALID_SOCKET)||(accept_cancel_state==accept_cancel_pending)) {
  855. accept_cancel_state = accept_cancelled;
  856. if (allowcancel)
  857. return NULL;
  858. THROWJSOCKEXCEPTION(JSOCKERR_cancel_accept);
  859. }
  860. if (saverr != EINTRCALL) {
  861. accept_cancel_state = accept_not_cancelled;
  862. THROWJSOCKEXCEPTION(saverr);
  863. }
  864. }
  865. if (state != ss_open) {
  866. accept_cancel_state = accept_cancelled;
  867. if (allowcancel)
  868. return NULL;
  869. THROWJSOCKEXCEPTION(JSOCKERR_cancel_accept);
  870. }
  871. CSocket *ret = new CSocket(newsock,sm_tcp,true);
  872. ret->set_inherit(false);
  873. return ret;
  874. }
  875. void CSocket::set_linger(int lingertime)
  876. {
  877. struct linger l;
  878. l.l_onoff = (lingertime>=0)?1:0;
  879. l.l_linger = (lingertime>=0)?lingertime:0;
  880. if (setsockopt(sock, SOL_SOCKET, SO_LINGER, (char*)&l, sizeof(l)) != 0) {
  881. WARNLOG("Linger not set");
  882. }
  883. }
  884. void CSocket::set_keep_alive(bool set)
  885. {
  886. int on=set?1:0;
  887. if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char*)&on, sizeof(on)) != 0) {
  888. WARNLOG("KeepAlive not set");
  889. }
  890. }
  891. int CSocket::name(char *retname,size32_t namemax)
  892. {
  893. if (!retname)
  894. namemax = 0;
  895. if (namemax)
  896. retname[0] = 0;
  897. retname[0] = 0;
  898. if (state != ss_open) {
  899. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  900. }
  901. DEFINE_SOCKADDR(u);
  902. socklen_t ul = sizeof(u);
  903. if (::getsockname(sock,&u.sa, &ul)<0) {
  904. THROWJSOCKEXCEPTION(ERRNO());
  905. }
  906. SocketEndpoint ep;
  907. getSockAddrEndpoint(u,ul,ep);
  908. StringBuffer s;
  909. ep.getIpText(s);
  910. if (namemax>=1) {
  911. if (namemax-1<s.length())
  912. s.setLength(namemax-1);
  913. memcpy(retname,s.str(),s.length());
  914. }
  915. return ep.port;
  916. }
  917. int CSocket::peer_name(char *retname,size32_t namemax)
  918. {
  919. // should not raise exceptions
  920. int ret = 0;
  921. if (!retname)
  922. namemax = 0;
  923. if (namemax)
  924. retname[0] = 0;
  925. if (state != ss_open) {
  926. return -1; // don't log as used to test socket
  927. }
  928. StringBuffer s;
  929. if (sockmode==sm_udp_server) { // udp server
  930. returnep.getIpText(s);
  931. ret = returnep.port;
  932. }
  933. else {
  934. DEFINE_SOCKADDR(u);
  935. socklen_t ul = sizeof(u);
  936. if (::getpeername(sock,&u.sa, &ul)<0)
  937. return -1; // don't log as used to test socket
  938. SocketEndpoint ep;
  939. getSockAddrEndpoint(u,ul,ep);
  940. ep.getIpText(s);
  941. ret = ep.port;
  942. }
  943. if (namemax>1) {
  944. if (namemax-1<s.length())
  945. s.setLength(namemax-1);
  946. memcpy(retname,s.str(),s.length()+1);
  947. }
  948. return ret;
  949. }
  950. SocketEndpoint &CSocket::getPeerEndpoint(SocketEndpoint &ep)
  951. {
  952. if (state != ss_open) {
  953. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  954. }
  955. StringBuffer s;
  956. if (sockmode==sm_udp_server) { // udp server
  957. ep.set(returnep);
  958. }
  959. else {
  960. DEFINE_SOCKADDR(u);
  961. socklen_t ul = sizeof(u);
  962. if (::getpeername(sock,&u.sa, &ul)<0) {
  963. DBGLOG("getpeername failed %d",ERRNO());
  964. ep.set(NULL, 0);
  965. }
  966. else
  967. getSockAddrEndpoint(u,ul,ep);
  968. }
  969. return ep;
  970. }
  971. IpAddress & CSocket::getPeerAddress(IpAddress &addr)
  972. {
  973. SocketEndpoint ep;
  974. getPeerEndpoint(ep);
  975. addr = ep;
  976. return addr;
  977. }
  978. void CSocket::set_return_addr(int port,const char *retname)
  979. {
  980. if (!returnep.ipset(retname)) {
  981. IJSOCK_Exception *e = new SocketException(JSOCKERR_bad_address); // don't use THROWJSOCKEXCEPTION here
  982. throw e;
  983. }
  984. returnep.port = port;
  985. }
  986. void CSocket::cancel_accept()
  987. {
  988. if (connectionless()) {
  989. THROWJSOCKEXCEPTION(JSOCKERR_connectionless_socket);
  990. }
  991. #ifdef SOCKTRACE
  992. PROGLOG("SOCKTRACE: Cancel accept socket %x %d (%x)", sock, sock, this);
  993. #endif
  994. if (!in_accept) {
  995. accept_cancel_state = accept_cancelled;
  996. errclose();
  997. return;
  998. }
  999. accept_cancel_state = accept_cancel_pending;
  1000. errclose(); // this should cause accept to terminate (not supported on all linux though)
  1001. #ifdef _WIN32
  1002. for (unsigned i=0;i<5;i++) { // windows closes on different lower priority thread
  1003. Sleep(i);
  1004. if (accept_cancel_state==accept_cancelled)
  1005. return;
  1006. }
  1007. #else
  1008. Sleep(0);
  1009. if (accept_cancel_state==accept_cancelled)
  1010. return;
  1011. #endif
  1012. // Wakeup listener using a connection
  1013. SocketEndpoint ep(hostport,queryHostIP());
  1014. Owned<CSocket> sock = new CSocket(ep,sm_tcp,NULL);
  1015. try {
  1016. sock->connect_timeout(100,true);
  1017. }
  1018. catch (IJSOCK_Exception *e) {
  1019. EXCLOG(e,"CSocket::cancel_eccept");
  1020. e->Release();
  1021. }
  1022. }
  1023. // ================================================================================
  1024. // connect versions
  1025. ISocket* ISocket::connect( const SocketEndpoint &ep )
  1026. {
  1027. // general connect
  1028. return ISocket::connect_wait(ep,DEFAULT_CONNECT_TIME);
  1029. }
  1030. inline void refused_sleep(CTimeMon &tm, unsigned &refuseddelay)
  1031. {
  1032. unsigned remaining;
  1033. if (!tm.timedout(&remaining)) {
  1034. if (refuseddelay<remaining/4) {
  1035. Sleep(refuseddelay);
  1036. if (refuseddelay<CONNECT_TIMEOUT_REFUSED_WAIT/2)
  1037. refuseddelay *=2;
  1038. else
  1039. refuseddelay = CONNECT_TIMEOUT_REFUSED_WAIT;
  1040. }
  1041. else
  1042. Sleep(remaining/4); // towards end of timeout approach gradually
  1043. }
  1044. }
  1045. bool CSocket::connect_timeout( unsigned timeout, bool noexception)
  1046. {
  1047. // simple connect with timeout (no fancy stuff!)
  1048. unsigned startt = usTick();
  1049. CTimeMon tm(timeout);
  1050. unsigned remaining;
  1051. unsigned refuseddelay = 1;
  1052. int err;
  1053. while (!tm.timedout(&remaining)) {
  1054. err = pre_connect(false);
  1055. if ((err == EINPROGRESS)||(err == EWOULDBLOCK)) {
  1056. T_FD_SET fds;
  1057. struct timeval tv;
  1058. XFD_ZERO(&fds);
  1059. FD_SET((unsigned)sock, &fds);
  1060. T_FD_SET except;
  1061. XFD_ZERO(&except);
  1062. FD_SET((unsigned)sock, &except);
  1063. tv.tv_sec = remaining / 1000;
  1064. tv.tv_usec = (remaining % 1000)*1000;
  1065. CHECKSOCKRANGE(sock);
  1066. int rc = ::select( sock + 1, NULL, (fd_set *)&fds, (fd_set *)&except, &tv );
  1067. if (rc>0) {
  1068. // select succeeded - return error from socket (0 if connected)
  1069. socklen_t errlen = sizeof(err);
  1070. rc = getsockopt(sock, SOL_SOCKET, SO_ERROR, (char *)&err, &errlen); // check for error
  1071. if ((rc!=0)&&!err)
  1072. err = ERRNO(); // some implementations of getsockopt duff
  1073. if (err) // probably ECONNREFUSED but treat all errors same
  1074. refused_sleep(tm,refuseddelay);
  1075. }
  1076. else if (rc<0) {
  1077. err = ERRNO();
  1078. LOGERR2(err,2,"::select");
  1079. }
  1080. }
  1081. if (err==0) {
  1082. err = post_connect();
  1083. if (err==0) {
  1084. STATS.connects++;
  1085. STATS.connecttime+=usTick()-startt;
  1086. #ifdef _TRACE
  1087. char peer[256];
  1088. peer[0] = 'C';
  1089. peer[1] = '!';
  1090. strcpy(peer+2,hostname?hostname:"(NULL)");
  1091. free(tracename);
  1092. tracename = strdup(peer);
  1093. #endif
  1094. return true;
  1095. }
  1096. }
  1097. errclose();
  1098. }
  1099. #ifdef SOCKTRACE
  1100. PROGLOG("connect_timeout: failed %d",err);
  1101. #endif
  1102. STATS.failedconnects++;
  1103. STATS.failedconnecttime+=usTick()-startt;
  1104. if (!noexception)
  1105. THROWJSOCKEXCEPTION(JSOCKERR_connection_failed);
  1106. return false;
  1107. }
  1108. ISocket* ISocket::connect_timeout(const SocketEndpoint &ep,unsigned timeout)
  1109. {
  1110. if (ep.isNull()||(ep.port==0))
  1111. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  1112. Owned<CSocket> sock = new CSocket(ep,sm_tcp,NULL);
  1113. sock->connect_timeout(timeout,false);
  1114. return sock.getClear();
  1115. }
  1116. #define POLLTIME 50
  1117. void CSocket::connect_wait(unsigned timems)
  1118. {
  1119. // simple connect with timeout (no fancy stuff!)
  1120. unsigned startt = usTick();
  1121. CTimeMon tm(timems);
  1122. bool exit = false;
  1123. int err;
  1124. unsigned refuseddelay = 1;
  1125. while (!exit) {
  1126. #ifdef CENTRAL_NODE_RANDOM_DELAY
  1127. ForEachItemIn(cn,CentralNodeArray) {
  1128. SocketEndpoint &ep=CentralNodeArray.item(cn);
  1129. if (ep.ipequals(targetip)) {
  1130. unsigned sleeptime = getRandom() % 1000;
  1131. StringBuffer s;
  1132. ep.getIpText(s);
  1133. PrintLog("Connection to central node %s - sleeping %d milliseconds", s.str(), sleeptime);
  1134. Sleep(sleeptime);
  1135. break;
  1136. }
  1137. }
  1138. #endif
  1139. unsigned remaining;
  1140. exit = tm.timedout(&remaining);
  1141. bool blockselect = exit; // if last time round block
  1142. {
  1143. CriticalBlock block(crit);
  1144. if (++connectingcount>4)
  1145. blockselect = true;
  1146. }
  1147. err = pre_connect(blockselect);
  1148. if (blockselect) {
  1149. if (err&&!exit)
  1150. refused_sleep(tm,refuseddelay); // probably ECONNREFUSED but treat all errors same
  1151. }
  1152. else {
  1153. unsigned timeoutms = (exit||(remaining<10000))?10000:remaining;
  1154. unsigned polltime = 1;
  1155. while (!blockselect && ((err == EINPROGRESS)||(err == EWOULDBLOCK))) {
  1156. T_FD_SET fds;
  1157. struct timeval tv;
  1158. XFD_ZERO(&fds);
  1159. FD_SET((unsigned)sock, &fds);
  1160. T_FD_SET except;
  1161. XFD_ZERO(&except);
  1162. FD_SET((unsigned)sock, &except);
  1163. #ifdef BLOCK_POLLED_SINGLE_CONNECTS
  1164. tv.tv_sec = timeoutms / 1000;
  1165. tv.tv_usec = (timeoutms % 1000)*1000;
  1166. #else
  1167. tv.tv_sec = 0;
  1168. tv.tv_usec = 0;
  1169. #endif
  1170. CHECKSOCKRANGE(sock);
  1171. int rc = ::select( sock + 1, NULL, (fd_set *)&fds, (fd_set *)&except, &tv );
  1172. if (rc>0) {
  1173. // select succeeded - return error from socket (0 if connected)
  1174. socklen_t errlen = sizeof(err);
  1175. rc = getsockopt(sock, SOL_SOCKET, SO_ERROR, (char *)&err, &errlen); // check for error
  1176. if ((rc!=0)&&!err)
  1177. err = ERRNO(); // some implementations of getsockopt duff
  1178. if (err)
  1179. refused_sleep(tm,refuseddelay); // probably ECONNREFUSED but treat all errors same
  1180. break;
  1181. }
  1182. if (rc<0) {
  1183. err = ERRNO();
  1184. LOGERR2(err,2,"::select");
  1185. break;
  1186. }
  1187. if (!timeoutms) {
  1188. #ifdef SOCKTRACE
  1189. PROGLOG("connecttimeout: timed out");
  1190. #endif
  1191. err = -1;
  1192. break;
  1193. }
  1194. #ifdef BLOCK_POLLED_SINGLE_CONNECTS
  1195. break;
  1196. #else
  1197. if (timeoutms<polltime)
  1198. polltime = timeoutms;
  1199. Sleep(polltime); // sleeps 1-50ms (to let other threads run)
  1200. timeoutms -= polltime;
  1201. if (polltime>POLLTIME/2)
  1202. polltime = POLLTIME;
  1203. else
  1204. polltime *= 2;
  1205. #endif
  1206. }
  1207. }
  1208. {
  1209. CriticalBlock block(crit);
  1210. --connectingcount;
  1211. }
  1212. if (err==0) {
  1213. err = post_connect();
  1214. if (err==0) {
  1215. STATS.connects++;
  1216. STATS.connecttime+=usTick()-startt;
  1217. #ifdef _TRACE
  1218. char peer[256];
  1219. peer[0] = 'C';
  1220. peer[1] = '!';
  1221. strcpy(peer+2,hostname?hostname:"(NULL)");
  1222. free(tracename);
  1223. tracename = strdup(peer);
  1224. #endif
  1225. return;
  1226. }
  1227. }
  1228. errclose();
  1229. }
  1230. #ifdef SOCKTRACE
  1231. PROGLOG("connect_wait: failed %d",err);
  1232. #endif
  1233. STATS.failedconnects++;
  1234. STATS.failedconnecttime+=usTick()-startt;
  1235. THROWJSOCKEXCEPTION(JSOCKERR_connection_failed);
  1236. }
  1237. ISocket* ISocket::connect_wait( const SocketEndpoint &ep, unsigned timems)
  1238. {
  1239. if (ep.isNull()||(ep.port==0))
  1240. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  1241. Owned<CSocket> sock = new CSocket(ep,sm_tcp,NULL);
  1242. sock->connect_wait(timems);
  1243. return sock.getClear();
  1244. }
  1245. void CSocket::udpconnect()
  1246. {
  1247. DEFINE_SOCKADDR(u);
  1248. if (targetip.isNull()) {
  1249. set_return_addr(hostport,hostname);
  1250. targetip.ipset(returnep);
  1251. }
  1252. socklen_t ul = setSockAddr(u,targetip,hostport);
  1253. sock = ::socket(u.sa.sa_family, SOCK_DGRAM, targetip.isIp4()?0:PF_INET6);
  1254. #ifdef SOCKTRACE
  1255. PROGLOG("SOCKTRACE: udp connected socket %x %d (%x)", sock, sock, this);
  1256. #endif
  1257. STATS.activesockets++;
  1258. if (sock == INVALID_SOCKET) {
  1259. THROWJSOCKEXCEPTION(ERRNO());
  1260. }
  1261. int res = ::connect(sock, &u.sa, ul);
  1262. if (res != 0) { // works for UDP
  1263. closesock();
  1264. THROWJSOCKEXCEPTION(JSOCKERR_connection_failed);
  1265. }
  1266. nagling = false; // means nothing for UDP
  1267. state = ss_open;
  1268. #ifdef _TRACE
  1269. char peer[256];
  1270. peer[0] = 'C';
  1271. peer[1] = '!';
  1272. strcpy(peer+2,hostname?hostname:"(NULL)");
  1273. free(tracename);
  1274. tracename = strdup(peer);
  1275. #endif
  1276. }
  1277. int CSocket::wait_read(unsigned timeout)
  1278. {
  1279. int ret = 0;
  1280. while (sock!=INVALID_SOCKET) {
  1281. T_FD_SET fds;
  1282. XFD_ZERO(&fds);
  1283. FD_SET((unsigned)sock, &fds);
  1284. CHECKSOCKRANGE(sock);
  1285. if (timeout==WAIT_FOREVER) {
  1286. ret = ::select( sock + 1, (fd_set *)&fds, NULL, NULL, NULL );
  1287. }
  1288. else {
  1289. struct timeval tv;
  1290. tv.tv_sec = timeout / 1000;
  1291. tv.tv_usec = (timeout % 1000)*1000;
  1292. ret = ::select( sock + 1, (fd_set *)&fds, NULL, NULL, &tv );
  1293. }
  1294. if (ret==SOCKET_ERROR) {
  1295. int err = ERRNO();
  1296. if (err!=EINTRCALL) { // else retry (should adjust time but for our usage don't think it matters that much)
  1297. LOGERR2(err,1,"wait_read");
  1298. break;
  1299. }
  1300. }
  1301. else
  1302. break;
  1303. }
  1304. return ret;
  1305. }
  1306. int CSocket::wait_write(unsigned timeout)
  1307. {
  1308. int ret = 0;
  1309. while (sock!=INVALID_SOCKET) {
  1310. T_FD_SET fds;
  1311. XFD_ZERO(&fds);
  1312. FD_SET((unsigned)sock, &fds);
  1313. CHECKSOCKRANGE(sock);
  1314. if (timeout==WAIT_FOREVER) {
  1315. ret = ::select( sock + 1, NULL, (fd_set *)&fds, NULL, NULL );
  1316. }
  1317. else {
  1318. struct timeval tv;
  1319. tv.tv_sec = timeout / 1000;
  1320. tv.tv_usec = (timeout % 1000)*1000;
  1321. ret = ::select( sock + 1, NULL, (fd_set *)&fds, NULL, &tv );
  1322. }
  1323. if (ret==SOCKET_ERROR) {
  1324. int err = ERRNO();
  1325. if (err!=EINTRCALL) { // else retry (should adjust time but for our usage don't think it matters that much)
  1326. LOGERR2(err,1,"wait_write");
  1327. break;
  1328. }
  1329. }
  1330. else
  1331. break;
  1332. }
  1333. return ret;
  1334. }
  1335. void CSocket::readtms(void* buf, size32_t min_size, size32_t max_size, size32_t &size_read,
  1336. unsigned timeoutms)
  1337. {
  1338. if (timeoutms == WAIT_FOREVER) {
  1339. read(buf,min_size, max_size, size_read,WAIT_FOREVER);
  1340. return;
  1341. }
  1342. unsigned startt=usTick();
  1343. size_read = 0;
  1344. if (state != ss_open) {
  1345. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  1346. }
  1347. unsigned start;
  1348. unsigned timeleft;
  1349. start = msTick();
  1350. timeleft = timeoutms;
  1351. do {
  1352. int rc = wait_read(timeleft);
  1353. if (rc < 0) {
  1354. THROWJSOCKEXCEPTION(ERRNO());
  1355. }
  1356. if (rc == 0) {
  1357. THROWJSOCKEXCEPTION(JSOCKERR_timeout_expired);
  1358. }
  1359. unsigned elapsed = (msTick()-start);
  1360. if (elapsed<timeoutms)
  1361. timeleft = timeoutms-elapsed;
  1362. else
  1363. timeleft = 0;
  1364. unsigned retrycount=100;
  1365. EintrRetry:
  1366. if (sockmode==sm_udp_server) { // udp server
  1367. DEFINE_SOCKADDR(u);
  1368. socklen_t ul=sizeof(u);
  1369. rc = recvfrom(sock, (char*)buf + size_read, max_size - size_read, 0, &u.sa,&ul);
  1370. getSockAddrEndpoint(u,ul,returnep);
  1371. }
  1372. else {
  1373. rc = recv(sock, (char*)buf + size_read, max_size - size_read, 0);
  1374. }
  1375. if (rc < 0) {
  1376. int err = ERRNO();
  1377. if (BADSOCKERR(err)) {
  1378. // don't think this should happen but convert to same as shutdown while investigation
  1379. LOGERR2(err,1,"Socket closed during read");
  1380. rc = 0;
  1381. }
  1382. else if ((err==EINTRCALL)&&(retrycount--!=0)) {
  1383. LOGERR2(err,1,"EINTR retrying");
  1384. goto EintrRetry;
  1385. }
  1386. else {
  1387. LOGERR2(err,1,"readtms");
  1388. if ((err==ECONNRESET)||(err==EINTRCALL)||(err==ECONNABORTED)) {
  1389. errclose();
  1390. err = JSOCKERR_broken_pipe;
  1391. }
  1392. THROWJSOCKEXCEPTION(err);
  1393. }
  1394. }
  1395. if (rc == 0) {
  1396. state = ss_shutdown;
  1397. if (min_size==0)
  1398. break; // if min_read is 0 return 0 if socket closed
  1399. THROWJSOCKEXCEPTION(JSOCKERR_graceful_close);
  1400. }
  1401. size_read += rc;
  1402. } while (size_read < min_size);
  1403. STATS.reads++;
  1404. STATS.readsize += size_read;
  1405. STATS.readtime+=usTick()-startt;
  1406. }
  1407. void CSocket::read(void* buf, size32_t min_size, size32_t max_size, size32_t &size_read,
  1408. unsigned timeoutsecs)
  1409. {
  1410. unsigned startt=usTick();
  1411. size_read = 0;
  1412. unsigned start;
  1413. unsigned timeleft = 0;
  1414. if (state != ss_open) {
  1415. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  1416. }
  1417. if (timeoutsecs != WAIT_FOREVER) {
  1418. start = (unsigned)time(NULL);
  1419. timeleft = timeoutsecs;
  1420. }
  1421. do {
  1422. int rc;
  1423. if (timeoutsecs != WAIT_FOREVER) {
  1424. rc = wait_read(timeleft*1000);
  1425. if (rc < 0) {
  1426. THROWJSOCKEXCEPTION(ERRNO());
  1427. }
  1428. if (rc == 0) {
  1429. THROWJSOCKEXCEPTION(JSOCKERR_timeout_expired);
  1430. }
  1431. unsigned elapsed = ((unsigned)time(NULL))-start;
  1432. if (elapsed<timeoutsecs)
  1433. timeleft = timeoutsecs-elapsed;
  1434. else
  1435. timeleft = 0;
  1436. }
  1437. unsigned retrycount=100;
  1438. EintrRetry:
  1439. if (sockmode==sm_udp_server) { // udp server
  1440. DEFINE_SOCKADDR(u);
  1441. socklen_t ul=sizeof(u.sin);
  1442. rc = recvfrom(sock, (char*)buf + size_read, max_size - size_read, 0, &u.sa,&ul);
  1443. getSockAddrEndpoint(u,ul,returnep);
  1444. }
  1445. else {
  1446. rc = recv(sock, (char*)buf + size_read, max_size - size_read, 0);
  1447. }
  1448. if (rc < 0) {
  1449. int err = ERRNO();
  1450. if (BADSOCKERR(err)) {
  1451. // don't think this should happen but convert to same as shutdown while investigation
  1452. LOGERR2(err,3,"Socket closed during read");
  1453. rc = 0;
  1454. }
  1455. else if ((err==EINTRCALL)&&(retrycount--!=0)) {
  1456. if (sock==INVALID_SOCKET)
  1457. rc = 0; // convert an EINTR after closed to a graceful close
  1458. else {
  1459. LOGERR2(err,3,"EINTR retrying");
  1460. goto EintrRetry;
  1461. }
  1462. }
  1463. else {
  1464. LOGERR2(err,3,"read");
  1465. if ((err==ECONNRESET)||(err==EINTRCALL)||(err==ECONNABORTED)) {
  1466. errclose();
  1467. err = JSOCKERR_broken_pipe;
  1468. }
  1469. THROWJSOCKEXCEPTION(err);
  1470. }
  1471. }
  1472. if (rc == 0) {
  1473. state = ss_shutdown;
  1474. if (min_size==0)
  1475. break; // if min_read is 0 return 0 if socket closed
  1476. THROWJSOCKEXCEPTION(JSOCKERR_graceful_close);
  1477. }
  1478. size_read += rc;
  1479. } while (size_read < min_size);
  1480. STATS.reads++;
  1481. STATS.readsize += size_read;
  1482. STATS.readtime+=usTick()-startt;
  1483. }
  1484. void CSocket::read(void* buf, size32_t size)
  1485. {
  1486. if (!size)
  1487. return;
  1488. unsigned startt=usTick();
  1489. size32_t size_read=size;
  1490. if (state != ss_open) {
  1491. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  1492. }
  1493. do {
  1494. unsigned retrycount=100;
  1495. EintrRetry:
  1496. int rc;
  1497. if (sockmode==sm_udp_server) { // udp server
  1498. DEFINE_SOCKADDR(u);
  1499. socklen_t ul=sizeof(u.sin);
  1500. rc = recvfrom(sock, (char*)buf, size, 0, &u.sa,&ul);
  1501. getSockAddrEndpoint(u,ul,returnep);
  1502. }
  1503. else {
  1504. rc = recv(sock, (char*)buf, size, 0);
  1505. }
  1506. if (rc < 0) {
  1507. int err = ERRNO();
  1508. if (BADSOCKERR(err)) {
  1509. // don't think this should happen but convert to same as shutdown while investigation
  1510. LOGERR2(err,5,"Socket closed during read");
  1511. rc = 0;
  1512. }
  1513. else if ((err==EINTRCALL)&&(retrycount--!=0)) {
  1514. LOGERR2(err,5,"EINTR retrying");
  1515. goto EintrRetry;
  1516. }
  1517. else {
  1518. LOGERR2(err,5,"read");
  1519. if ((err==ECONNRESET)||(err==EINTRCALL)||(err==ECONNABORTED)) {
  1520. errclose();
  1521. err = JSOCKERR_broken_pipe;
  1522. }
  1523. THROWJSOCKEXCEPTION(err);
  1524. }
  1525. }
  1526. if (rc == 0) {
  1527. state = ss_shutdown;
  1528. THROWJSOCKEXCEPTION(JSOCKERR_graceful_close);
  1529. }
  1530. buf = (char*)buf + rc;
  1531. size -= rc;
  1532. } while (size != 0);
  1533. STATS.reads++;
  1534. STATS.readsize += size_read;
  1535. STATS.readtime+=usTick()-startt;
  1536. }
  1537. size32_t CSocket::write(void const* buf, size32_t size)
  1538. {
  1539. if (size==0)
  1540. return 0;
  1541. unsigned startt=usTick();
  1542. size32_t size_writ = size;
  1543. if (state != ss_open) {
  1544. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  1545. }
  1546. size32_t res=0;
  1547. do {
  1548. unsigned retrycount=100;
  1549. EintrRetry:
  1550. int rc;
  1551. if (sockmode==sm_udp_server) { // udp server
  1552. DEFINE_SOCKADDR(u);
  1553. socklen_t ul = setSockAddr(u,returnep,returnep.port);
  1554. rc = sendto(sock, (char*)buf, size, 0, &u.sa, ul);
  1555. }
  1556. else {
  1557. rc = send(sock, (char*)buf, size, SEND_FLAGS);
  1558. }
  1559. if (rc < 0) {
  1560. int err=ERRNO();
  1561. if (BADSOCKERR(err)) {
  1562. LOGERR2(err,7,"Socket closed during write");
  1563. rc = 0;
  1564. }
  1565. else if ((err==EINTRCALL)&&(retrycount--!=0)) {
  1566. LOGERR2(err,7,"EINTR retrying");
  1567. goto EintrRetry;
  1568. }
  1569. else {
  1570. if (((sockmode==sm_multicast)||(sockmode==sm_udp))&&(err==ECONNREFUSED))
  1571. break; // ignore
  1572. LOGERR2(err,7,"write");
  1573. if ((err==ECONNRESET)||(err==EINTRCALL)||(err==ECONNABORTED)
  1574. #ifndef _WIN32
  1575. ||(err==EPIPE)||(err==ETIMEDOUT) // linux can raise these on broken pipe
  1576. #endif
  1577. ) {
  1578. errclose();
  1579. err = JSOCKERR_broken_pipe;
  1580. }
  1581. if ((err == EWOULDBLOCK) && nonblocking)
  1582. break;
  1583. THROWJSOCKEXCEPTION(err);
  1584. }
  1585. }
  1586. res += rc;
  1587. if (rc == 0) {
  1588. state = ss_shutdown;
  1589. THROWJSOCKEXCEPTION(JSOCKERR_graceful_close);
  1590. }
  1591. if (nonblocking)
  1592. break;
  1593. buf = (char*)buf + rc;
  1594. size -= rc;
  1595. } while (size != 0);
  1596. STATS.writes++;
  1597. STATS.writesize += size_writ;
  1598. STATS.writetime+=usTick()-startt;
  1599. return res;
  1600. }
  1601. bool CSocket::check_connection()
  1602. {
  1603. if (state != ss_open)
  1604. return false;
  1605. unsigned retrycount=100;
  1606. EintrRetry:
  1607. int rc;
  1608. if (sockmode==sm_udp_server) { // udp server
  1609. DEFINE_SOCKADDR(u);
  1610. socklen_t ul = setSockAddr(u,returnep,returnep.port);
  1611. rc = sendto(sock, NULL, 0, 0, &u.sa, ul);
  1612. }
  1613. else {
  1614. rc = send(sock, NULL, 0, SEND_FLAGS);
  1615. }
  1616. if (rc < 0) {
  1617. int err=ERRNO();
  1618. if ((err==EINTRCALL)&&(retrycount--!=0)) {
  1619. LOGERR2(err,7,"EINTR retrying");
  1620. goto EintrRetry;
  1621. }
  1622. else
  1623. return false;
  1624. }
  1625. return true;
  1626. }
  1627. size32_t CSocket::udp_write_to(SocketEndpoint &ep, void const* buf, size32_t size)
  1628. {
  1629. if (size==0)
  1630. return 0;
  1631. unsigned startt=usTick();
  1632. size32_t size_writ = size;
  1633. if (state != ss_open) {
  1634. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  1635. }
  1636. size32_t res=0;
  1637. DEFINE_SOCKADDR(u);
  1638. loop {
  1639. socklen_t ul = setSockAddr(u,ep,ep.port);
  1640. int rc = sendto(sock, (char*)buf, size, 0, &u.sa, ul);
  1641. if (rc < 0) {
  1642. int err=ERRNO();
  1643. if (((sockmode==sm_multicast)||(sockmode==sm_udp))&&(err==ECONNREFUSED))
  1644. break; // ignore
  1645. if (err!=EINTRCALL) {
  1646. THROWJSOCKEXCEPTION(err);
  1647. }
  1648. }
  1649. else {
  1650. res = (size32_t)rc;
  1651. break;
  1652. }
  1653. }
  1654. STATS.writes++;
  1655. STATS.writesize += res;
  1656. STATS.writetime+=usTick()-startt;
  1657. return res;
  1658. }
  1659. size32_t CSocket::write_multiple(unsigned num,const void **buf, size32_t *size)
  1660. {
  1661. assertex(sockmode!=sm_udp_server);
  1662. assertex(!nonblocking);
  1663. if (num==1)
  1664. return write(buf[0],size[0]);
  1665. size32_t total = 0;
  1666. unsigned i;
  1667. for (i=0;i<num;i++)
  1668. total += size[i];
  1669. if (total==0)
  1670. return 0;
  1671. unsigned startt=usTick();
  1672. if (state != ss_open) {
  1673. THROWJSOCKEXCEPTION(JSOCKERR_not_opened);
  1674. }
  1675. size32_t res=0;
  1676. #ifdef _WIN32
  1677. WSABUF *bufs = (WSABUF *)alloca(sizeof(WSABUF)*num);
  1678. for (i=0;i<num;i++) {
  1679. bufs[i].buf = (char *)buf[i];
  1680. bufs[i].len = size[i];
  1681. }
  1682. unsigned retrycount=100;
  1683. EintrRetry:
  1684. DWORD sent;
  1685. if (WSASendTo(sock,bufs,num,&sent,0,NULL,0,NULL,NULL)==SOCKET_ERROR) {
  1686. int err=ERRNO();
  1687. if (BADSOCKERR(err)) {
  1688. LOGERR2(err,8,"Socket closed during write");
  1689. sent = 0;
  1690. }
  1691. else if ((err==EINTRCALL)&&(retrycount--!=0)) {
  1692. LOGERR2(err,8,"EINTR retrying");
  1693. goto EintrRetry;
  1694. }
  1695. else {
  1696. LOGERR2(err,8,"write_multiple");
  1697. if ((err==ECONNRESET)||(err==EINTRCALL)||(err==ECONNABORTED)||(err==ETIMEDOUT)) {
  1698. errclose();
  1699. err = JSOCKERR_broken_pipe;
  1700. }
  1701. THROWJSOCKEXCEPTION(err);
  1702. }
  1703. }
  1704. if (sent == 0) {
  1705. state = ss_shutdown;
  1706. THROWJSOCKEXCEPTION(JSOCKERR_graceful_close);
  1707. }
  1708. res = sent;
  1709. #else
  1710. #ifdef USE_CORK
  1711. if (total>1024) {
  1712. class Copt
  1713. {
  1714. T_SOCKET sock;
  1715. bool nagling;
  1716. public:
  1717. Copt(T_SOCKET _sock,bool _nagling)
  1718. {
  1719. nagling = _nagling;
  1720. int enabled = 1;
  1721. int disabled = 0;
  1722. if (!nagling)
  1723. setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char*)&disabled, sizeof(disabled));
  1724. setsockopt(sock, IPPROTO_TCP, TCP_CORK, (char*)&enabled, sizeof(enabled));
  1725. }
  1726. ~Copt()
  1727. {
  1728. int enabled = 1;
  1729. int disabled = 0;
  1730. setsockopt(sock, IPPROTO_TCP, TCP_CORK, (char*)&disabled, sizeof(disabled));
  1731. if (!nagling)
  1732. setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char*)&enabled, sizeof(enabled));
  1733. }
  1734. } copt(sock,nagling);
  1735. for (i=0;i<num;i++)
  1736. res += write(buf[i],size[i]);
  1737. }
  1738. else {
  1739. byte b[1024];
  1740. byte *p=b;
  1741. for (i=0;i<num;i++) {
  1742. memcpy(p,buf[i],size[i]);
  1743. p += size[i];
  1744. }
  1745. res = write(b,total);
  1746. }
  1747. #else
  1748. // send in equal chunks about 64K
  1749. unsigned n = (total+0xffff)/0x10000;
  1750. size32_t outbufsize = (total+n-1)/n;
  1751. MemoryAttr ma;
  1752. byte *outbuf = (byte *)ma.allocate(outbufsize);
  1753. size32_t outwr = 0;
  1754. i = 0;
  1755. size32_t os = 0;
  1756. size32_t left = total;
  1757. byte *b = NULL;
  1758. size32_t s=0;
  1759. loop {
  1760. while (!s&&(i<num)) {
  1761. b = (byte *)buf[i];
  1762. s = size[i];
  1763. i++;
  1764. }
  1765. if ((os==0)&&(s==left)) {
  1766. write(b,s); // go for it
  1767. break;
  1768. }
  1769. else {
  1770. size32_t cpy = outbufsize-os;
  1771. if (cpy>s)
  1772. cpy = s;
  1773. memcpy(outbuf+os,b,cpy);
  1774. os += cpy;
  1775. left -= cpy;
  1776. s -= cpy;
  1777. b += cpy;
  1778. if (left==0) {
  1779. write(outbuf,os);
  1780. break;
  1781. }
  1782. else if (os==outbufsize) {
  1783. write(outbuf,os);
  1784. os = 0;
  1785. }
  1786. }
  1787. }
  1788. #endif
  1789. #endif
  1790. STATS.writes++;
  1791. STATS.writesize += res;
  1792. STATS.writetime+=usTick()-startt;
  1793. return res;
  1794. }
  1795. bool CSocket::send_block(const void *blk,size32_t sz)
  1796. {
  1797. unsigned startt=usTick();
  1798. #ifdef TRACE_SLOW_BLOCK_TRANSFER
  1799. unsigned startt2 = startt;
  1800. unsigned startt3 = startt;
  1801. #endif
  1802. if (blockflags&BF_SYNC_TRANSFER_PULL) {
  1803. size32_t rd;
  1804. bool eof = true;
  1805. readtms(&eof,sizeof(eof),sizeof(eof),rd,blocktimeoutms);
  1806. if (eof)
  1807. return false;
  1808. #ifdef TRACE_SLOW_BLOCK_TRANSFER
  1809. startt2=usTick();
  1810. #endif
  1811. }
  1812. if (!blk||!sz) {
  1813. sz = 0;
  1814. write(&sz,sizeof(sz));
  1815. try {
  1816. bool reply;
  1817. size32_t rd;
  1818. readtms(&reply,sizeof(reply),sizeof(reply),rd,blocktimeoutms);
  1819. }
  1820. catch (IJSOCK_Exception *e) {
  1821. if ((e->errorCode()!=JSOCKERR_broken_pipe)&&(e->errorCode()!=JSOCKERR_graceful_close))
  1822. EXCLOG(e,"CSocket::send_block");
  1823. e->Release();
  1824. }
  1825. return false;
  1826. }
  1827. size32_t rsz=sz;
  1828. _WINREV(rsz);
  1829. write(&rsz,sizeof(rsz));
  1830. if (blockflags&BF_SYNC_TRANSFER_PUSH) {
  1831. #ifdef TRACE_SLOW_BLOCK_TRANSFER
  1832. startt2=usTick();
  1833. #endif
  1834. size32_t rd;
  1835. bool eof = true;
  1836. readtms(&eof,sizeof(eof),sizeof(eof),rd,blocktimeoutms);
  1837. if (eof)
  1838. return false;
  1839. #ifdef TRACE_SLOW_BLOCK_TRANSFER
  1840. startt3=usTick();
  1841. #endif
  1842. }
  1843. write(blk,sz);
  1844. if (blockflags&BF_RELIABLE_TRANSFER) {
  1845. bool isok=false;
  1846. size32_t rd;
  1847. readtms(&isok,sizeof(isok),sizeof(isok),rd,blocktimeoutms);
  1848. if (!isok)
  1849. return false;
  1850. }
  1851. unsigned nowt = usTick();
  1852. unsigned elapsed = nowt-startt;
  1853. STATS.blocksendtime+=elapsed;
  1854. STATS.numblocksends++;
  1855. STATS.blocksendsize+=sz;
  1856. if (elapsed>STATS.longestblocksend) {
  1857. STATS.longestblocksend = elapsed;
  1858. STATS.longestblocksize = sz;
  1859. }
  1860. #ifdef TRACE_SLOW_BLOCK_TRANSFER
  1861. static unsigned lastreporttime=0;
  1862. static unsigned lastexceeded=0;
  1863. if (elapsed>1000000*60) { // over 1min
  1864. unsigned t = msTick();
  1865. if (1) { //((t-lastreporttime>1000*60) || // only report once per min
  1866. // (elapsed>lastexceeded*2)) {
  1867. lastexceeded = elapsed;
  1868. lastreporttime = t;
  1869. WARNLOG("send_block took %ds to %s (%d,%d,%d)",elapsed/1000000,tracename,startt2-startt,startt3-startt2,nowt-startt3);
  1870. }
  1871. }
  1872. #endif
  1873. return true;
  1874. }
  1875. #ifdef USERECVSEM
  1876. class CSemProtect
  1877. {
  1878. Semaphore *sem;
  1879. bool *owned;
  1880. public:
  1881. CSemProtect() { clear(); }
  1882. ~CSemProtect()
  1883. {
  1884. if (sem&&*owned) {
  1885. *owned = false;
  1886. sem->signal();
  1887. }
  1888. }
  1889. void set(Semaphore *_sem,bool *_owned)
  1890. {
  1891. sem = _sem;
  1892. owned = _owned;
  1893. }
  1894. bool wait(Semaphore *_sem,bool *_owned,unsigned timeout) {
  1895. if (!*_owned&&!_sem->wait(timeout))
  1896. return false;
  1897. *_owned = true;
  1898. set(_sem,_owned);
  1899. return true;
  1900. }
  1901. void clear() { sem = NULL; owned = NULL; }
  1902. };
  1903. #endif
  1904. size32_t CSocket::receive_block_size()
  1905. {
  1906. // assumed always paired with receive_block
  1907. if (nextblocksize) {
  1908. if (blockflags&BF_SYNC_TRANSFER_PULL) {
  1909. bool eof=false;
  1910. write(&eof,sizeof(eof));
  1911. }
  1912. size32_t rd;
  1913. readtms(&nextblocksize,sizeof(nextblocksize),sizeof(nextblocksize),rd,blocktimeoutms);
  1914. _WINREV(nextblocksize);
  1915. if (nextblocksize==0) { // confirm eof
  1916. try {
  1917. bool confirm=true;
  1918. write(&confirm,sizeof(confirm));
  1919. }
  1920. catch (IJSOCK_Exception *e) {
  1921. if ((e->errorCode()!=JSOCKERR_broken_pipe)&&(e->errorCode()!=JSOCKERR_graceful_close))
  1922. EXCLOG(e,"receive_block_size");
  1923. e->Release();
  1924. }
  1925. }
  1926. else if (blockflags&BF_SYNC_TRANSFER_PUSH) { // leaves receiveblocksem clear
  1927. #ifdef USERECVSEM
  1928. CSemProtect semprot; // this will catch exception in write
  1929. while (!semprot.wait(&receiveblocksem,&receiveblocksemowned,60*1000*5))
  1930. WARNLOG("Receive block stalled");
  1931. #endif
  1932. bool eof=false;
  1933. write(&eof,sizeof(eof));
  1934. #ifdef USERECVSEM
  1935. semprot.clear();
  1936. #endif
  1937. }
  1938. }
  1939. return nextblocksize;
  1940. }
  1941. size32_t CSocket::receive_block(void *blk,size32_t maxsize)
  1942. {
  1943. #ifdef USERECVSEM
  1944. CSemProtect semprot; // this will catch exceptions
  1945. #endif
  1946. size32_t sz = nextblocksize;
  1947. if (sz) {
  1948. if (sz==UINT_MAX) { // need to get size
  1949. if (!blk||!maxsize) {
  1950. if (blockflags&BF_SYNC_TRANSFER_PUSH) { // ignore block size
  1951. size32_t rd;
  1952. readtms(&nextblocksize,sizeof(nextblocksize),sizeof(nextblocksize),rd,blocktimeoutms);
  1953. }
  1954. if (blockflags&(BF_SYNC_TRANSFER_PULL|BF_SYNC_TRANSFER_PUSH)) { // signal eof
  1955. bool eof=true;
  1956. write(&eof,sizeof(eof));
  1957. nextblocksize = 0;
  1958. return 0;
  1959. }
  1960. }
  1961. sz = receive_block_size();
  1962. if (!sz)
  1963. return 0;
  1964. }
  1965. unsigned startt=usTick(); // include sem block but not initial handshake
  1966. #ifdef USERECVSEM
  1967. if (blockflags&BF_SYNC_TRANSFER_PUSH) // read_block_size sets semaphore
  1968. semprot.set(&receiveblocksem,&receiveblocksemowned); // this will reset semaphore on exit
  1969. #endif
  1970. nextblocksize = UINT_MAX;
  1971. size32_t rd;
  1972. if (sz<=maxsize) {
  1973. readtms(blk,sz,sz,rd,blocktimeoutms);
  1974. }
  1975. else { // truncate
  1976. readtms(blk,maxsize,maxsize,rd,blocktimeoutms);
  1977. sz -= maxsize;
  1978. void *tmp=malloc(sz);
  1979. readtms(tmp,sz,sz,rd,blocktimeoutms);
  1980. free(tmp);
  1981. sz = maxsize;
  1982. }
  1983. if (blockflags&BF_RELIABLE_TRANSFER) {
  1984. bool isok=true;
  1985. write(&isok,sizeof(isok));
  1986. }
  1987. unsigned elapsed = usTick()-startt;
  1988. STATS.blockrecvtime+=elapsed;
  1989. STATS.numblockrecvs++;
  1990. STATS.blockrecvsize+=sz;
  1991. }
  1992. return sz;
  1993. }
  1994. void CSocket::set_block_mode(unsigned flags, size32_t recsize, unsigned _timeoutms)
  1995. {
  1996. blockflags = flags;
  1997. nextblocksize = UINT_MAX;
  1998. blocktimeoutms = _timeoutms?_timeoutms:WAIT_FOREVER;
  1999. }
  2000. void CSocket::shutdown(unsigned mode)
  2001. {
  2002. if (state == ss_open) {
  2003. state = ss_shutdown;
  2004. #ifdef SOCKTRACE
  2005. PROGLOG("SOCKTRACE: shutdown(%d) socket %x %d (%x)", mode, sock, sock, this);
  2006. #endif
  2007. int rc = ::shutdown(sock, mode);
  2008. if (rc != 0) {
  2009. int err=ERRNO();
  2010. if (err==ENOTCONN) {
  2011. LOGERR2(err,9,"shutdown");
  2012. err = JSOCKERR_broken_pipe;
  2013. }
  2014. THROWJSOCKEXCEPTION(err);
  2015. }
  2016. }
  2017. }
  2018. void CSocket::errclose()
  2019. {
  2020. #ifdef USERECVSEM
  2021. if (receiveblocksemowned) {
  2022. receiveblocksemowned = false;
  2023. receiveblocksem.signal();
  2024. }
  2025. #endif
  2026. if (state != ss_close) {
  2027. state = ss_close;
  2028. #ifdef SOCKTRACE
  2029. PROGLOG("SOCKTRACE: errclose socket %x %d (%x)", sock, sock, this);
  2030. #endif
  2031. if (mcastreq)
  2032. setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP,(char*)mcastreq,sizeof(*mcastreq));
  2033. closesock();
  2034. }
  2035. }
  2036. void CSocket::close()
  2037. {
  2038. #ifdef USERECVSEM
  2039. if (receiveblocksemowned) {
  2040. receiveblocksemowned = false;
  2041. receiveblocksem.signal();
  2042. }
  2043. #endif
  2044. if (state != ss_close) {
  2045. #ifdef SOCKTRACE
  2046. PROGLOG("SOCKTRACE: close socket %x %d (%x)", sock, sock, this);
  2047. #endif
  2048. state = ss_close;
  2049. if (mcastreq)
  2050. setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP,(char*)mcastreq,sizeof(*mcastreq));
  2051. if (closesock() != 0) {
  2052. THROWJSOCKEXCEPTION(ERRNO());
  2053. }
  2054. }
  2055. }
  2056. size32_t CSocket::get_max_send_size()
  2057. {
  2058. size32_t maxsend=0;
  2059. socklen_t size = sizeof(maxsend);
  2060. #if _WIN32
  2061. getsockopt(sock, SOL_SOCKET, SO_MAX_MSG_SIZE, (char *) &maxsend, &size);
  2062. #else
  2063. getsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *) &maxsend, &size); // not the same but closest I can find
  2064. #endif
  2065. return maxsend;
  2066. }
  2067. size32_t CSocket::get_send_buffer_size()
  2068. {
  2069. size32_t maxsend=0;
  2070. socklen_t size = sizeof(maxsend);
  2071. getsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *) &maxsend, &size);
  2072. return maxsend;
  2073. }
  2074. void CSocket::set_send_buffer_size(size32_t maxsend)
  2075. {
  2076. if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *)&maxsend, sizeof(maxsend))!=0) {
  2077. LOGERR2(ERRNO(),1,"setsockopt(SO_SNDBUF)");
  2078. }
  2079. #ifdef CHECKBUFSIZE
  2080. size32_t v;
  2081. if (getsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *)&v, sizeof(v))!=0) {
  2082. LOGERR2(ERRNO(),1,"getsockopt(SO_SNDBUF)");
  2083. }
  2084. if (v!=maxsend)
  2085. WARNLOG("set_send_buffer_size requested %d, got %d",maxsend,v);
  2086. #endif
  2087. }
  2088. size32_t CSocket::get_receive_buffer_size()
  2089. {
  2090. size32_t max=0;
  2091. socklen_t size = sizeof(max);
  2092. getsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *) &max, &size);
  2093. return max;
  2094. }
  2095. void CSocket::set_receive_buffer_size(size32_t max)
  2096. {
  2097. if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&max, sizeof(max))!=0) {
  2098. LOGERR2(ERRNO(),1,"setsockopt(SO_RCVBUF)");
  2099. }
  2100. #ifdef CHECKBUFSIZE
  2101. size32_t v;
  2102. if (getsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&v, sizeof(v))!=0) {
  2103. LOGERR2(ERRNO(),1,"getsockopt(SO_RCVBUF)");
  2104. }
  2105. if (v<max)
  2106. WARNLOG("set_receive_buffer_size requested %d, got %d",max,v);
  2107. #endif
  2108. }
  2109. bool CSocket::join_multicast_group(SocketEndpoint &ep)
  2110. {
  2111. StringBuffer s;
  2112. ep.getIpText(s); // will improve later
  2113. MCASTREQ req(s.str());
  2114. if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,(char*)&req, sizeof(req))!=0) {
  2115. return false;
  2116. }
  2117. return true;
  2118. }
  2119. bool CSocket::leave_multicast_group(SocketEndpoint &ep)
  2120. {
  2121. StringBuffer s;
  2122. ep.getIpText(s); // will improve later
  2123. MCASTREQ req(s.str());
  2124. if (setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP,(char*)&req, sizeof(req))!=0) {
  2125. return false;
  2126. }
  2127. return true;
  2128. }
  2129. CSocket::~CSocket()
  2130. {
  2131. if (owned)
  2132. close();
  2133. free(hostname);
  2134. hostname = NULL;
  2135. #ifdef _TRACE
  2136. free(tracename);
  2137. tracename = NULL;
  2138. #endif
  2139. delete mcastreq;
  2140. }
  2141. CSocket::CSocket(const SocketEndpoint &ep,SOCKETMODE smode,const char *name)
  2142. {
  2143. state = ss_close;
  2144. nonblocking = false;
  2145. #ifdef USERECVSEM
  2146. receiveblocksemowned = false;
  2147. #endif
  2148. nagling = true; // until turned off
  2149. hostport = ep.port;
  2150. hostname = NULL;
  2151. mcastreq = NULL;
  2152. tracename = NULL;
  2153. StringBuffer tmp;
  2154. if ((smode==sm_multicast_server)&&(name&&*name)) {
  2155. mcastreq = new MCASTREQ(name);
  2156. }
  2157. else {
  2158. if (!name&&!ep.isNull())
  2159. name = ep.getIpText(tmp).str();
  2160. hostname = name?strdup(name):NULL;
  2161. }
  2162. sock = INVALID_SOCKET;
  2163. sockmode = smode;
  2164. owned = true;
  2165. nextblocksize = 0;
  2166. in_accept = false;
  2167. accept_cancel_state = accept_not_cancelled;
  2168. #ifdef _TRACE
  2169. char peer[256];
  2170. peer[0] = name?'T':'S';
  2171. peer[1] = '>';
  2172. if (name)
  2173. strcpy(peer+2,name);
  2174. else {
  2175. SocketEndpoint self;
  2176. self.setLocalHost(0);
  2177. self.getUrlStr(peer+2,sizeof(peer)-2);
  2178. }
  2179. tracename = strdup(peer);
  2180. #endif
  2181. }
  2182. CSocket::CSocket(T_SOCKET new_sock,SOCKETMODE smode,bool _owned)
  2183. {
  2184. nonblocking = false;
  2185. #ifdef USERECVSEM
  2186. receiveblocksemowned = false;
  2187. #endif
  2188. nagling = true; // until turned off
  2189. sock = new_sock;
  2190. if (new_sock!=INVALID_SOCKET)
  2191. STATS.activesockets++;
  2192. hostname = NULL;
  2193. mcastreq = NULL;
  2194. hostport = 0;
  2195. tracename = NULL;
  2196. state = ss_open;
  2197. sockmode = smode;
  2198. owned = _owned;
  2199. nextblocksize = 0;
  2200. in_accept = false;
  2201. accept_cancel_state = accept_not_cancelled;
  2202. set_nagle(false);
  2203. //set_linger(DEFAULT_LINGER_TIME); -- experiment with removing this as closesocket should still endevour to send outstanding data
  2204. #ifdef _TRACE
  2205. char peer[256];
  2206. peer[0] = 'A';
  2207. peer[1] = '!';
  2208. peer_name(peer+2,sizeof(peer)-2);
  2209. tracename = strdup(peer);
  2210. #endif
  2211. }
  2212. ISocket* ISocket::create(unsigned short p,int listen_queue_size)
  2213. {
  2214. if (p==0)
  2215. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  2216. SocketEndpoint ep;
  2217. ep.port = p;
  2218. Owned<CSocket> sock = new CSocket(ep,sm_tcp_server,NULL);
  2219. sock->open(listen_queue_size);
  2220. return sock.getClear();
  2221. }
  2222. ISocket* ISocket::create_ip(unsigned short p,const char *host,int listen_queue_size)
  2223. {
  2224. if (p==0)
  2225. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  2226. SocketEndpoint ep(host,p);
  2227. Owned<CSocket> sock = new CSocket(ep,sm_tcp_server,host);
  2228. sock->open(listen_queue_size);
  2229. return sock.getClear();
  2230. }
  2231. ISocket* ISocket::udp_create(unsigned short p)
  2232. {
  2233. SocketEndpoint ep;
  2234. ep.port=p;
  2235. Owned<CSocket> sock = new CSocket(ep,(p==0)?sm_udp:sm_udp_server,NULL);
  2236. sock->open(0);
  2237. return sock.getClear();
  2238. }
  2239. ISocket* ISocket::multicast_create(unsigned short p, const char *mcip)
  2240. {
  2241. if (p==0)
  2242. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  2243. SocketEndpoint ep(mcip,p);
  2244. Owned<CSocket> sock = new CSocket(ep,sm_multicast_server,mcip);
  2245. sock->open(0,true);
  2246. return sock.getClear();
  2247. }
  2248. ISocket* ISocket::multicast_create(unsigned short p, const IpAddress &ip)
  2249. {
  2250. if (p==0)
  2251. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  2252. SocketEndpoint ep(p, ip);
  2253. StringBuffer tmp;
  2254. Owned<CSocket> sock = new CSocket(ep,sm_multicast_server,ip.getIpText(tmp).str());
  2255. sock->open(0,true);
  2256. return sock.getClear();
  2257. }
  2258. ISocket* ISocket::udp_connect(unsigned short p, char const* name)
  2259. {
  2260. if (!name||!*name||(p==0))
  2261. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  2262. SocketEndpoint ep(name, p);
  2263. Owned<CSocket> sock = new CSocket(ep,sm_udp,name);
  2264. sock->udpconnect();
  2265. return sock.getClear();
  2266. }
  2267. ISocket* ISocket::udp_connect(const SocketEndpoint &ep)
  2268. {
  2269. Owned<CSocket> sock = new CSocket(ep,sm_udp,NULL);
  2270. sock->udpconnect();
  2271. return sock.getClear();
  2272. }
  2273. ISocket* ISocket::multicast_connect(unsigned short p, char const* mcip, unsigned _ttl)
  2274. {
  2275. if (p==0)
  2276. THROWJSOCKEXCEPTION2(JSOCKERR_bad_address);
  2277. SocketEndpoint ep(mcip,p);
  2278. return multicast_connect(ep, _ttl);
  2279. }
  2280. ISocket* ISocket::multicast_connect(const SocketEndpoint &ep, unsigned _ttl)
  2281. {
  2282. Owned<CSocket> sock = new CSocket(ep,sm_multicast,NULL);
  2283. sock->udpconnect();
  2284. u_char ttl = _ttl;
  2285. setsockopt(sock->OShandle(), IPPROTO_IP, IP_MULTICAST_TTL, (char *) &ttl, sizeof(ttl));
  2286. return sock.getClear();
  2287. }
  2288. ISocket* ISocket::attach(int s, bool tcpip)
  2289. {
  2290. CSocket* sock = new CSocket((SOCKET)s, tcpip?sm_tcp:sm_udp, false);
  2291. return sock;
  2292. }
  2293. bool isInterfaceIp(const IpAddress &ip, const char *ifname)
  2294. {
  2295. #ifdef _WIN32
  2296. return false;
  2297. #else
  2298. int fd = socket(AF_INET, SOCK_DGRAM, 0); // IPV6 TBD
  2299. if (fd<0)
  2300. return false;
  2301. MemoryAttr ma;
  2302. char *buf = (char *)ma.allocate(1024);
  2303. struct ifconf ifc;
  2304. ifc.ifc_len = 1024;
  2305. ifc.ifc_buf = buf;
  2306. if(ioctl(fd, SIOCGIFCONF, &ifc) < 0) // query interfaces
  2307. return false;
  2308. struct ifreq *ifr = ifc.ifc_req;
  2309. unsigned n = ifc.ifc_len/sizeof(struct ifreq);
  2310. bool match = false;
  2311. for(unsigned i=0; i<n; i++)
  2312. {
  2313. struct ifreq *item = &ifr[i];
  2314. if (ifname&&*ifname)
  2315. if (!WildMatch(item->ifr_name,ifname))
  2316. continue;
  2317. IpAddress iptest((inet_ntoa(((struct sockaddr_in *)&item->ifr_addr)->sin_addr)));
  2318. if (ip.ipequals(iptest))
  2319. {
  2320. match = true;
  2321. break;
  2322. }
  2323. }
  2324. close(fd);
  2325. return match;
  2326. #endif
  2327. }
  2328. bool getInterfaceIp(IpAddress &ip,const char *ifname)
  2329. {
  2330. #ifdef _WIN32
  2331. return false;
  2332. #else
  2333. ip.ipset(NULL);
  2334. int fd = socket(AF_INET, SOCK_DGRAM, 0); // IPV6 TBD
  2335. if (fd<0)
  2336. return false;
  2337. MemoryAttr ma;
  2338. char *buf = (char *)ma.allocate(1024);
  2339. struct ifconf ifc;
  2340. ifc.ifc_len = 1024;
  2341. ifc.ifc_buf = buf;
  2342. if(ioctl(fd, SIOCGIFCONF, &ifc) < 0) // query interfaces
  2343. return false;
  2344. struct ifreq *ifr = ifc.ifc_req;
  2345. unsigned n = ifc.ifc_len/sizeof(struct ifreq);
  2346. for (int loopback = 0; loopback <= 1; loopback++)
  2347. {
  2348. for (int i=0; i<n; i++)
  2349. {
  2350. bool useLoopback = (loopback==1);
  2351. struct ifreq *item = &ifr[i];
  2352. if (ifname&&*ifname)
  2353. if (!WildMatch(item->ifr_name,ifname))
  2354. continue;
  2355. IpAddress iptest((inet_ntoa(((struct sockaddr_in *)&item->ifr_addr)->sin_addr)));
  2356. if (iptest.isLoopBack() == useLoopback)
  2357. {
  2358. if (ip.isNull())
  2359. ip.ipset(iptest);
  2360. else if (!PreferredSubnet.isNull()&&!PreferredSubnet.test(ip)&&PreferredSubnet.test(iptest))
  2361. ip.ipset(iptest);
  2362. }
  2363. }
  2364. if (!ip.isNull())
  2365. break;
  2366. }
  2367. close(fd);
  2368. return !ip.isNull();
  2369. #endif
  2370. }
// File-scope caches of the local host's identity. GetCachedHostName(), queryLocalIP()
// and queryHostIP() each take hostnamesect before reading or writing them.
static StringAttr cachehostname;        // host name (or interface IP text) set by GetCachedHostName()
static IpAddress cachehostip;           // host IP set by GetCachedHostName() or queryHostIP()
static IpAddress localhostip;           // loopback address set on first use by queryLocalIP()
static CriticalSection hostnamesect;    // lock for the caches; also held around gethostbyname() in lookupHostAddress()
static StringBuffer EnvConfPath;        // path of environment.conf, built on first use by GetCachedHostName()
  2376. const char * GetCachedHostName()
  2377. {
  2378. CriticalBlock c(hostnamesect);
  2379. if (!cachehostname.get())
  2380. {
  2381. #ifndef _WIN32
  2382. IpAddress ip;
  2383. if (EnvConfPath.length() == 0)
  2384. EnvConfPath.append(CONFIG_DIR).append(PATHSEPSTR).append("environment.conf");
  2385. Owned<IProperties> conf = createProperties(EnvConfPath.str(), true);
  2386. StringBuffer ifs;
  2387. conf->getProp("interface", ifs);
  2388. if (getInterfaceIp(ip, ifs.str()))
  2389. {
  2390. StringBuffer ips;
  2391. ip.getIpText(ips);
  2392. if (ips.length())
  2393. {
  2394. cachehostname.set(ips.str());
  2395. cachehostip.ipset(ip);
  2396. return cachehostname.get();
  2397. }
  2398. }
  2399. #endif
  2400. char temp[1024];
  2401. if (gethostname(temp, sizeof(temp))==0)
  2402. cachehostname.set(temp);
  2403. else
  2404. cachehostname.set("localhost"); // assume no NIC card
  2405. }
  2406. return cachehostname.get();
  2407. }
  2408. IpAddress & queryLocalIP()
  2409. {
  2410. CriticalBlock c(hostnamesect);
  2411. if (localhostip.isNull())
  2412. if (IP6preferred)
  2413. localhostip.ipset("::1"); //IPv6
  2414. else
  2415. localhostip.ipset("127.0.0.1"); //IPv4
  2416. return localhostip;
  2417. }
  2418. IpAddress & queryHostIP()
  2419. {
  2420. CriticalBlock c(hostnamesect);
  2421. if (cachehostip.isNull()) {
  2422. if (!cachehostip.ipset(GetCachedHostName())) {
  2423. cachehostip.ipset(queryLocalIP());
  2424. printf("hostname %s not resolved, using localhost\n",GetCachedHostName()); // don't use jlog in case recursive
  2425. }
  2426. }
  2427. return cachehostip;
  2428. }
  2429. IpAddress &GetHostIp(IpAddress &ip)
  2430. {
  2431. ip.ipset(queryHostIP());
  2432. return ip;
  2433. }
  2434. IpAddress &localHostToNIC(IpAddress &ip)
  2435. {
  2436. if (ip.isLoopBack())
  2437. GetHostIp(ip);
  2438. return ip;
  2439. }
  2440. // IpAddress
  2441. inline bool isIp4(const unsigned *netaddr)
  2442. {
  2443. if (IP4only)
  2444. return true;
  2445. if (netaddr[2]==0xffff0000)
  2446. return (netaddr[1]==0)&&(netaddr[0]==0);
  2447. if (netaddr[2]==0)
  2448. if ((netaddr[3]==0)&&(netaddr[0]==0)&&(netaddr[1]==0))
  2449. return true; // null address
  2450. // maybe should get loopback here
  2451. return false;
  2452. }
  2453. bool IpAddress::isIp4() const
  2454. {
  2455. return ::isIp4(netaddr);
  2456. }
  2457. bool IpAddress::isNull() const
  2458. {
  2459. return (netaddr[3]==0)&&(IP4only||((netaddr[2]==0)&&(netaddr[1]==0)&&(netaddr[0]==0)));
  2460. }
  2461. bool IpAddress::isLoopBack() const
  2462. {
  2463. if (::isIp4(netaddr)&&((netaddr[3] & 0x000000ff)==0x000007f))
  2464. return true;
  2465. return (netaddr[3]==0x1000000)&&(netaddr[2]==0)&&(netaddr[1]==0)&&(netaddr[0]==0);
  2466. }
  2467. bool IpAddress::isLocal() const
  2468. {
  2469. if (isLoopBack() || isHost())
  2470. return true;
  2471. IpAddress ip(*this);
  2472. return isInterfaceIp(ip, NULL);
  2473. }
  2474. bool IpAddress::ipequals(const IpAddress & other) const
  2475. {
  2476. // reverse compare for speed
  2477. return (other.netaddr[3]==netaddr[3])&&(IP4only||((other.netaddr[2]==netaddr[2])&&(other.netaddr[1]==netaddr[1])&&(other.netaddr[0]==netaddr[0])));
  2478. }
  2479. int IpAddress::ipcompare(const IpAddress & other) const
  2480. {
  2481. return memcmp(&netaddr, &other.netaddr, sizeof(netaddr));
  2482. }
  2483. unsigned IpAddress::iphash(unsigned prev) const
  2484. {
  2485. return hashc((const byte *)&netaddr,sizeof(netaddr),prev);
  2486. }
  2487. bool IpAddress::isHost() const
  2488. {
  2489. return ipequals(queryHostIP());
  2490. }
  2491. static bool decodeNumericIP(const char *text,unsigned *netaddr)
  2492. {
  2493. if (!text)
  2494. return false;
  2495. bool isv6 = strchr(text,':')!=NULL;
  2496. StringBuffer tmp;
  2497. if ((*text=='[')&&!IP4only) {
  2498. text++;
  2499. size32_t l = strlen(text);
  2500. if ((l<=2)||(text[l-1]!=']'))
  2501. return false;
  2502. text = tmp.append(l-2,text);
  2503. }
  2504. if (!isv6&&isdigit(text[0])) {
  2505. if (_inet_pton(AF_INET, text, &netaddr[3])>0) {
  2506. netaddr[2] = netaddr[3]?0xffff0000:0; // check for NULL
  2507. netaddr[1] = 0;
  2508. netaddr[0] = 0; // special handling for loopback?
  2509. return true;
  2510. }
  2511. }
  2512. else if (isv6&&!IP4only) {
  2513. int ret = _inet_pton(AF_INET6, text, netaddr);
  2514. if (ret>=0)
  2515. return (ret>0);
  2516. int err = ERRNO();
  2517. StringBuffer tmp("_inet_pton: ");
  2518. tmp.append(text);
  2519. LOGERR(err,1,tmp.str());
  2520. }
  2521. return false;
  2522. }
  2523. static bool lookupHostAddress(const char *name,unsigned *netaddr)
  2524. {
  2525. // if IP4only or using MS V6 can only resolve IPv4 using
  2526. static bool recursioncheck = false; // needed to stop error message recursing
  2527. unsigned retry=10;
  2528. #if defined(__linux__) || defined(getaddrinfo)
  2529. if (IP4only) {
  2530. #else
  2531. {
  2532. #endif
  2533. CriticalBlock c(hostnamesect);
  2534. hostent * entry = gethostbyname(name);
  2535. while (entry==NULL) {
  2536. if (retry--==0) {
  2537. if (!recursioncheck) {
  2538. recursioncheck = true;
  2539. LogErr(h_errno,1,"gethostbyname failed",__LINE__,name);
  2540. recursioncheck = false;
  2541. }
  2542. return false;
  2543. }
  2544. {
  2545. CriticalUnblock ub(hostnamesect);
  2546. Sleep((10-retry)*100);
  2547. }
  2548. entry = gethostbyname(name);
  2549. }
  2550. if (entry->h_addr_list[0]) {
  2551. unsigned ptr = 0;
  2552. if (!PreferredSubnet.isNull()) {
  2553. loop {
  2554. ptr++;
  2555. if (entry->h_addr_list[ptr]==NULL) {
  2556. ptr = 0;
  2557. break;
  2558. }
  2559. IpAddress ip;
  2560. ip.setNetAddress(sizeof(unsigned),entry->h_addr_list[ptr]);
  2561. if (PreferredSubnet.test(ip))
  2562. break;
  2563. }
  2564. }
  2565. memcpy(&netaddr[3], entry->h_addr_list[ptr], sizeof(netaddr[3]));
  2566. netaddr[2] = 0xffff0000;
  2567. netaddr[1] = 0;
  2568. netaddr[0] = 0;
  2569. return true;
  2570. }
  2571. return false;
  2572. }
  2573. #if defined(__linux__) || defined(getaddrinfo)
  2574. struct addrinfo hints;
  2575. memset(&hints,0,sizeof(hints));
  2576. struct addrinfo *addrInfo = NULL;
  2577. loop {
  2578. memset(&hints,0,sizeof(hints));
  2579. int ret = getaddrinfo(name, NULL , &hints, &addrInfo);
  2580. if (!ret)
  2581. break;
  2582. if (retry--==0) {
  2583. if (!recursioncheck) {
  2584. recursioncheck = true;
  2585. LogErr(ret,1,"getaddrinfo failed",__LINE__,name);
  2586. #ifdef _DEBUG
  2587. PrintStackReport();
  2588. #endif
  2589. recursioncheck = false;
  2590. }
  2591. return false;
  2592. }
  2593. Sleep((10-retry)*100);
  2594. }
  2595. struct addrinfo *best = NULL;
  2596. bool snm = !PreferredSubnet.isNull();
  2597. loop {
  2598. struct addrinfo *ai;
  2599. for (ai = addrInfo; ai; ai = ai->ai_next) {
  2600. // printf("flags=%d, family=%d, socktype=%d, protocol=%d, addrlen=%d, canonname=%s\n",ai->ai_flags,ai->ai_family,ai->ai_socktype,ai->ai_protocol,ai->ai_addrlen,ai->ai_canonname?ai->ai_canonname:"NULL");
  2601. switch (ai->ai_family) {
  2602. case AF_INET: {
  2603. if (snm) {
  2604. IpAddress ip;
  2605. ip.setNetAddress(sizeof(in_addr),&(((sockaddr_in *)ai->ai_addr)->sin_addr));
  2606. if (!PreferredSubnet.test(ip))
  2607. continue;
  2608. }
  2609. if ((best==NULL)||((best->ai_family==AF_INET6)&&!IP6preferred))
  2610. best = ai;
  2611. break;
  2612. }
  2613. case AF_INET6: {
  2614. if (snm) {
  2615. IpAddress ip;
  2616. ip.setNetAddress(sizeof(in_addr6),&(((sockaddr_in6 *)ai->ai_addr)->sin6_addr));
  2617. if (!PreferredSubnet.test(ip))
  2618. continue;
  2619. }
  2620. if ((best==NULL)||((best->ai_family==AF_INET)&&IP6preferred))
  2621. best = ai;
  2622. break;
  2623. }
  2624. }
  2625. }
  2626. if (best||!snm)
  2627. break;
  2628. snm = false;
  2629. }
  2630. if (best) {
  2631. if (best->ai_family==AF_INET6)
  2632. memcpy(netaddr,&(((sockaddr_in6 *)best->ai_addr)->sin6_addr),sizeof(in6_addr));
  2633. else {
  2634. memcpy(netaddr+3,&(((sockaddr_in *)best->ai_addr)->sin_addr),sizeof(in_addr));
  2635. netaddr[2] = 0xffff0000;
  2636. netaddr[1] = 0;
  2637. netaddr[0] = 0;
  2638. }
  2639. }
  2640. freeaddrinfo(addrInfo);
  2641. return best!=NULL;
  2642. #endif
  2643. return false;
  2644. }
  2645. bool IpAddress::ipset(const char *text)
  2646. {
  2647. if (text&&*text) {
  2648. if ((text[0]=='.')&&(text[1]==0)) {
  2649. ipset(queryHostIP());
  2650. return true;
  2651. }
  2652. if (decodeNumericIP(text,netaddr))
  2653. return true;
  2654. const char *s;
  2655. for (s=text;*s;s++)
  2656. if (!isdigit(*s)&&(*s!=':')&&(*s!='.'))
  2657. break;
  2658. if (!*s)
  2659. return ipset(NULL);
  2660. if (lookupHostAddress(text,netaddr))
  2661. return true;
  2662. }
  2663. memset(&netaddr,0,sizeof(netaddr));
  2664. return false;
  2665. }
  2666. inline char * addbyte(char *s,byte b)
  2667. {
  2668. if (b>=100) {
  2669. *(s++) = b/100+'0';
  2670. b %= 100;
  2671. *(s++) = b/10+'0';
  2672. b %= 10;
  2673. }
  2674. else if (b>=10) {
  2675. *(s++) = b/10+'0';
  2676. b %= 10;
  2677. }
  2678. *(s++) = b+'0';
  2679. return s;
  2680. }
  2681. StringBuffer & IpAddress::getIpText(StringBuffer & out) const
  2682. {
  2683. if (::isIp4(netaddr)) {
  2684. const byte *ip = (const byte *)&netaddr[3];
  2685. char ips[16];
  2686. char *s = ips;
  2687. for (unsigned i=0;i<4;i++) {
  2688. if (i)
  2689. *(s++) = '.';
  2690. s = addbyte(s,ip[i]);
  2691. }
  2692. return out.append(s-ips,ips);
  2693. }
  2694. char tmp[INET6_ADDRSTRLEN];
  2695. const char *res = _inet_ntop(AF_INET6, &netaddr, tmp, sizeof(tmp));
  2696. if (!res)
  2697. throw MakeOsException(errno);
  2698. return out.append(res);
  2699. }
  2700. void IpAddress::ipserialize(MemoryBuffer & out) const
  2701. {
  2702. if (((netaddr[2]==0xffff0000)||(netaddr[2]==0))&&(netaddr[1]==0)&&(netaddr[0]==0)) {
  2703. if (netaddr[3]==IPV6_SERIALIZE_PREFIX)
  2704. throw MakeStringException(-1,"Invalid network address"); // hack prevention
  2705. out.append(sizeof(netaddr[3]), &netaddr[3]);
  2706. }
  2707. else {
  2708. unsigned pfx = IPV6_SERIALIZE_PREFIX;
  2709. out.append(sizeof(pfx),&pfx).append(sizeof(netaddr),&netaddr);
  2710. }
  2711. }
  2712. void IpAddress::ipdeserialize(MemoryBuffer & in)
  2713. {
  2714. unsigned pfx;
  2715. in.read(sizeof(pfx),&pfx);
  2716. if (pfx!=IPV6_SERIALIZE_PREFIX) {
  2717. netaddr[0] = 0;
  2718. netaddr[1] = 0;
  2719. netaddr[2] = (pfx == 0 || pfx == 0x1000000) ? 0 : 0xffff0000; // catch null and loopback
  2720. netaddr[3] = pfx;
  2721. }
  2722. else
  2723. in.read(sizeof(netaddr),&netaddr);
  2724. }
  2725. unsigned IpAddress::ipdistance(const IpAddress &other,unsigned offset) const
  2726. {
  2727. if (offset>3)
  2728. offset = 3;
  2729. int i1;
  2730. _cpyrev4(&i1,&netaddr[3-offset]);
  2731. int i2;
  2732. _cpyrev4(&i2,&other.netaddr[3-offset]);
  2733. i1-=i2;
  2734. if (i1>0)
  2735. return i1;
  2736. return -i1;
  2737. }
  2738. bool IpAddress::ipincrement(unsigned count,byte minoctet,byte maxoctet,unsigned short minipv6piece,unsigned maxipv6piece)
  2739. {
  2740. unsigned base;
  2741. if (::isIp4(netaddr)) {
  2742. base = maxoctet-minoctet+1;
  2743. if (!base||(base>256))
  2744. return false;
  2745. byte * ips = (byte *)&netaddr[3];
  2746. byte * ip = ips+4;
  2747. while (count) {
  2748. if (ip==ips)
  2749. return false; // overflow
  2750. ip--;
  2751. unsigned v = (count+((*ip>minoctet)?(*ip-minoctet):0));
  2752. *ip = minoctet + v%base;
  2753. count = v/base;
  2754. }
  2755. }
  2756. else {
  2757. base = maxipv6piece-minipv6piece+1;
  2758. if (!base||(base>0x10000))
  2759. return false;
  2760. unsigned short * ps = (unsigned short *)&netaddr;
  2761. unsigned short * p = ps+8;
  2762. while (count) {
  2763. if (p==ps)
  2764. return false; // overflow (actually near impossible!)
  2765. p--;
  2766. unsigned v = (count+((*p>minipv6piece)?(*p-minipv6piece):0));
  2767. *p = minipv6piece + v%base;
  2768. count = v/base;
  2769. }
  2770. }
  2771. return true;
  2772. }
  2773. unsigned IpAddress::ipsetrange( const char *text) // e.g. 10.173.72.1-65 ('-' may be omitted)
  2774. {
  2775. unsigned e=0;
  2776. unsigned f=0;
  2777. const char *r = strchr(text,'-');
  2778. bool ok;
  2779. if (r) {
  2780. e = atoi(r+1);
  2781. StringBuffer tmp(r-text,text);
  2782. ok = ipset(tmp.str());
  2783. if (!::isIp4(netaddr))
  2784. IPV6_NOT_IMPLEMENTED(); // TBD IPv6
  2785. if (ok) {
  2786. while ((r!=text)&&(*(r-1)!='.'))
  2787. r--;
  2788. f = (r!=text)?atoi(r):0;
  2789. }
  2790. }
  2791. else
  2792. ok = ipset(text);
  2793. if ((f>e)||!ok)
  2794. return 0;
  2795. return e-f+1;
  2796. }
  2797. size32_t IpAddress::getNetAddress(size32_t maxsz,void *dst) const
  2798. {
  2799. if (maxsz==sizeof(unsigned)) {
  2800. if (::isIp4(netaddr)) {
  2801. *(unsigned *)dst = netaddr[3];
  2802. return maxsz;
  2803. }
  2804. }
  2805. else if (!IP4only&&(maxsz==sizeof(netaddr))) {
  2806. memcpy(dst,&netaddr,maxsz);
  2807. return maxsz;
  2808. }
  2809. return 0;
  2810. }
  2811. void IpAddress::setNetAddress(size32_t sz,const void *src)
  2812. {
  2813. if (sz==sizeof(unsigned)) { // IPv4
  2814. netaddr[0] = 0;
  2815. netaddr[1] = 0;
  2816. netaddr[2]=0xffff0000;
  2817. netaddr[3] = *(const unsigned *)src;
  2818. }
  2819. else if (!IP4only&&(sz==sizeof(netaddr))) { // IPv6
  2820. memcpy(&netaddr,src,sz);
  2821. if ((netaddr[2]==0)&&(netaddr[3]!=0)&&(netaddr[3]!=0x1000000)&&(netaddr[0]==0)&&(netaddr[1]==0))
  2822. netaddr[2]=0xffff0000; // use this form only
  2823. }
  2824. else
  2825. memset(&netaddr,0,sizeof(netaddr));
  2826. }
  2827. void SocketEndpoint::deserialize(MemoryBuffer & in)
  2828. {
  2829. ipdeserialize(in);
  2830. in.read(port);
  2831. }
  2832. void SocketEndpoint::serialize(MemoryBuffer & out) const
  2833. {
  2834. ipserialize(out);
  2835. out.append(port);
  2836. }
  2837. bool SocketEndpoint::set(const char *name,unsigned short _port)
  2838. {
  2839. if (name) {
  2840. if (*name=='[') {
  2841. const char *s = name+1;
  2842. const char *t = strchr(s,']');
  2843. if (t) {
  2844. StringBuffer tmp(t-s,s);
  2845. if (t[1]==':')
  2846. _port = atoi(t+2);
  2847. return set(tmp.str(),_port);
  2848. }
  2849. }
  2850. const char * colon = strchr(name, ':');
  2851. if (colon) {
  2852. if (!IP4only&&strchr(colon+1, ':'))
  2853. colon = NULL; // hello its IpV6
  2854. }
  2855. else
  2856. colon = strchr(name, '|'); // strange hole convention
  2857. char ips[260];
  2858. if (colon) {
  2859. size32_t l = colon-name;
  2860. if (l>=sizeof(ips))
  2861. l = sizeof(ips)-1;
  2862. memcpy(ips,name,l);
  2863. ips[l] = 0;
  2864. name = ips;
  2865. _port = atoi(colon+1);
  2866. }
  2867. if (ipset(name)) {
  2868. port = _port;
  2869. return true;
  2870. }
  2871. }
  2872. ipset(NULL);
  2873. port = 0;
  2874. return false;
  2875. }
  2876. void SocketEndpoint::getUrlStr(char * str, size32_t len) const
  2877. {
  2878. if (len==0)
  2879. return;
  2880. StringBuffer _str;
  2881. getUrlStr(_str);
  2882. size32_t l = _str.length()+1;
  2883. if (l>len)
  2884. {
  2885. l = len-1;
  2886. str[l] = 0;
  2887. }
  2888. memcpy(str,_str.toCharArray(),l);
  2889. }
  2890. StringBuffer &SocketEndpoint::getUrlStr(StringBuffer &str) const
  2891. {
  2892. getIpText(str);
  2893. if (port)
  2894. str.append(':').append((unsigned)port); // TBD IPv6 put [] on
  2895. return str;
  2896. }
  2897. unsigned SocketEndpoint::hash(unsigned prev) const
  2898. {
  2899. return hashc((const byte *)&port,sizeof(port),iphash(prev));
  2900. }
  2901. //---------------------------------------------------------------------------
  2902. SocketListCreator::SocketListCreator()
  2903. {
  2904. lastPort = 0;
  2905. }
  2906. void SocketListCreator::addSocket(const SocketEndpoint &ep)
  2907. {
  2908. StringBuffer ipstr;
  2909. ep.getIpText(ipstr);
  2910. addSocket(ipstr.str(), ep.port);
  2911. }
  2912. void SocketListCreator::addSocket(const char * ip, unsigned port)
  2913. {
  2914. if (fullText.length())
  2915. fullText.append("|");
  2916. const char * prev = lastIp;
  2917. const char * startCopy = ip;
  2918. if (prev)
  2919. {
  2920. if (strcmp(ip, prev) == 0)
  2921. {
  2922. fullText.append("=");
  2923. startCopy = NULL;
  2924. }
  2925. else
  2926. {
  2927. const char * cur = ip;
  2928. loop
  2929. {
  2930. char n = *cur;
  2931. if (!n)
  2932. break;
  2933. if (n != *prev)
  2934. break;
  2935. cur++;
  2936. prev++;
  2937. if (n == '.')
  2938. startCopy = cur;
  2939. }
  2940. if (startCopy != ip)
  2941. fullText.append("*");
  2942. }
  2943. }
  2944. fullText.append(startCopy);
  2945. if (lastPort != port)
  2946. fullText.append(":").append(port);
  2947. lastIp.set(ip);
  2948. lastPort = port;
  2949. }
  2950. const char * SocketListCreator::getText()
  2951. {
  2952. return fullText.str();
  2953. }
  2954. void SocketListCreator::addSockets(SocketEndpointArray &array)
  2955. {
  2956. ForEachItemIn(i,array) {
  2957. SocketEndpoint &sockep=array.item(i);
  2958. StringBuffer ipstr;
  2959. sockep.getIpText(ipstr);
  2960. addSocket(ipstr.str(),sockep.port);
  2961. }
  2962. }
  2963. //---------------------------------------------------------------------------
  2964. SocketListParser::SocketListParser(const char * text)
  2965. {
  2966. fullText.set(text);
  2967. cursor = NULL;
  2968. lastPort = 0;
  2969. }
  2970. void SocketListParser::first(unsigned port)
  2971. {
  2972. cursor = fullText;
  2973. lastIp.set(NULL);
  2974. lastPort = port;
  2975. }
  2976. bool SocketListParser::get(StringAttr & ip, unsigned & port, unsigned index, unsigned defport)
  2977. {
  2978. first(defport);
  2979. do
  2980. {
  2981. if (!next(ip, port))
  2982. return false;
  2983. } while (index--);
  2984. return true;
  2985. }
  2986. bool SocketListParser::next(StringAttr & ip, unsigned & port)
  2987. {
  2988. // IPV6TBD
  2989. StringBuffer ipText;
  2990. if (*cursor == 0)
  2991. return false;
  2992. if (*cursor == '=')
  2993. {
  2994. ipText.append(lastIp);
  2995. cursor++;
  2996. }
  2997. else if (*cursor == '*')
  2998. {
  2999. cursor++;
  3000. //count the number of dots in the tail
  3001. const char * cur = cursor;
  3002. unsigned count = 0;
  3003. loop
  3004. {
  3005. char c = *cur++;
  3006. switch (c)
  3007. {
  3008. case 0:
  3009. case '|':
  3010. case ',':
  3011. case ':':
  3012. goto done;
  3013. case '.':
  3014. ++count;
  3015. break;
  3016. }
  3017. }
  3018. done:
  3019. //copy up to the appropriate dot from the previous ip.
  3020. const unsigned dotCount = 3; //more what about 6 digit ip's
  3021. cur = lastIp;
  3022. loop
  3023. {
  3024. char c = *cur++;
  3025. switch (c)
  3026. {
  3027. case 0:
  3028. case '|':
  3029. case ',':
  3030. case ':':
  3031. assertex(!"Should not get here!");
  3032. goto done2;
  3033. case '.':
  3034. ipText.append(c);
  3035. if (++count == dotCount)
  3036. goto done2;
  3037. break;
  3038. default:
  3039. ipText.append(c);
  3040. break;
  3041. }
  3042. }
  3043. done2:;
  3044. }
  3045. bool inPort = false;
  3046. port = lastPort;
  3047. loop
  3048. {
  3049. char c = *cursor++;
  3050. switch (c)
  3051. {
  3052. case 0:
  3053. cursor--;
  3054. goto doneCopy;
  3055. case '|':
  3056. case ',':
  3057. goto doneCopy;
  3058. case ':':
  3059. port = atoi(cursor);
  3060. inPort = true;
  3061. break;;
  3062. default:
  3063. if (!inPort)
  3064. ipText.append(c);
  3065. break;
  3066. }
  3067. }
  3068. doneCopy:
  3069. lastIp.set(ipText.str());
  3070. ip.set(lastIp);
  3071. lastPort = port;
  3072. return true;
  3073. }
  3074. unsigned SocketListParser::getSockets(SocketEndpointArray &array,unsigned defport)
  3075. {
  3076. first(defport);
  3077. StringAttr ip;
  3078. unsigned port;
  3079. while (next(ip,port)) {
  3080. SocketEndpoint ep(ip,port);
  3081. array.append(ep);
  3082. }
  3083. return array.ordinality();
  3084. }
  3085. void getSocketStatistics(JSocketStatistics &stats)
  3086. {
  3087. // should put in simple lock
  3088. memcpy(&stats,&STATS,sizeof(stats));
  3089. }
  3090. void resetSocketStatistics()
  3091. {
  3092. unsigned activesockets=STATS.activesockets;
  3093. memset(&STATS,0,sizeof(STATS));
  3094. STATS.activesockets = activesockets;
  3095. }
  3096. static StringBuffer &appendtime(StringBuffer &s,unsigned us)
  3097. {
  3098. // attemp to get into more sensible units
  3099. if (us>10000000)
  3100. return s.append(us/1000000).append('s');
  3101. if (us>10000)
  3102. return s.append(us/1000).append("ms");
  3103. return s.append(us).append("us");
  3104. }
  3105. StringBuffer &getSocketStatisticsString(JSocketStatistics &stats,StringBuffer &str)
  3106. {
  3107. str.append("connects=").append(stats.connects).append('\n');
  3108. appendtime(str.append("connecttime="),stats.connecttime).append('\n');
  3109. str.append("failedconnects=").append(stats.failedconnects).append('\n');
  3110. appendtime(str.append("failedconnecttime="),stats.failedconnecttime).append('\n');
  3111. str.append("reads=").append(stats.reads).append('\n');
  3112. appendtime(str.append("readtime="),stats.readtime).append('\n');
  3113. str.append("readsize=").append(stats.readsize).append(" bytes\n");
  3114. str.append("writes=").append(stats.writes).append('\n');
  3115. appendtime(str.append("writetime="),stats.writetime).append('\n');
  3116. str.append("writesize=").append(stats.writesize).append(" bytes").append('\n');
  3117. str.append("activesockets=").append(stats.activesockets).append('\n');
  3118. str.append("numblockrecvs=").append(stats.numblockrecvs).append('\n');
  3119. str.append("numblocksends=").append(stats.numblocksends).append('\n');
  3120. str.append("blockrecvsize=").append(stats.blockrecvsize).append('\n');
  3121. str.append("blocksendsize=").append(stats.blocksendsize).append('\n');
  3122. str.append("blockrecvtime=").append(stats.blockrecvtime).append('\n');
  3123. str.append("blocksendtime=").append(stats.blocksendtime).append('\n');
  3124. str.append("longestblocksend=").append(stats.longestblocksend).append('\n');
  3125. str.append("longestblocksize=").append(stats.longestblocksize);
  3126. return str;
  3127. }
  3128. // ===============================================================================
  3129. // select thread for handling multiple selects
// Element type of SelectItemArray (the item list kept by CSocketMultiThread).
// NOTE(review): the per-field notes marked "presumably" are inferred from the names
// only -- confirm against the select thread's use of each field.
struct SelectItem
{
    ISocket *sock;              // socket object; Array__Equal treats items with the same sock as equal
    T_SOCKET handle;            // presumably the OS handle of sock
    ISocketSelectNotify *nfy;   // presumably the callback notified when the socket is ready
    byte mode;                  // presumably the select mode(s) requested for the socket
    bool del;                   // presumably flags the item as pending removal
    bool add_epoll;             // presumably flags the item as pending addition to epoll
};
  3139. inline SelectItem &Array__Member2Param(SelectItem &src) { return src; }
  3140. inline void Array__Assign(SelectItem & dest, SelectItem &src) { dest=src; }
  3141. inline bool Array__Equal(SelectItem &m, SelectItem &p) { return m.sock==p.sock; }
  3142. inline void Array__Destroy(SelectItem &p) { }
  3143. class SelectItemArray : public ArrayOf<SelectItem, SelectItem &> { };
// Timeout in seconds, presumably for the select call made by the select thread -- TODO confirm at its point of use.
// NOTE(review): the original trailing remark "but it does (TBD)" is a fragment whose intended meaning is unclear.
#define SELECT_TIMEOUT_SECS 1           // but it does (TBD)
  3145. #ifdef _WIN32
  3146. // fd_set utility functions
  3147. inline T_FD_SET *cpyfds(T_FD_SET &dst,const T_FD_SET &src)
  3148. {
  3149. unsigned i = src.fd_count;
  3150. dst.fd_count = i;
  3151. while (i--)
  3152. dst.fd_array[i] = src.fd_array[i]; // possibly better as memcpy
  3153. return &dst;
  3154. }
  3155. inline bool findfds(T_FD_SET &s,T_SOCKET h,bool &c)
  3156. {
  3157. unsigned n = s.fd_count;
  3158. unsigned i;
  3159. for(i=0;i<n;i++) {
  3160. if (s.fd_array[i] == h) {
  3161. if (--n)
  3162. s.fd_array[i] = s.fd_array[n]; // remove item
  3163. else
  3164. c = false;
  3165. s.fd_count = n;
  3166. return true;
  3167. }
  3168. }
  3169. return false;
  3170. }
  3171. inline T_SOCKET popfds(T_FD_SET &s)
  3172. {
  3173. unsigned n = s.fd_count;
  3174. T_SOCKET ret;
  3175. if (n) {
  3176. ret = s.fd_array[--n];
  3177. s.fd_count = n;
  3178. }
  3179. else
  3180. ret = NULL;
  3181. return ret;
  3182. }
  3183. #else
  3184. #define _USE_PIPE_FOR_SELECT_TRIGGER
  3185. // not as optimized as windows but I am expecting to convert to using poll anyway
  3186. inline T_FD_SET *cpyfds(T_FD_SET &dst,const T_FD_SET &src)
  3187. {
  3188. memcpy(&dst,&src,sizeof(T_FD_SET));
  3189. return &dst;
  3190. }
  3191. inline bool findfds(T_FD_SET &s,T_SOCKET h,bool &c)
  3192. {
  3193. if ((unsigned)h>=XFD_SETSIZE)
  3194. return false;
  3195. return FD_ISSET(h,&s); // does not remove entry or set termination flag when done
  3196. }
  3197. #endif
  3198. class CSocketMultiThread: public Thread
  3199. {
  3200. protected:
  3201. bool terminating;
  3202. CriticalSection sect;
  3203. Semaphore ticksem;
  3204. atomic_t tickwait;
  3205. SelectItemArray items;
  3206. unsigned offset;
  3207. bool selectvarschange;
  3208. unsigned waitingchange;
  3209. Semaphore waitingchangesem;
  3210. int validateselecterror;
  3211. unsigned validateerrcount;
  3212. const char *selecttrace;
  3213. unsigned basesize;
  3214. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3215. T_SOCKET dummysock[2];
  3216. #else
  3217. T_SOCKET dummysock;
  3218. #endif
  3219. bool dummysockopen;
  3220. CSocketMultiThread(const char *trc) : Thread("CSocketMultiThread")
  3221. {
  3222. }
  3223. ~CSocketMultiThread()
  3224. {
  3225. }
  3226. void triggerselect()
  3227. {
  3228. if (atomic_read(&tickwait))
  3229. ticksem.signal();
  3230. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3231. CriticalBlock block(sect);
  3232. char c = 0;
  3233. if(write(dummysock[1], &c, 1) != 1) {
  3234. int err = ERRNO();
  3235. LOGERR(err,1,"Socket closed during trigger select");
  3236. }
  3237. #else
  3238. closedummy();
  3239. #endif
  3240. }
  3241. void resettrigger()
  3242. {
  3243. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3244. CriticalBlock block(sect);
  3245. char c;
  3246. while((::read(dummysock[0], &c, sizeof(c))) == sizeof(c));
  3247. #endif
  3248. }
  3249. bool remove(ISocket *sock)
  3250. {
  3251. if (terminating)
  3252. return false;
  3253. CriticalBlock block(sect);
  3254. if (sock==NULL) { // wait until no changes outstanding
  3255. while (selectvarschange) {
  3256. waitingchange++;
  3257. CriticalUnblock unblock(sect);
  3258. waitingchangesem.wait();
  3259. }
  3260. return true;
  3261. }
  3262. ForEachItemIn(i,items) {
  3263. SelectItem &si = items.item(i);
  3264. if (!si.del&&(si.sock==sock)) {
  3265. si.del = true;
  3266. selectvarschange = true;
  3267. triggerselect();
  3268. return true;
  3269. }
  3270. }
  3271. return false;
  3272. }
  3273. void stop(bool wait)
  3274. {
  3275. terminating = true;
  3276. triggerselect();
  3277. if (wait)
  3278. join();
  3279. }
  3280. bool sockOk(T_SOCKET sock)
  3281. {
  3282. PROGLOG("CSocketMultiThread: sockOk testing %d",sock);
  3283. int err = 0;
  3284. int t=0;
  3285. socklen_t tl = sizeof(t);
  3286. if (getsockopt(sock, SOL_SOCKET, SO_TYPE, (char *)&t, &tl)!=0) {
  3287. StringBuffer sockstr;
  3288. const char *tracename = sockstr.append((unsigned)sock).str();
  3289. LOGERR2(ERRNO(),1,"CSocketMultiThread select handle");
  3290. return false;
  3291. }
  3292. T_FD_SET fds;
  3293. struct timeval tv;
  3294. XFD_ZERO(&fds);
  3295. FD_SET((unsigned)sock, &fds);
  3296. //FD_SET((unsigned)sock, &except);
  3297. tv.tv_sec = 0;
  3298. tv.tv_usec = 0;
  3299. CHECKSOCKRANGE(sock);
  3300. int rc = ::select( sock + 1, NULL, (fd_set *)&fds, NULL, &tv );
  3301. if (rc<0) {
  3302. StringBuffer sockstr;
  3303. const char *tracename = sockstr.append((unsigned)sock).str();
  3304. LOGERR2(ERRNO(),2,"CSocketMultiThread select handle");
  3305. return false;
  3306. }
  3307. else if (rc>0)
  3308. PROGLOG("CSocketMultiThread: select handle %d selected(2) %d",sock,rc);
  3309. XFD_ZERO(&fds);
  3310. FD_SET((unsigned)sock, &fds);
  3311. tv.tv_sec = 0;
  3312. tv.tv_usec = 0;
  3313. rc = ::select( sock + 1, (fd_set *)&fds, NULL, NULL, &tv );
  3314. if (rc<0) {
  3315. StringBuffer sockstr;
  3316. const char *tracename = sockstr.append((unsigned)sock).str();
  3317. LOGERR2(ERRNO(),3,"CSocketMultiThread select handle");
  3318. return false;
  3319. }
  3320. else if (rc>0)
  3321. PROGLOG("CSocketMultiThread: select handle %d selected(2) %d",sock,rc);
  3322. return true;
  3323. }
  3324. bool checkSocks()
  3325. {
  3326. bool ret = false;
  3327. ForEachItemIn(i,items) {
  3328. SelectItem &si = items.item(i);
  3329. if (si.del)
  3330. ret = true; // maybe that bad one
  3331. else if (!sockOk(si.handle)) {
  3332. si.del = true;
  3333. ret = true;
  3334. }
  3335. }
  3336. return ret;
  3337. }
  3338. };
  3339. class CSocketSelectThread: public CSocketMultiThread
  3340. {
  3341. void opendummy()
  3342. {
  3343. CriticalBlock block(sect);
  3344. if (!dummysockopen) {
  3345. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3346. if(pipe(dummysock)) {
  3347. WARNLOG("CSocketSelectThread: create pipe failed %d",ERRNO());
  3348. return;
  3349. }
  3350. for (unsigned i=0;i<2;i++) {
  3351. int flags = fcntl(dummysock[i], F_GETFL, 0);
  3352. if (flags!=-1) {
  3353. flags |= O_NONBLOCK;
  3354. fcntl(dummysock[i], F_SETFL, flags);
  3355. }
  3356. flags = fcntl(dummysock[i], F_GETFD, 0);
  3357. if (flags!=-1) {
  3358. flags |= FD_CLOEXEC;
  3359. fcntl(dummysock[i], F_SETFD, flags);
  3360. }
  3361. }
  3362. CHECKSOCKRANGE(dummysock[0]);
  3363. #else
  3364. if (IP6preferred)
  3365. dummysock = ::socket(AF_INET6, SOCK_STREAM, PF_INET6);
  3366. else
  3367. dummysock = ::socket(AF_INET, SOCK_STREAM, 0);
  3368. CHECKSOCKRANGE(dummysock);
  3369. #endif
  3370. dummysockopen = true;
  3371. }
  3372. }
  3373. void closedummy()
  3374. {
  3375. CriticalBlock block(sect);
  3376. if (dummysockopen) {
  3377. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3378. #ifdef SOCKTRACE
  3379. PROGLOG("SOCKTRACE: Closing dummy sockets %x %d %x %d (%x)", dummysock[0], dummysock[0], dummysock[1], dummysock[1], this);
  3380. #endif
  3381. ::close(dummysock[0]);
  3382. ::close(dummysock[1]);
  3383. #else
  3384. #ifdef _WIN32
  3385. ::closesocket(dummysock);
  3386. #else
  3387. ::close(dummysock);
  3388. #endif
  3389. #endif
  3390. dummysockopen = false;
  3391. }
  3392. }
  3393. void triggerselect()
  3394. {
  3395. CSocketMultiThread::triggerselect();
  3396. }
  3397. void resettrigger()
  3398. {
  3399. CSocketMultiThread::resettrigger();
  3400. }
  3401. #ifdef _WIN32
  3402. #define HASHTABSIZE 256
  3403. #define HASHNULL (HASHTABSIZE-1)
  3404. #define HASHTABMASK (HASHTABSIZE-1)
  3405. byte hashtab[HASHTABSIZE];
  3406. #define HASHSOCKET(s) ((((unsigned)s)>>2)&HASHTABMASK) // with some knowledge of windows handles
  3407. void inithash()
  3408. {
  3409. memset(&hashtab,HASHNULL,sizeof(hashtab));
  3410. assertex(FD_SETSIZE<255);
  3411. }
  3412. void reinithash()
  3413. { // done this way because index of items changes and hash table not that big
  3414. inithash();
  3415. assertex(items.ordinality()<HASHTABSIZE-1);
  3416. ForEachItemIn(i,items) {
  3417. unsigned h = HASHSOCKET(items.item(i).handle);
  3418. loop {
  3419. if (hashtab[h]==HASHNULL) {
  3420. hashtab[h] = (byte)i;
  3421. break;
  3422. }
  3423. if (++h==HASHTABSIZE)
  3424. h = 0;
  3425. }
  3426. }
  3427. }
  3428. inline SelectItem &findhash(T_SOCKET handle)
  3429. {
  3430. unsigned h = HASHSOCKET(handle);
  3431. unsigned sh = h;
  3432. loop {
  3433. SelectItem &i=items.item(hashtab[h]);
  3434. if (i.handle==handle)
  3435. return i;
  3436. if (++h==HASHTABSIZE)
  3437. h = 0;
  3438. assertex(h!=sh);
  3439. }
  3440. }
  3441. inline void processfds(T_FD_SET &s,byte mode,SelectItemArray &tonotify)
  3442. {
  3443. loop {
  3444. T_SOCKET sock = popfds(s);
  3445. if (!sock)
  3446. break;
  3447. if (sock!=dummysock) {
  3448. SelectItem si = findhash(sock); // nb copies
  3449. if (!si.del) {
  3450. si.mode = mode;
  3451. tonotify.append(si);
  3452. }
  3453. }
  3454. }
  3455. }
  3456. #endif
  3457. public:
  3458. IMPLEMENT_IINTERFACE;
  3459. CSocketSelectThread(const char *trc)
  3460. : CSocketMultiThread("CSocketSelectThread")
  3461. {
  3462. dummysockopen = false;
  3463. opendummy();
  3464. terminating = false;
  3465. atomic_set(&tickwait,0);
  3466. waitingchange = 0;
  3467. selectvarschange = false;
  3468. validateselecterror = 0;
  3469. validateerrcount = 0;
  3470. offset = 0;
  3471. selecttrace = trc;
  3472. basesize = 0;
  3473. #ifdef _WIN32
  3474. inithash();
  3475. #endif
  3476. }
  3477. ~CSocketSelectThread()
  3478. {
  3479. closedummy();
  3480. ForEachItemIn(i,items) {
  3481. try {
  3482. SelectItem &si = items.item(i);
  3483. si.sock->Release();
  3484. si.nfy->Release();
  3485. }
  3486. catch (IException *e) {
  3487. EXCLOG(e,"~CSocketSelectThread");
  3488. e->Release();
  3489. }
  3490. }
  3491. }
  3492. Owned<IException> termexcept;
  3493. void updateItems()
  3494. {
  3495. // must be in CriticalBlock block(sect);
  3496. unsigned n = items.ordinality();
  3497. bool hashupdateneeded = (n!=basesize); // additions all come at end
  3498. for (unsigned i=0;i<n;) {
  3499. SelectItem &si = items.item(i);
  3500. if (si.del) {
  3501. si.nfy->Release();
  3502. try {
  3503. #ifdef SOCKTRACE
  3504. PROGLOG("CSocketSelectThread::updateItems release %d",si.handle);
  3505. #endif
  3506. si.sock->Release();
  3507. }
  3508. catch (IException *e) {
  3509. EXCLOG(e,"CSocketSelectThread::updateItems");
  3510. e->Release();
  3511. }
  3512. n--;
  3513. if (i<n)
  3514. si = items.item(n);
  3515. items.remove(n);
  3516. hashupdateneeded = true;
  3517. }
  3518. else
  3519. i++;
  3520. }
  3521. assertex(n<=XFD_SETSIZE-1);
  3522. #ifdef _WIN32
  3523. if (hashupdateneeded)
  3524. reinithash();
  3525. #endif
  3526. basesize = n;
  3527. }
  3528. bool add(ISocket *sock,unsigned mode,ISocketSelectNotify *nfy)
  3529. {
  3530. // maybe check once to prevent 1st delay? TBD
  3531. CriticalBlock block(sect);
  3532. unsigned n=0;
  3533. ForEachItemIn(i,items) {
  3534. SelectItem &si = items.item(i);
  3535. if (!si.del) {
  3536. if (si.sock==sock) {
  3537. si.del = true;
  3538. }
  3539. else
  3540. n++;
  3541. }
  3542. }
  3543. if (n>=XFD_SETSIZE-1) // leave 1 spare
  3544. return false;
  3545. SelectItem sn;
  3546. sn.nfy = LINK(nfy);
  3547. sn.sock = LINK(sock);
  3548. sn.mode = (byte)mode;
  3549. sn.handle = (T_SOCKET)sock->OShandle();
  3550. CHECKSOCKRANGE(sn.handle);
  3551. sn.del = false;
  3552. sn.add_epoll = false;
  3553. items.append(sn);
  3554. selectvarschange = true;
  3555. triggerselect();
  3556. return true;
  3557. }
  3558. bool remove(ISocket *sock)
  3559. {
  3560. return CSocketMultiThread::remove(sock);
  3561. }
  3562. void stop(bool wait)
  3563. {
  3564. CSocketMultiThread::stop(wait);
  3565. }
  3566. bool sockOk(T_SOCKET sock)
  3567. {
  3568. return CSocketMultiThread::sockOk(sock);
  3569. }
  3570. bool checkSocks()
  3571. {
  3572. return CSocketMultiThread::checkSocks();
  3573. }
  3574. void updateSelectVars(T_FD_SET &rdfds,T_FD_SET &wrfds,T_FD_SET &exfds,bool &isrd,bool &iswr,bool &isex,unsigned &ni,T_SOCKET &max_sockid)
  3575. {
  3576. CriticalBlock block(sect);
  3577. selectvarschange = false;
  3578. if (waitingchange) {
  3579. waitingchangesem.signal(waitingchange);
  3580. waitingchange = 0;
  3581. }
  3582. if (validateselecterror) { // something went wrong so check sockets
  3583. validateerrcount++;
  3584. if (!checkSocks()) {
  3585. // bad socket not found
  3586. PROGLOG("CSocketSelectThread::updateSelectVars cannot find socket error");
  3587. if (validateerrcount>10)
  3588. throw MakeStringException(-1,"CSocketSelectThread:Socket select error %d",validateselecterror);
  3589. }
  3590. }
  3591. else
  3592. validateerrcount = 0;
  3593. updateItems();
  3594. XFD_ZERO( &rdfds );
  3595. XFD_ZERO( &wrfds );
  3596. XFD_ZERO( &exfds );
  3597. isrd=false;
  3598. iswr=false;
  3599. isex=false;
  3600. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3601. max_sockid=dummysockopen?dummysock[0]:0;
  3602. #else
  3603. opendummy();
  3604. max_sockid=dummysockopen?dummysock:0;
  3605. #endif
  3606. ni = items.ordinality();
  3607. #ifdef _WIN32
  3608. if (offset>=ni)
  3609. #endif
  3610. offset = 0;
  3611. unsigned j=offset;
  3612. ForEachItemIn(i,items) {
  3613. SelectItem &si = items.item(j);
  3614. j++;
  3615. if (j==ni)
  3616. j = 0;
  3617. if (si.mode & SELECTMODE_READ) {
  3618. FD_SET( si.handle, &rdfds );
  3619. isrd = true;
  3620. }
  3621. if (si.mode & SELECTMODE_WRITE) {
  3622. FD_SET( si.handle, &wrfds );
  3623. iswr = true;
  3624. }
  3625. if (si.mode & SELECTMODE_EXCEPT) {
  3626. FD_SET( si.handle, &exfds );
  3627. isex = true;
  3628. }
  3629. max_sockid=std::max(si.handle, max_sockid);
  3630. }
  3631. if (dummysockopen) {
  3632. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3633. FD_SET( dummysock[0], &rdfds );
  3634. isrd = true;
  3635. #else
  3636. FD_SET( dummysock, &exfds );
  3637. isex = true;
  3638. #endif
  3639. }
  3640. validateselecterror = 0;
  3641. max_sockid++;
  3642. #ifdef SOCKTRACE
  3643. PROGLOG("SOCKTRACE: selecting on %d sockets",ni);
  3644. #endif
  3645. }
  3646. int run()
  3647. {
  3648. try {
  3649. T_FD_SET rdfds;
  3650. T_FD_SET wrfds;
  3651. T_FD_SET exfds;
  3652. timeval selecttimeout;
  3653. bool isrd = false;
  3654. bool iswr = false;
  3655. bool isex = false;
  3656. T_SOCKET maxsockid = 0;
  3657. unsigned ni = 0;
  3658. selectvarschange = true;
  3659. unsigned numto = 0;
  3660. unsigned lastnumto = 0;
  3661. unsigned totnum = 0;
  3662. unsigned total = 0;
  3663. while (!terminating) {
  3664. selecttimeout.tv_sec = SELECT_TIMEOUT_SECS; // linux modifies so initialize inside loop
  3665. selecttimeout.tv_usec = 0;
  3666. if (selectvarschange) {
  3667. updateSelectVars(rdfds,wrfds,exfds,isrd,iswr,isex,ni,maxsockid);
  3668. }
  3669. if (ni==0) {
  3670. validateerrcount = 0;
  3671. atomic_inc(&tickwait);
  3672. if(!selectvarschange&&!terminating)
  3673. ticksem.wait(SELECT_TIMEOUT_SECS*1000);
  3674. atomic_dec(&tickwait);
  3675. continue;
  3676. }
  3677. T_FD_SET rs;
  3678. T_FD_SET ws;
  3679. T_FD_SET es;
  3680. T_FD_SET *rsp = isrd?cpyfds(rs,rdfds):NULL;
  3681. T_FD_SET *wsp = iswr?cpyfds(ws,wrfds):NULL;
  3682. T_FD_SET *esp = isex?cpyfds(es,exfds):NULL;
  3683. int n = ::select(maxsockid,(fd_set *)rsp,(fd_set *)wsp,(fd_set *)esp,&selecttimeout); // first parameter needed for posix
  3684. if (terminating)
  3685. break;
  3686. if (n < 0) {
  3687. CriticalBlock block(sect);
  3688. int err = ERRNO();
  3689. if (err != EINTRCALL) {
  3690. if (dummysockopen) {
  3691. LOGERR(err,12,"CSocketSelectThread select error"); // should cache error ?
  3692. validateselecterror = err;
  3693. #ifndef _USE_PIPE_FOR_SELECT_TRIGGER
  3694. closedummy(); // just in case was culprit
  3695. #endif
  3696. }
  3697. selectvarschange = true;
  3698. continue;
  3699. }
  3700. n = 0;
  3701. }
  3702. else if (n>0) {
  3703. validateerrcount = 0;
  3704. numto = 0;
  3705. lastnumto = 0;
  3706. total += n;
  3707. totnum++;
  3708. SelectItemArray tonotify;
  3709. {
  3710. CriticalBlock block(sect);
  3711. #ifdef _WIN32
  3712. if (isrd)
  3713. processfds(rs,SELECTMODE_READ,tonotify);
  3714. if (iswr)
  3715. processfds(ws,SELECTMODE_WRITE,tonotify);
  3716. if (isex)
  3717. processfds(es,SELECTMODE_EXCEPT,tonotify);
  3718. #else
  3719. unsigned i;
  3720. SelectItem *si = items.getArray(offset);
  3721. SelectItem *sie = items.getArray(ni-1)+1;
  3722. bool r = isrd;
  3723. bool w = iswr;
  3724. bool e = isex;
  3725. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3726. if (r&&dummysockopen&&findfds(rs,dummysock[0],r)) {
  3727. resettrigger();
  3728. --n;
  3729. }
  3730. #endif
  3731. for (i=0;(n>0)&&(i<ni);i++) {
  3732. if (r&&findfds(rs,si->handle,r)) {
  3733. if (!si->del) {
  3734. tonotify.append(*si);
  3735. tonotify.item(tonotify.length()-1).mode = SELECTMODE_READ;
  3736. }
  3737. --n;
  3738. }
  3739. if (w&&findfds(ws,si->handle,w)) {
  3740. if (!si->del) {
  3741. tonotify.append(*si);
  3742. tonotify.item(tonotify.length()-1).mode = SELECTMODE_WRITE;
  3743. }
  3744. --n;
  3745. }
  3746. if (e&&findfds(es,si->handle,e)) {
  3747. if (!si->del) {
  3748. tonotify.append(*si);
  3749. tonotify.item(tonotify.length()-1).mode = SELECTMODE_EXCEPT;
  3750. }
  3751. --n;
  3752. }
  3753. si++;
  3754. if (si==sie)
  3755. si = items.getArray();
  3756. }
  3757. #endif
  3758. }
  3759. ForEachItemIn(j,tonotify) {
  3760. SelectItem &si = tonotify.item(j);
  3761. try {
  3762. si.nfy->notifySelected(si.sock,si.mode); // ignore return
  3763. }
  3764. catch (IException *e) { // should be acted upon by notifySelected
  3765. EXCLOG(e,"CSocketSelectThread notifySelected");
  3766. throw ;
  3767. }
  3768. }
  3769. }
  3770. else {
  3771. validateerrcount = 0;
  3772. if ((++numto>=lastnumto*2)) {
  3773. lastnumto = numto;
  3774. if (selecttrace&&(numto>4))
  3775. PROGLOG("%s: Select Idle(%d), %d,%d,%0.2f",selecttrace,numto,totnum,total,totnum?((double)total/(double)totnum):0.0);
  3776. }
  3777. /*
  3778. if (numto&&(numto%100)) {
  3779. CriticalBlock block(sect);
  3780. if (!selectvarschange)
  3781. selectvarschange = checkSocks();
  3782. }
  3783. */
  3784. }
  3785. if (++offset>=ni)
  3786. offset = 0;
  3787. }
  3788. }
  3789. catch (IException *e) {
  3790. EXCLOG(e,"CSocketSelectThread");
  3791. termexcept.setown(e);
  3792. }
  3793. CriticalBlock block(sect);
  3794. try {
  3795. updateItems();
  3796. }
  3797. catch (IException *e) {
  3798. EXCLOG(e,"CSocketSelectThread(2)");
  3799. if (!termexcept)
  3800. termexcept.setown(e);
  3801. else
  3802. e->Release();
  3803. }
  3804. return 0;
  3805. }
  3806. };
  3807. class CSocketSelectHandler: public CInterface, implements ISocketSelectHandler
  3808. {
  3809. CIArrayOf<CSocketSelectThread> threads;
  3810. CriticalSection sect;
  3811. bool started;
  3812. StringAttr selecttrace;
  3813. public:
  3814. IMPLEMENT_IINTERFACE;
  3815. CSocketSelectHandler(const char *trc)
  3816. : selecttrace(trc)
  3817. {
  3818. started = false;
  3819. }
  3820. void start()
  3821. {
  3822. CriticalBlock block(sect);
  3823. if (!started) {
  3824. started = true;
  3825. ForEachItemIn(i,threads) {
  3826. threads.item(i).start();
  3827. }
  3828. }
  3829. }
  3830. void add(ISocket *sock,unsigned mode,ISocketSelectNotify *nfy)
  3831. {
  3832. CriticalBlock block(sect);
  3833. loop {
  3834. bool added=false;
  3835. ForEachItemIn(i,threads) {
  3836. if (added)
  3837. threads.item(i).remove(sock);
  3838. else
  3839. added = threads.item(i).add(sock,mode,nfy);
  3840. }
  3841. if (added)
  3842. return;
  3843. CSocketSelectThread *thread = new CSocketSelectThread(selecttrace);
  3844. threads.append(*thread);
  3845. if (started)
  3846. thread->start();
  3847. }
  3848. }
  3849. void remove(ISocket *sock)
  3850. {
  3851. CriticalBlock block(sect);
  3852. ForEachItemIn(i,threads) {
  3853. if (threads.item(i).remove(sock)&&sock)
  3854. break;
  3855. }
  3856. }
  3857. void stop(bool wait)
  3858. {
  3859. IException *e=NULL;
  3860. CriticalBlock block(sect);
  3861. unsigned i = 0;
  3862. while (i<threads.ordinality()) {
  3863. CSocketSelectThread &t=threads.item(i);
  3864. {
  3865. CriticalUnblock unblock(sect);
  3866. t.stop(wait); // not quite as quick as could be if wait true
  3867. }
  3868. if (wait && !e && t.termexcept)
  3869. e = t.termexcept.getClear();
  3870. i++;
  3871. }
  3872. #if 0 // don't throw error as too late
  3873. if (e)
  3874. throw e;
  3875. #else
  3876. ::Release(e);
  3877. #endif
  3878. }
  3879. };
  3880. #ifdef _HAS_EPOLL_SUPPORT
  3881. class CSocketEpollThread: public CSocketMultiThread
  3882. {
  3883. int epfd;
  3884. int *epfdtbl;
  3885. void opendummy()
  3886. {
  3887. CriticalBlock block(sect);
  3888. if (!dummysockopen) {
  3889. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3890. if(pipe(dummysock)) {
  3891. WARNLOG("CSocketEpollThread: create pipe failed %d",ERRNO());
  3892. return;
  3893. }
  3894. for (unsigned i=0;i<2;i++) {
  3895. int flags = fcntl(dummysock[i], F_GETFL, 0);
  3896. if (flags!=-1) {
  3897. flags |= O_NONBLOCK;
  3898. fcntl(dummysock[i], F_SETFL, flags);
  3899. }
  3900. flags = fcntl(dummysock[i], F_GETFD, 0);
  3901. if (flags!=-1) {
  3902. flags |= FD_CLOEXEC;
  3903. fcntl(dummysock[i], F_SETFD, flags);
  3904. }
  3905. }
  3906. CHECKSOCKRANGE(dummysock[0]);
  3907. int srtn;
  3908. struct epoll_event event;
  3909. event.events = EPOLLIN; // TODO - add other bits ? (such as RDHUP)
  3910. event.data.fd = dummysock[0];
  3911. srtn = ::epoll_ctl(epfd, EPOLL_CTL_ADD, dummysock[0], &event);
  3912. if (srtn < 0) {
  3913. int err = ERRNO();
  3914. LOGERR(err,1,"epoll_ctl(ADD)");
  3915. THROWJSOCKEXCEPTION2(err);
  3916. }
  3917. # ifdef EPOLLTRACE
  3918. DBGLOG("EPOLL: added dummy fd %d to epfd %d", dummysock[0], epfd);
  3919. # endif
  3920. #else
  3921. if (IP6preferred)
  3922. dummysock = ::socket(AF_INET6, SOCK_STREAM, PF_INET6);
  3923. else
  3924. dummysock = ::socket(AF_INET, SOCK_STREAM, 0);
  3925. CHECKSOCKRANGE(dummysock);
  3926. int srtn;
  3927. struct epoll_event event;
  3928. event.events = EPOLLIN | EPOLLERR; // TODO - add other bits ? (such as RDHUP)
  3929. event.data.fd = dummysock;
  3930. srtn = ::epoll_ctl(epfd, EPOLL_CTL_ADD, dummysock, &event);
  3931. if (srtn < 0) {
  3932. int err = ERRNO();
  3933. LOGERR(err,1,"epoll_ctl(ADD)");
  3934. THROWJSOCKEXCEPTION2(err);
  3935. }
  3936. # ifdef EPOLLTRACE
  3937. DBGLOG("EPOLL: added dummy fd %d to epfd %d", dummysock, epfd);
  3938. # endif
  3939. #endif
  3940. dummysockopen = true;
  3941. }
  3942. }
  3943. void closedummy()
  3944. {
  3945. CriticalBlock block(sect);
  3946. if (dummysockopen) {
  3947. #ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  3948. struct epoll_event event;
  3949. ::epoll_ctl(epfd, EPOLL_CTL_DEL, dummysock[0], &event);
  3950. # ifdef EPOLLTRACE
  3951. DBGLOG("EPOLL: removed dummy fd %d from epfd %d", dummysock[0], epfd);
  3952. # endif
  3953. #ifdef SOCKTRACE
  3954. PROGLOG("SOCKTRACE: Closing dummy sockets %x %d %x %d (%x)", dummysock[0], dummysock[0], dummysock[1], dummysock[1], this);
  3955. #endif
  3956. ::close(dummysock[0]);
  3957. ::close(dummysock[1]);
  3958. #else
  3959. struct epoll_event event;
  3960. ::epoll_ctl(epfd, EPOLL_CTL_DEL, dummysock, &event);
  3961. # ifdef EPOLLTRACE
  3962. DBGLOG("EPOLL: removed dummy fd %d from epfd %d", dummysock, epfd);
  3963. # endif
  3964. ::close(dummysock);
  3965. #endif
  3966. dummysockopen = false;
  3967. }
  3968. }
  3969. void triggerselect()
  3970. {
  3971. CSocketMultiThread::triggerselect();
  3972. }
  3973. void resettrigger()
  3974. {
  3975. CSocketMultiThread::resettrigger();
  3976. }
  3977. public:
  3978. IMPLEMENT_IINTERFACE;
  3979. CSocketEpollThread(const char *trc)
  3980. : CSocketMultiThread("CSocketEpollThread")
  3981. {
  3982. dummysockopen = false;
  3983. terminating = false;
  3984. atomic_set(&tickwait,0);
  3985. waitingchange = 0;
  3986. selectvarschange = false;
  3987. validateselecterror = 0;
  3988. validateerrcount = 0;
  3989. offset = 0;
  3990. selecttrace = trc;
  3991. epfd = ::epoll_create(XFD_SETSIZE);
  3992. if (epfd < 0) {
  3993. int err = ERRNO();
  3994. LOGERR(err,1,"epoll_create()");
  3995. THROWJSOCKEXCEPTION2(err);
  3996. }
  3997. # if defined(_DEBUG) || defined(EPOLLTRACE)
  3998. DBGLOG("EPOLL: creating epfd %d", epfd );
  3999. # endif
  4000. try {
  4001. epfdtbl = new int[XFD_SETSIZE];
  4002. } catch (const std::bad_alloc &e) {
  4003. int err = ERRNO();
  4004. LOGERR(err,1,"epfdtbl alloc()");
  4005. THROWJSOCKEXCEPTION2(err);
  4006. }
  4007. for (int i=0; i<XFD_SETSIZE; i++) {
  4008. epfdtbl[i] = -1;
  4009. }
  4010. opendummy();
  4011. }
  4012. ~CSocketEpollThread()
  4013. {
  4014. closedummy();
  4015. ForEachItemIn(i,items) {
  4016. try {
  4017. SelectItem &si = items.item(i);
  4018. struct epoll_event event;
  4019. ::epoll_ctl(epfd, EPOLL_CTL_DEL, si.handle, &event);
  4020. # ifdef EPOLLTRACE
  4021. DBGLOG("EPOLL: removed fd %d from epfd %d", si.handle, epfd);
  4022. # endif
  4023. si.sock->Release();
  4024. si.nfy->Release();
  4025. }
  4026. catch (IException *e) {
  4027. EXCLOG(e,"~CSocketEpollThread");
  4028. e->Release();
  4029. }
  4030. }
  4031. if (epfd >= 0) {
  4032. # ifdef EPOLLTRACE
  4033. DBGLOG("EPOLL: closing epfd %d", epfd );
  4034. # endif
  4035. ::close(epfd);
  4036. epfd = -1;
  4037. delete [] epfdtbl;
  4038. }
  4039. }
  4040. Owned<IException> termexcept;
  4041. void updateItems()
  4042. {
  4043. // must be in CriticalBlock block(sect);
  4044. unsigned n = items.ordinality();
  4045. bool reindex = false;
  4046. for (unsigned i=0;i<n;) {
  4047. SelectItem &si = items.item(i);
  4048. if (si.add_epoll) {
  4049. reindex = true;
  4050. }
  4051. if (si.del) {
  4052. struct epoll_event event;
  4053. ::epoll_ctl(epfd, EPOLL_CTL_DEL, si.handle, &event);
  4054. # ifdef EPOLLTRACE
  4055. DBGLOG("EPOLL: removed fd %d from epfd %d", si.handle, epfd);
  4056. # endif
  4057. epfdtbl[si.handle] = -1;
  4058. reindex = true;
  4059. si.nfy->Release();
  4060. try {
  4061. #ifdef SOCKTRACE
  4062. PROGLOG("CSocketEpollThread::updateItems release %d",si.handle);
  4063. #endif
  4064. si.sock->Release();
  4065. }
  4066. catch (IException *e) {
  4067. EXCLOG(e,"CSocketEpollThread::updateItems");
  4068. e->Release();
  4069. }
  4070. n--;
  4071. if (i<n)
  4072. si = items.item(n);
  4073. items.remove(n);
  4074. }
  4075. else
  4076. i++;
  4077. }
  4078. assertex(n<=XFD_SETSIZE-1);
  4079. if (reindex) {
  4080. # ifdef EPOLLTRACE
  4081. int max_sockid = 0;
  4082. # endif
  4083. ForEachItemIn(j,items) {
  4084. SelectItem &si = items.item(j);
  4085. epfdtbl[si.handle] = j;
  4086. if (si.add_epoll) {
  4087. si.add_epoll = false;
  4088. int srtn, ep_mode;
  4089. struct epoll_event event;
  4090. if (si.mode != 0) {
  4091. ep_mode = 0;
  4092. if (si.mode & SELECTMODE_READ) {
  4093. ep_mode |= (EPOLLIN | EPOLLPRI);
  4094. }
  4095. if (si.mode & SELECTMODE_WRITE) {
  4096. ep_mode |= EPOLLOUT;
  4097. }
  4098. if (si.mode & SELECTMODE_EXCEPT) {
  4099. ep_mode |= EPOLLERR;
  4100. }
  4101. if (ep_mode != 0) {
  4102. ep_mode |= EPOLLRDHUP;
  4103. event.events = ep_mode;
  4104. event.data.fd = si.handle;
  4105. srtn = ::epoll_ctl(epfd, EPOLL_CTL_ADD, si.handle, &event);
  4106. if (srtn < 0) {
  4107. int err = ERRNO();
  4108. LOGERR(err,1,"epoll_ctl(ADD)");
  4109. THROWJSOCKEXCEPTION2(err);
  4110. }
  4111. # ifdef EPOLLTRACE
  4112. DBGLOG("EPOLL: added fd %d to epfd %d", si.handle, epfd);
  4113. # endif
  4114. }
  4115. }
  4116. # ifdef EPOLLTRACE
  4117. max_sockid=std::max(si.handle, max_sockid);
  4118. # endif
  4119. }
  4120. # ifdef EPOLLTRACE
  4121. max_sockid++;
  4122. for(int ix=0; ix<max_sockid; ix++) {
  4123. DBGLOG("EPOLL: epfdtbl[%d] = %d", ix, epfdtbl[ix]);
  4124. }
  4125. # endif
  4126. }
  4127. # ifdef EPOLLTRACE
  4128. DBGLOG("EPOLL: leaving updateItems(), reindex = %d", reindex);
  4129. # endif
  4130. }
  4131. }
  4132. bool add(ISocket *sock,unsigned mode,ISocketSelectNotify *nfy)
  4133. {
  4134. // maybe check once to prevent 1st delay? TBD
  4135. CriticalBlock block(sect);
  4136. unsigned n=0;
  4137. ForEachItemIn(i,items) {
  4138. SelectItem &si = items.item(i);
  4139. if (!si.del) {
  4140. if (si.sock==sock) {
  4141. si.del = true;
  4142. }
  4143. else
  4144. n++;
  4145. }
  4146. }
  4147. if (n>=XFD_SETSIZE-1) // leave 1 spare
  4148. return false;
  4149. SelectItem sn;
  4150. sn.nfy = LINK(nfy);
  4151. sn.sock = LINK(sock);
  4152. sn.mode = (byte)mode;
  4153. sn.handle = (T_SOCKET)sock->OShandle();
  4154. CHECKSOCKRANGE(sn.handle);
  4155. sn.del = false;
  4156. sn.add_epoll = true;
  4157. items.append(sn);
  4158. selectvarschange = true;
  4159. triggerselect();
  4160. return true;
  4161. }
  4162. bool remove(ISocket *sock)
  4163. {
  4164. return CSocketMultiThread::remove(sock);
  4165. }
  4166. void stop(bool wait)
  4167. {
  4168. CSocketMultiThread::stop(wait);
  4169. }
  4170. bool sockOk(T_SOCKET sock)
  4171. {
  4172. return CSocketMultiThread::sockOk(sock);
  4173. }
  4174. bool checkSocks()
  4175. {
  4176. return CSocketMultiThread::checkSocks();
  4177. }
  4178. void updateEpollVars(unsigned &ni)
  4179. {
  4180. CriticalBlock block(sect);
  4181. selectvarschange = false;
  4182. if (waitingchange) {
  4183. waitingchangesem.signal(waitingchange);
  4184. waitingchange = 0;
  4185. }
  4186. if (validateselecterror) { // something went wrong so check sockets
  4187. validateerrcount++;
  4188. if (!checkSocks()) {
  4189. // bad socket not found
  4190. PROGLOG("CSocketEpollThread::updateEpollVars cannot find socket error");
  4191. if (validateerrcount>10)
  4192. throw MakeStringException(-1,"CSocketEpollThread:Socket epoll error %d",validateselecterror);
  4193. }
  4194. }
  4195. else
  4196. validateerrcount = 0;
  4197. updateItems();
  4198. #ifndef _USE_PIPE_FOR_SELECT_TRIGGER
  4199. opendummy();
  4200. #endif
  4201. ni = items.ordinality();
  4202. validateselecterror = 0;
  4203. }
  4204. int run()
  4205. {
  4206. try {
  4207. unsigned ni = 0;
  4208. unsigned numto = 0;
  4209. unsigned lastnumto = 0;
  4210. unsigned totnum = 0;
  4211. unsigned total = 0;
  4212. struct epoll_event events[XFD_SETSIZE];
  4213. selectvarschange = true;
  4214. while (!terminating) {
  4215. if (selectvarschange) {
  4216. updateEpollVars(ni);
  4217. }
  4218. if (ni==0) {
  4219. validateerrcount = 0;
  4220. atomic_inc(&tickwait);
  4221. if(!selectvarschange&&!terminating)
  4222. ticksem.wait(SELECT_TIMEOUT_SECS*1000);
  4223. atomic_dec(&tickwait);
  4224. continue;
  4225. }
  4226. int n = ::epoll_wait(epfd, events, XFD_SETSIZE, 1000);
  4227. # ifdef EPOLLTRACE
  4228. if(n > 0)
  4229. DBGLOG("EPOLL: after epoll_wait(), n = %d, ni = %d", n, ni);
  4230. # endif
  4231. if (terminating)
  4232. break;
  4233. if (n < 0) {
  4234. CriticalBlock block(sect);
  4235. int err = ERRNO();
  4236. if (err != EINTRCALL) {
  4237. if (dummysockopen) {
  4238. LOGERR(err,12,"CSocketEpollThread epoll error"); // should cache error ?
  4239. validateselecterror = err;
  4240. #ifndef _USE_PIPE_FOR_SELECT_TRIGGER
  4241. closedummy(); // just in case was culprit
  4242. #endif
  4243. }
  4244. selectvarschange = true;
  4245. continue;
  4246. }
  4247. n = 0;
  4248. }
  4249. else if (n>0) {
  4250. validateerrcount = 0;
  4251. numto = 0;
  4252. lastnumto = 0;
  4253. total += n;
  4254. totnum++;
  4255. SelectItemArray tonotify;
  4256. {
  4257. CriticalBlock block(sect);
  4258. for (int j=0;j<n;j++) {
  4259. # ifdef EPOLLTRACE
  4260. DBGLOG("EPOLL: events[%d].data.fd = %d, epfdtbl = %d, events mask = %d", j, events[j].data.fd, epfdtbl[events[j].data.fd], events[j].events);
  4261. # endif
  4262. # ifdef _USE_PIPE_FOR_SELECT_TRIGGER
  4263. if ((dummysockopen) && (events[j].data.fd == dummysock[0])) {
  4264. resettrigger();
  4265. continue;
  4266. }
  4267. # endif
  4268. if (events[j].data.fd >= 0) {
  4269. assertex(epfdtbl[events[j].data.fd] >= 0);
  4270. SelectItem *epsi = items.getArray(epfdtbl[events[j].data.fd]);
  4271. if (!epsi->del) {
  4272. unsigned int ep_mode = 0;
  4273. if (events[j].events & (EPOLLIN | EPOLLPRI)) {
  4274. ep_mode |= SELECTMODE_READ;
  4275. }
  4276. if (events[j].events & (EPOLLERR | EPOLLHUP)) {
  4277. ep_mode |= SELECTMODE_READ;
  4278. }
  4279. if (events[j].events & EPOLLRDHUP) {
  4280. // TODO - or should we set EXCEPT ?
  4281. ep_mode |= SELECTMODE_READ;
  4282. }
  4283. if (events[j].events & EPOLLOUT) {
  4284. ep_mode |= SELECTMODE_WRITE;
  4285. }
  4286. if (ep_mode != 0) {
  4287. tonotify.append(*epsi);
  4288. tonotify.item(tonotify.length()-1).mode = ep_mode;
  4289. }
  4290. }
  4291. }
  4292. }
  4293. }
  4294. ForEachItemIn(j,tonotify) {
  4295. SelectItem &si = tonotify.item(j);
  4296. try {
  4297. si.nfy->notifySelected(si.sock,si.mode); // ignore return
  4298. }
  4299. catch (IException *e) { // should be acted upon by notifySelected
  4300. EXCLOG(e,"CSocketEpollThread notifySelected");
  4301. throw ;
  4302. }
  4303. }
  4304. }
  4305. else {
  4306. validateerrcount = 0;
  4307. if ((++numto>=lastnumto*2)) {
  4308. lastnumto = numto;
  4309. if (selecttrace&&(numto>4))
  4310. PROGLOG("%s: Epoll Idle(%d), %d,%d,%0.2f",selecttrace,numto,totnum,total,totnum?((double)total/(double)totnum):0.0);
  4311. }
  4312. /*
  4313. if (numto&&(numto%100)) {
  4314. CriticalBlock block(sect);
  4315. if (!selectvarschange)
  4316. selectvarschange = checkSocks();
  4317. }
  4318. */
  4319. }
  4320. }
  4321. }
  4322. catch (IException *e) {
  4323. EXCLOG(e,"CSocketEpollThread");
  4324. termexcept.setown(e);
  4325. }
  4326. CriticalBlock block(sect);
  4327. try {
  4328. updateItems();
  4329. }
  4330. catch (IException *e) {
  4331. EXCLOG(e,"CSocketEpollThread(2)");
  4332. if (!termexcept)
  4333. termexcept.setown(e);
  4334. else
  4335. e->Release();
  4336. }
  4337. return 0;
  4338. }
  4339. };
  4340. class CSocketEpollHandler: public CInterface, implements ISocketSelectHandler
  4341. {
  4342. CIArrayOf<CSocketEpollThread> threads;
  4343. CriticalSection sect;
  4344. bool started;
  4345. StringAttr epolltrace;
  4346. public:
  4347. IMPLEMENT_IINTERFACE;
  4348. CSocketEpollHandler(const char *trc)
  4349. : epolltrace(trc)
  4350. {
  4351. started = false;
  4352. }
  4353. void start()
  4354. {
  4355. CriticalBlock block(sect);
  4356. if (!started) {
  4357. started = true;
  4358. ForEachItemIn(i,threads) {
  4359. threads.item(i).start();
  4360. }
  4361. }
  4362. }
  4363. void add(ISocket *sock,unsigned mode,ISocketSelectNotify *nfy)
  4364. {
  4365. CriticalBlock block(sect);
  4366. loop {
  4367. bool added=false;
  4368. ForEachItemIn(i,threads) {
  4369. if (added)
  4370. threads.item(i).remove(sock);
  4371. else
  4372. added = threads.item(i).add(sock,mode,nfy);
  4373. }
  4374. if (added)
  4375. return;
  4376. CSocketEpollThread *thread = new CSocketEpollThread(epolltrace);
  4377. threads.append(*thread);
  4378. if (started)
  4379. thread->start();
  4380. }
  4381. }
  4382. void remove(ISocket *sock)
  4383. {
  4384. CriticalBlock block(sect);
  4385. ForEachItemIn(i,threads) {
  4386. if (threads.item(i).remove(sock)&&sock)
  4387. break;
  4388. }
  4389. }
  4390. void stop(bool wait)
  4391. {
  4392. IException *e=NULL;
  4393. CriticalBlock block(sect);
  4394. unsigned i = 0;
  4395. while (i<threads.ordinality()) {
  4396. CSocketEpollThread &t=threads.item(i);
  4397. {
  4398. CriticalUnblock unblock(sect);
  4399. t.stop(wait); // not quite as quick as could be if wait true
  4400. }
  4401. if (wait && !e && t.termexcept)
  4402. e = t.termexcept.getClear();
  4403. i++;
  4404. }
  4405. #if 0 // don't throw error as too late
  4406. if (e)
  4407. throw e;
  4408. #else
  4409. ::Release(e);
  4410. #endif
  4411. }
  4412. };
  4413. #endif // _HAS_EPOLL_SUPPORT
  4414. ISocketSelectHandler *createSocketSelectHandler(const char *trc)
  4415. {
  4416. #ifdef _HAS_EPOLL_SUPPORT
  4417. # if 0 // enable once we know method to get env file settings ...
  4418. if (env_file.use_epoll)
  4419. return new CSocketEpollHandler(trc);
  4420. else
  4421. # endif
  4422. return new CSocketSelectHandler(trc);
  4423. #else
  4424. return new CSocketSelectHandler(trc);
  4425. #endif
  4426. }
  4427. ISocketSelectHandler *createSocketEpollHandler(const char *trc)
  4428. {
  4429. #ifdef _HAS_EPOLL_SUPPORT
  4430. return new CSocketEpollHandler(trc);
  4431. #else
  4432. return new CSocketSelectHandler(trc);
  4433. #endif
  4434. }
  4435. void readBuffer(ISocket * socket, MemoryBuffer & buffer)
  4436. {
  4437. size32_t len;
  4438. socket->read(&len, sizeof(len));
  4439. _WINREV4(len);
  4440. if (len) {
  4441. void * target = buffer.reserve(len);
  4442. socket->read(target, len);
  4443. }
  4444. }
  4445. void readBuffer(ISocket * socket, MemoryBuffer & buffer, unsigned timeoutms)
  4446. {
  4447. size32_t len;
  4448. size32_t sizeGot;
  4449. socket->readtms(&len, sizeof(len), sizeof(len), sizeGot, timeoutms);
  4450. _WINREV4(len);
  4451. if (len) {
  4452. void * target = buffer.reserve(len);
  4453. socket->readtms(target, len, len, sizeGot, timeoutms);
  4454. }
  4455. }
  4456. void writeBuffer(ISocket * socket, MemoryBuffer & buffer)
  4457. {
  4458. unsigned len = buffer.length();
  4459. _WINREV4(len);
  4460. socket->write(&len, sizeof(len));
  4461. if (len)
  4462. socket->write(buffer.toByteArray(), buffer.length());
  4463. }
  4464. bool catchReadBuffer(ISocket * socket, MemoryBuffer & buffer)
  4465. {
  4466. try
  4467. {
  4468. readBuffer(socket, buffer);
  4469. return true;
  4470. }
  4471. catch (IException * e)
  4472. {
  4473. switch (e->errorCode())
  4474. {
  4475. case JSOCKERR_graceful_close:
  4476. break;
  4477. default:
  4478. EXCLOG(e,"catchReadBuffer");
  4479. break;
  4480. }
  4481. e->Release();
  4482. }
  4483. return false;
  4484. }
  4485. bool catchReadBuffer(ISocket * socket, MemoryBuffer & buffer, unsigned timeoutms)
  4486. {
  4487. try
  4488. {
  4489. readBuffer(socket, buffer, timeoutms);
  4490. return true;
  4491. }
  4492. catch (IException * e)
  4493. {
  4494. switch (e->errorCode())
  4495. {
  4496. case JSOCKERR_graceful_close:
  4497. break;
  4498. default:
  4499. EXCLOG(e,"catchReadBuffer");
  4500. break;
  4501. }
  4502. e->Release();
  4503. }
  4504. return false;
  4505. }
  4506. bool catchWriteBuffer(ISocket * socket, MemoryBuffer & buffer)
  4507. {
  4508. try
  4509. {
  4510. writeBuffer(socket, buffer);
  4511. return true;
  4512. }
  4513. catch (IException * e)
  4514. {
  4515. EXCLOG(e,"catchWriteBuffer");
  4516. e->Release();
  4517. }
  4518. return false;
  4519. }
  4520. // utility interface for simple conversations
  4521. // conversation is always between two ends,
  4522. // at any given time one end must be receiving and other sending (though these may swap during the conversation)
  4523. class CSingletonSocketConnection: public CInterface, implements IConversation
  4524. {
  4525. Owned<ISocket> sock;
  4526. Owned<ISocket> listensock;
  4527. enum { Snone, Saccept, Sconnect, Srecv, Ssend, Scancelled } state;
  4528. bool accepting;
  4529. bool cancelling;
  4530. SocketEndpoint ep;
  4531. CriticalSection crit;
  4532. public:
  4533. IMPLEMENT_IINTERFACE;
  4534. CSingletonSocketConnection(SocketEndpoint &_ep)
  4535. {
  4536. ep = _ep;
  4537. state = Snone;
  4538. cancelling = false;
  4539. }
  4540. ~CSingletonSocketConnection()
  4541. {
  4542. try {
  4543. if (sock)
  4544. sock->close();
  4545. }
  4546. catch (IException *e) {
  4547. if (e->errorCode()!=JSOCKERR_graceful_close)
  4548. EXCLOG(e,"CSingletonSocketConnection close");
  4549. e->Release();
  4550. }
  4551. }
  4552. bool connect(unsigned timeoutms)
  4553. {
  4554. CriticalBlock block(crit);
  4555. if (cancelling)
  4556. state = Scancelled;
  4557. if (state==Scancelled)
  4558. return false;
  4559. assertex(!sock);
  4560. ISocket *newsock=NULL;
  4561. state = Sconnect;
  4562. unsigned start = 0;
  4563. if (timeoutms!=(unsigned)INFINITE)
  4564. start = msTick();
  4565. while (state==Sconnect) {
  4566. try {
  4567. CriticalUnblock unblock(crit);
  4568. newsock = ISocket::connect_wait(ep,1000*60*4);
  4569. break;
  4570. }
  4571. catch (IException * e) {
  4572. if ((e->errorCode()==JSOCKERR_timeout_expired)||(e->errorCode()==JSOCKERR_connection_failed)) {
  4573. e->Release();
  4574. if ((state==Sconnect)&&(timeoutms!=(unsigned)INFINITE)&&(msTick()-start>timeoutms)) {
  4575. state = Snone;
  4576. return false;
  4577. }
  4578. }
  4579. else {
  4580. state = Scancelled;
  4581. EXCLOG(e,"CSingletonSocketConnection::connect");
  4582. e->Release();
  4583. return false;
  4584. }
  4585. }
  4586. }
  4587. if (state!=Sconnect) {
  4588. ::Release(newsock);
  4589. newsock = NULL;
  4590. }
  4591. if (!newsock) {
  4592. state = Scancelled;
  4593. return false;
  4594. }
  4595. sock.setown(newsock);
  4596. return true;
  4597. }
  4598. bool send(MemoryBuffer &mb)
  4599. {
  4600. CriticalBlock block(crit);
  4601. if (cancelling)
  4602. state = Scancelled;
  4603. if (state==Scancelled)
  4604. return false;
  4605. assertex(sock);
  4606. state = Srecv;
  4607. try {
  4608. CriticalUnblock unblock(crit);
  4609. writeBuffer(sock,mb);
  4610. }
  4611. catch (IException * e) {
  4612. state = Scancelled;
  4613. EXCLOG(e,"CSingletonSocketConnection::send");
  4614. e->Release();
  4615. return false;
  4616. }
  4617. state = Snone;
  4618. return true;
  4619. }
  4620. unsigned short setRandomPort(unsigned short base, unsigned num)
  4621. {
  4622. loop {
  4623. try {
  4624. ep.port = base+(unsigned short)(getRandom()%num);
  4625. listensock.setown(ISocket::create(ep.port));
  4626. return ep.port;
  4627. }
  4628. catch (IException *e) {
  4629. if (e->errorCode()!=JSOCKERR_port_in_use) {
  4630. state = Scancelled;
  4631. EXCLOG(e,"CSingletonSocketConnection::setRandomPort");
  4632. e->Release();
  4633. break;
  4634. }
  4635. e->Release();
  4636. }
  4637. }
  4638. return 0;
  4639. }
  4640. bool accept(unsigned timeoutms)
  4641. {
  4642. CriticalBlock block(crit);
  4643. if (cancelling)
  4644. state = Scancelled;
  4645. if (state==Scancelled)
  4646. return false;
  4647. if (!sock) {
  4648. ISocket *newsock=NULL;
  4649. state = Saccept;
  4650. loop {
  4651. try {
  4652. {
  4653. CriticalUnblock unblock(crit);
  4654. if (!listensock)
  4655. listensock.setown(ISocket::create(ep.port));
  4656. if ((timeoutms!=(unsigned)INFINITE)&&(!listensock->wait_read(timeoutms))) {
  4657. state = Snone;
  4658. return false;
  4659. }
  4660. }
  4661. if (cancelling)
  4662. state = Scancelled;
  4663. if (state==Scancelled)
  4664. return false;
  4665. {
  4666. CriticalUnblock unblock(crit);
  4667. newsock=listensock->accept(true);
  4668. break;
  4669. }
  4670. }
  4671. catch (IException *e) {
  4672. if (e->errorCode()==JSOCKERR_graceful_close)
  4673. PROGLOG("CSingletonSocketConnection: Closed socket on accept - retrying...");
  4674. else {
  4675. state = Scancelled;
  4676. EXCLOG(e,"CSingletonSocketConnection::accept");
  4677. e->Release();
  4678. break;
  4679. }
  4680. e->Release();
  4681. }
  4682. }
  4683. if (state!=Saccept) {
  4684. ::Release(newsock);
  4685. newsock = NULL;
  4686. }
  4687. if (!newsock) {
  4688. state = Scancelled;
  4689. return false;
  4690. }
  4691. sock.setown(newsock);
  4692. }
  4693. return true;
  4694. }
  4695. bool recv(MemoryBuffer &mb, unsigned timeoutms)
  4696. {
  4697. CriticalBlock block(crit);
  4698. if (cancelling)
  4699. state = Scancelled;
  4700. if (state==Scancelled)
  4701. return false;
  4702. assertex(sock);
  4703. state = Srecv;
  4704. try {
  4705. CriticalUnblock unblock(crit);
  4706. readBuffer(sock,mb,timeoutms);
  4707. }
  4708. catch (IException *e) {
  4709. if (e->errorCode()==JSOCKERR_timeout_expired)
  4710. state = Snone;
  4711. else {
  4712. state = Scancelled;
  4713. if (e->errorCode()!=JSOCKERR_graceful_close)
  4714. EXCLOG(e,"CSingletonSocketConnection::recv");
  4715. }
  4716. e->Release();
  4717. return false;
  4718. }
  4719. state = Snone;
  4720. return true;
  4721. }
  4722. virtual void cancel()
  4723. {
  4724. CriticalBlock block(crit);
  4725. while (state!=Scancelled) {
  4726. cancelling = true;
  4727. try {
  4728. switch (state) {
  4729. case Saccept:
  4730. {
  4731. if (listensock)
  4732. listensock->cancel_accept();
  4733. }
  4734. break;
  4735. case Sconnect:
  4736. // wait for timeout
  4737. break;
  4738. case Srecv:
  4739. {
  4740. if (sock)
  4741. sock->close();
  4742. }
  4743. break;
  4744. case Ssend:
  4745. // wait for finished
  4746. break;
  4747. default:
  4748. state = Scancelled;
  4749. break;
  4750. }
  4751. }
  4752. catch (IException *e) {
  4753. EXCLOG(e,"CSingletonSocketConnection::cancel");
  4754. e->Release();
  4755. }
  4756. {
  4757. CriticalUnblock unblock(crit);
  4758. Sleep(1000);
  4759. }
  4760. }
  4761. }
  4762. };
  4763. IConversation *createSingletonSocketConnection(unsigned short port,SocketEndpoint *_ep)
  4764. {
  4765. SocketEndpoint ep;
  4766. if (_ep)
  4767. ep = *_ep;
  4768. if (port)
  4769. ep.port = port;
  4770. return new CSingletonSocketConnection(ep);
  4771. }
  4772. // interface for reading from multiple sockets using the BF_SYNC_TRANSFER_PUSH protocol
  4773. class CSocketBufferReader: public CInterface, implements ISocketBufferReader
  4774. {
  4775. class SocketElem: public CInterface, implements ISocketSelectNotify
  4776. {
  4777. CSocketBufferReader *parent;
  4778. unsigned num; // top bit used for ready
  4779. MemoryAttr blk;
  4780. CriticalSection sect;
  4781. Linked<ISocket> sock;
  4782. bool active;
  4783. bool pending;
  4784. public:
  4785. IMPLEMENT_IINTERFACE;
  4786. void init(CSocketBufferReader *_parent,ISocket *_sock,unsigned _n)
  4787. {
  4788. parent = _parent;
  4789. num = _n;
  4790. sock.set(_sock);
  4791. active = true;
  4792. pending = false;
  4793. }
  4794. virtual bool notifySelected(ISocket *socket,unsigned selected)
  4795. {
  4796. assertex(sock==socket);
  4797. {
  4798. CriticalBlock block(sect);
  4799. if (pending) {
  4800. active = false;
  4801. parent->remove(sock);
  4802. return false;
  4803. }
  4804. pending = true;
  4805. unsigned t1=usTick();
  4806. size32_t sz = sock->receive_block_size();
  4807. unsigned t2=usTick();
  4808. if (sz)
  4809. sock->receive_block(blk.allocate(sz),sz);
  4810. else
  4811. parent->remove(sock);
  4812. unsigned t3=usTick();
  4813. if (t3-t1>60*1000000)
  4814. PROGLOG("CSocketBufferReader(%d): slow receive_block (%d,%d) sz=%d",num,t2-t1,t3-t2,sz);
  4815. }
  4816. parent->enqueue(this); // nb outside sect critical block
  4817. return false; // always return false
  4818. }
  4819. unsigned get(MemoryBuffer &mb)
  4820. {
  4821. CriticalBlock block(sect);
  4822. assertex(pending);
  4823. size32_t sz = blk.length();
  4824. if (sz)
  4825. mb.setBuffer(sz,blk.detach(),true);
  4826. pending = false;
  4827. if (!active) {
  4828. active = true;
  4829. parent->add(*this);
  4830. }
  4831. return num;
  4832. }
  4833. size32_t size()
  4834. {
  4835. return blk.length();
  4836. }
  4837. ISocket *getSocket() { return sock; }
  4838. } *elems;
  4839. SimpleInterThreadQueueOf<SocketElem, false> readyq;
  4840. Owned<ISocketSelectHandler> selecthandler;
  4841. size32_t buffersize;
  4842. size32_t buffermax;
  4843. unsigned bufferwaiting;
  4844. CriticalSection buffersect;
  4845. Semaphore buffersem;
  4846. bool isdone;
  4847. public:
  4848. IMPLEMENT_IINTERFACE;
  4849. CSocketBufferReader(const char *trc)
  4850. {
  4851. selecthandler.setown(createSocketSelectHandler(trc));
  4852. elems = NULL;
  4853. }
  4854. ~CSocketBufferReader()
  4855. {
  4856. delete [] elems;
  4857. }
  4858. virtual void init(unsigned num,ISocket **sockets,size32_t _buffermax)
  4859. {
  4860. elems = new SocketElem[num];
  4861. for (unsigned i=0;i<num;i++) {
  4862. ISocket *sock = sockets[i];
  4863. if (sock) { // can have gaps
  4864. elems[i].init(this,sock,i);
  4865. add(elems[i]);
  4866. }
  4867. }
  4868. buffersize = 0;
  4869. buffermax = _buffermax;
  4870. bufferwaiting = 0;
  4871. isdone = false;
  4872. selecthandler->start();
  4873. }
  4874. virtual unsigned get(MemoryBuffer &mb)
  4875. {
  4876. SocketElem &e = *readyq.dequeue();
  4877. CriticalBlock block(buffersect);
  4878. assertex(buffersize>=e.size());
  4879. buffersize-=e.size();
  4880. if (bufferwaiting) {
  4881. buffersem.signal(bufferwaiting);
  4882. bufferwaiting = 0;
  4883. }
  4884. return e.get(mb);
  4885. }
  4886. virtual void done(bool wait)
  4887. {
  4888. buffersem.signal(0x10000);
  4889. isdone = true;
  4890. selecthandler->stop(wait);
  4891. if (wait) {
  4892. delete [] elems;
  4893. elems = NULL;
  4894. }
  4895. }
  4896. void enqueue(SocketElem *elem)
  4897. {
  4898. if (elem) {
  4899. CriticalBlock block(buffersect);
  4900. size32_t sz = elem->size();
  4901. while ((buffersize>0)&&(sz>0)&&(buffersize+sz>buffermax)) {
  4902. if (isdone)
  4903. return;
  4904. bufferwaiting++;
  4905. CriticalUnblock unblock(buffersect);
  4906. buffersem.wait();
  4907. }
  4908. buffersize += sz;
  4909. }
  4910. readyq.enqueue(elem);
  4911. }
  4912. void remove(ISocket *sock)
  4913. {
  4914. selecthandler->remove(sock);
  4915. }
  4916. void add(SocketElem &elem)
  4917. {
  4918. selecthandler->add(elem.getSocket(),SELECTMODE_READ,&elem);
  4919. }
  4920. };
  4921. ISocketBufferReader *createSocketBufferReader(const char *trc)
  4922. {
  4923. return new CSocketBufferReader(trc);
  4924. }
  4925. extern jlib_decl void markNodeCentral(SocketEndpoint &ep)
  4926. {
  4927. #ifdef CENTRAL_NODE_RANDOM_DELAY
  4928. CriticalBlock block(CSocket::crit);
  4929. CentralNodeArray.append(ep);
  4930. #endif
  4931. }
  4932. static CSocket *prepareSocket(unsigned idx,const SocketEndpoint &ep, ISocketConnectNotify &inotify)
  4933. {
  4934. Owned<CSocket> sock = new CSocket(ep,sm_tcp,NULL);
  4935. int err = sock->pre_connect(false);
  4936. if ((err == EINPROGRESS)||(err == EWOULDBLOCK))
  4937. return sock.getClear();
  4938. if (err==0) {
  4939. int err = sock->post_connect();
  4940. if (err==0)
  4941. inotify.connected(idx,ep,sock);
  4942. else {
  4943. sock->errclose();
  4944. inotify.failed(idx,ep,err);
  4945. }
  4946. }
  4947. else
  4948. inotify.failed(idx,ep,err);
  4949. return NULL;
  4950. }
  4951. void multiConnect(const SocketEndpointArray &eps,ISocketConnectNotify &inotify,unsigned timeout)
  4952. {
  4953. class SocketElem: public CInterface, implements ISocketSelectNotify
  4954. {
  4955. CriticalSection *sect;
  4956. ISocketSelectHandler *handler;
  4957. unsigned *remaining;
  4958. Semaphore *notifysem;
  4959. ISocketConnectNotify *inotify;
  4960. public:
  4961. Owned<CSocket> sock;
  4962. SocketEndpoint ep;
  4963. unsigned idx;
  4964. IMPLEMENT_IINTERFACE;
  4965. void init(CSocket *_sock,unsigned _idx,SocketEndpoint &_ep,CriticalSection *_sect,ISocketSelectHandler *_handler,ISocketConnectNotify *_inotify, unsigned *_remaining, Semaphore *_notifysem)
  4966. {
  4967. ep = _ep;
  4968. idx = _idx;
  4969. inotify = _inotify;
  4970. sock.setown(_sock),
  4971. sect = _sect;
  4972. handler = _handler;
  4973. remaining = _remaining;
  4974. notifysem = _notifysem;
  4975. }
  4976. virtual bool notifySelected(ISocket *socket,unsigned selected)
  4977. {
  4978. CriticalBlock block(*sect);
  4979. handler->remove(socket);
  4980. int err = sock->post_connect();
  4981. CSocket *newsock = NULL;
  4982. {
  4983. CriticalUnblock unblock(*sect); // up to caller to cope with multithread
  4984. if (err==0)
  4985. inotify->connected(idx,ep,sock);
  4986. else if ((err==ETIMEDOUT)||(err==ECONNREFUSED)) {
  4987. // don't give up so easily (maybe listener not yet started (i.e. racing))
  4988. newsock = prepareSocket(idx,ep,*inotify);
  4989. Sleep(100); // not very nice but without this would just loop
  4990. }
  4991. else
  4992. inotify->failed(idx,ep,err);
  4993. }
  4994. if (newsock) {
  4995. sock.setown(newsock);
  4996. handler->add(sock,SELECTMODE_WRITE|SELECTMODE_EXCEPT,this);
  4997. }
  4998. else {
  4999. sock.clear();
  5000. (*remaining)--;
  5001. notifysem->signal();
  5002. }
  5003. return false;
  5004. }
  5005. } *elems;
  5006. unsigned n = eps.ordinality();
  5007. unsigned remaining = n;
  5008. if (!n)
  5009. return;
  5010. elems = new SocketElem[n];
  5011. unsigned i;
  5012. CriticalSection sect;
  5013. Semaphore notifysem;
  5014. Owned<ISocketSelectHandler> selecthandler = createSocketSelectHandler(
  5015. #ifdef _DEBUG
  5016. "multiConnect"
  5017. #else
  5018. NULL
  5019. #endif
  5020. );
  5021. StringBuffer name;
  5022. for (i=0;i<n;i++) {
  5023. CSocket* sock = prepareSocket(i,eps.item(i),inotify);
  5024. if (sock) {
  5025. elems[i].init(sock,i,eps.item(i),&sect,selecthandler,&inotify,&remaining,&notifysem);
  5026. selecthandler->add(sock,SELECTMODE_WRITE|SELECTMODE_EXCEPT,&elems[i]);
  5027. }
  5028. else
  5029. remaining--;
  5030. }
  5031. if (remaining) {
  5032. unsigned lastremaining=remaining;
  5033. selecthandler->start();
  5034. loop {
  5035. bool to=!notifysem.wait(timeout);
  5036. {
  5037. CriticalBlock block(sect);
  5038. if (remaining==0)
  5039. break;
  5040. if (to&&(remaining==lastremaining))
  5041. break; // nothing happened recently
  5042. lastremaining = remaining;
  5043. }
  5044. }
  5045. selecthandler->stop(true);
  5046. }
  5047. selecthandler.clear();
  5048. if (remaining) {
  5049. for (unsigned j=0;j<n;j++) { // mop up timeouts
  5050. SocketElem &elem = elems[j];
  5051. if (elem.sock.get()) {
  5052. elem.sock.clear();
  5053. inotify.failed(j,elem.ep,-1);
  5054. remaining--;
  5055. if (remaining==0)
  5056. break;
  5057. }
  5058. }
  5059. delete [] elems;
  5060. }
  5061. }
  5062. void multiConnect(const SocketEndpointArray &eps, PointerIArrayOf<ISocket> &retsockets,unsigned timeout)
  5063. {
  5064. unsigned n = eps.ordinality();
  5065. if (n==0)
  5066. return;
  5067. if (n==1) { // no need for multi
  5068. ISocket *sock = NULL;
  5069. try {
  5070. sock = ISocket::connect_timeout(eps.item(0),timeout);
  5071. }
  5072. catch (IException *e) { // ignore error just append NULL
  5073. sock = NULL;
  5074. e->Release();
  5075. }
  5076. retsockets.append(sock);
  5077. return;
  5078. }
  5079. while (retsockets.ordinality()<n)
  5080. retsockets.append(NULL);
  5081. CriticalSection sect;
  5082. class cNotify: implements ISocketConnectNotify
  5083. {
  5084. CriticalSection &sect;
  5085. PointerIArrayOf<ISocket> &retsockets;
  5086. public:
  5087. cNotify(PointerIArrayOf<ISocket> &_retsockets,CriticalSection &_sect)
  5088. : retsockets(_retsockets),sect(_sect)
  5089. {
  5090. }
  5091. void connected(unsigned idx,const SocketEndpoint &ep,ISocket *sock)
  5092. {
  5093. CriticalBlock block(sect);
  5094. assertex(idx<retsockets.ordinality());
  5095. sock->Link();
  5096. retsockets.replace(sock,idx);
  5097. }
  5098. void failed(unsigned idx,const SocketEndpoint &ep,int err)
  5099. {
  5100. StringBuffer s;
  5101. PROGLOG("multiConnect failed to %s with %d",ep.getUrlStr(s).str(),err);
  5102. }
  5103. } notify(retsockets,sect);
  5104. multiConnect(eps,notify,timeout);
  5105. }
  5106. inline void flushText(StringBuffer &text,unsigned short port,unsigned &rep,unsigned &range)
  5107. {
  5108. if (rep) {
  5109. text.append('*').append(rep+1);
  5110. rep = 0;
  5111. }
  5112. else if (range) {
  5113. text.append('-').append(range);
  5114. range = 0;
  5115. }
  5116. if (port)
  5117. text.append(':').append(port);
  5118. }
  5119. StringBuffer &SocketEndpointArray::getText(StringBuffer &text)
  5120. {
  5121. unsigned count = ordinality();
  5122. if (!count)
  5123. return text;
  5124. if (count==1)
  5125. return item(0).getUrlStr(text);
  5126. byte lastip[4];
  5127. const SocketEndpoint &first = item(0);
  5128. bool lastis4 = first.getNetAddress(sizeof(lastip),&lastip)==sizeof(lastip);
  5129. unsigned short lastport = first.port;
  5130. first.getIpText(text);
  5131. unsigned rep=0;
  5132. unsigned range=0;
  5133. for (unsigned i=1;i<count;i++) {
  5134. byte ip[4];
  5135. const SocketEndpoint &ep = item(i);
  5136. bool is4 = ep.getNetAddress(sizeof(ip),&ip)==sizeof(ip);
  5137. if (!lastis4||!is4) {
  5138. flushText(text,lastport,rep,range);
  5139. text.append(',');
  5140. ep.getIpText(text);
  5141. }
  5142. else { // try and shorten
  5143. unsigned j;
  5144. for (j=0;j<4;j++)
  5145. if (ip[j]!=lastip[j])
  5146. break;
  5147. if (ep.port==lastport) {
  5148. if (j==4) {
  5149. if (range) // cant have range and rep
  5150. j--; // pretend only 3 matched
  5151. else {
  5152. rep++;
  5153. continue;
  5154. }
  5155. }
  5156. else if ((j==3)&&(lastip[3]+1==ip[3])&&(rep==0)) {
  5157. range = ip[3];
  5158. lastip[3] = (byte)range;
  5159. continue;
  5160. }
  5161. }
  5162. flushText(text,lastport,rep,range);
  5163. // output diff
  5164. text.append(',');
  5165. if (j==4)
  5166. j--;
  5167. for (unsigned k=j;k<4;k++) {
  5168. if (k>j)
  5169. text.append('.');
  5170. text.append((int)ip[k]);
  5171. }
  5172. }
  5173. memcpy(&lastip,&ip,sizeof(lastip));
  5174. lastis4 = is4;
  5175. lastport = ep.port;
  5176. }
  5177. flushText(text,lastport,rep,range);
  5178. return text;
  5179. }
  // Parses a run of decimal digits at 's' into 'n' (0 if there are none) and
  // returns a pointer to the first character after the digits. Overflow
  // wraps, as for any unsigned arithmetic.
  inline const char *getnum(const char *s,unsigned &n)
  {
      n = 0;
      // cast: passing a negative char to isdigit() is undefined behaviour
      while (isdigit((unsigned char)*s)) {
          n = n*10+(*s-'0');
          s++;
      }
      return s;
  }
  5189. inline bool appendv4range(SocketEndpointArray *array,char *str,SocketEndpoint &ep, unsigned defport)
  5190. {
  5191. char *s = str;
  5192. unsigned dc = 0;
  5193. unsigned port = defport;
  5194. unsigned rng = 0;
  5195. unsigned rep = 1;
  5196. bool notip = false;
  5197. while (*s) {
  5198. if (*s=='.') {
  5199. dc++;
  5200. s++;
  5201. }
  5202. else if (*s==':') {
  5203. *s = 0;
  5204. s = (char *)getnum(s+1,port);
  5205. }
  5206. else if (*s=='-') {
  5207. *s = 0;
  5208. s = (char *)getnum(s+1,rng);
  5209. }
  5210. else if (*s=='*') {
  5211. *s = 0;
  5212. s = (char *)getnum(s+1,rep);
  5213. }
  5214. else {
  5215. if (!isdigit(*s))
  5216. notip = true;
  5217. s++;
  5218. }
  5219. }
  5220. ep.port = port;
  5221. if (*str) {
  5222. if (!notip&&((dc<3)&&((dc!=1)||(strlen(str)!=1)))) {
  5223. if (!ep.isIp4()) {
  5224. return false;
  5225. }
  5226. StringBuffer tmp;
  5227. ep.getIpText(tmp);
  5228. size32_t l = tmp.length();
  5229. dc++;
  5230. loop {
  5231. if (tmp.length()==0)
  5232. return false;
  5233. if (tmp.charAt(tmp.length()-1)=='.')
  5234. if (--dc==0)
  5235. break;
  5236. tmp.setLength(tmp.length()-1);
  5237. }
  5238. tmp.append(str);
  5239. if (rng) {
  5240. tmp.appendf("-%d",rng);
  5241. rep = ep.ipsetrange(tmp.str());
  5242. }
  5243. else
  5244. ep.ipset(tmp.str());
  5245. }
  5246. else if (rng) { // not nice as have to add back range (must be better way - maybe ipincrementto) TBD
  5247. StringBuffer tmp;
  5248. tmp.appendf("%s-%d",str,rng);
  5249. rep = ep.ipsetrange(tmp.str());
  5250. }
  5251. else if (*str)
  5252. ep.ipset(str);
  5253. if (ep.isNull())
  5254. ep.port = 0;
  5255. for (unsigned i=0;i<rep;i++) {
  5256. array->append(ep);
  5257. if (rng)
  5258. ep.ipincrement(1);
  5259. }
  5260. }
  5261. else {// just a port change
  5262. if (ep.isNull()) // avoid null values with ports
  5263. ep.port = 0;
  5264. array->append(ep);
  5265. }
  5266. return true;
  5267. }
  5268. void SocketEndpointArray::fromText(const char *text,unsigned defport)
  5269. {
  5270. // this is quite complicated with (mixed) IPv4 and IPv6
  5271. // only support 'full' IPv6 and no ranges
  5272. char *str = strdup(text);
  5273. char *s = str;
  5274. SocketEndpoint ep;
  5275. bool eol = false;
  5276. loop {
  5277. while (isspace(*s)||(*s==','))
  5278. s++;
  5279. if (!*s)
  5280. break;
  5281. char *e=s;
  5282. if (*e=='[') { // we have a IPv6
  5283. while (*e&&(*e!=']'))
  5284. e++;
  5285. while ((*e!=',')&&!isspace(*e)) {
  5286. if (!*s) {
  5287. eol = true;
  5288. break;
  5289. }
  5290. e++;
  5291. }
  5292. *e = 0;
  5293. ep.set(s,defport);
  5294. if (ep.isNull()) {
  5295. // Error TBD
  5296. }
  5297. append(ep);
  5298. }
  5299. else {
  5300. bool hascolon = false;
  5301. bool isv6 = false;
  5302. do {
  5303. if (*e==':') {
  5304. if (hascolon)
  5305. isv6 = true;
  5306. else
  5307. hascolon = true;
  5308. }
  5309. e++;
  5310. if (!*e) {
  5311. eol = true;
  5312. break;
  5313. }
  5314. } while (!isspace(*e)&&(*e!=','));
  5315. *e = 0;
  5316. if (isv6) {
  5317. ep.set(s,defport);
  5318. if (ep.isNull()) {
  5319. // Error TBD
  5320. }
  5321. append(ep);
  5322. }
  5323. else {
  5324. if (!appendv4range(this,s,ep,defport)) {
  5325. // Error TBD
  5326. }
  5327. }
  5328. }
  5329. if (eol)
  5330. break;
  5331. s = e+1;
  5332. }
  5333. free(str);
  5334. }
  5335. bool IpSubNet::set(const char *_net,const char *_mask)
  5336. {
  5337. if (!_net||!decodeNumericIP(_net,net)) { // _net NULL means match everything
  5338. memset(net,0,sizeof(net));
  5339. memset(mask,0,sizeof(mask));
  5340. return (_net==NULL);
  5341. }
  5342. if (!_mask||!decodeNumericIP(_mask,mask)) { // _mask NULL means match exact
  5343. memset(mask,0xff,sizeof(mask));
  5344. return (_mask==NULL);
  5345. }
  5346. if (isIp4(net)!=isIp4(mask))
  5347. return false;
  5348. for (unsigned j=0;j<4;j++)
  5349. if (net[j]&~mask[j])
  5350. return false;
  5351. return true;
  5352. }
  5353. bool IpSubNet::test(const IpAddress &ip) const
  5354. {
  5355. unsigned i;
  5356. if (ip.getNetAddress(sizeof(i),&i)==sizeof(i)) {
  5357. if (!isIp4(net))
  5358. return false;
  5359. return (i&mask[3])==(net[3]&mask[3]);
  5360. }
  5361. unsigned na[4];
  5362. if (ip.getNetAddress(sizeof(na),&na)==sizeof(na)) {
  5363. for (unsigned j=0;j<4;j++)
  5364. if ((na[j]&mask[j])!=(net[j]&mask[j]))
  5365. return false;
  5366. return true;
  5367. }
  5368. return false;
  5369. }
  5370. StringBuffer IpSubNet::getNetText(StringBuffer &text) const
  5371. {
  5372. char tmp[INET6_ADDRSTRLEN];
  5373. const char *res = ::isIp4(net) ? _inet_ntop(AF_INET, &net[3], tmp, sizeof(tmp))
  5374. : _inet_ntop(AF_INET6, &net, tmp, sizeof(tmp));
  5375. return text.append(res);
  5376. }
  5377. StringBuffer IpSubNet::getMaskText(StringBuffer &text) const
  5378. {
  5379. char tmp[INET6_ADDRSTRLEN];
  5380. // isIp4(net) is correct here
  5381. const char *res = ::isIp4(net) ? _inet_ntop(AF_INET, &mask[3], tmp, sizeof(tmp))
  5382. : _inet_ntop(AF_INET6, &mask, tmp, sizeof(tmp));
  5383. return text.append(res);
  5384. }
  5385. bool IpSubNet::isNull() const
  5386. {
  5387. for (unsigned i=0;i<4;i++)
  5388. if (net[i]||mask[i])
  5389. return false;
  5390. return true;
  5391. }
  5392. IpSubNet &queryPreferredSubnet()
  5393. {
  5394. return PreferredSubnet;
  5395. }
  5396. bool setPreferredSubnet(const char *ip,const char *mask)
  5397. {
  5398. // also resets cached host IP
  5399. if (PreferredSubnet.set(ip,mask))
  5400. {
  5401. if (!cachehostip.isNull())
  5402. {
  5403. cachehostip.ipset(NULL);
  5404. queryHostIP();
  5405. }
  5406. return true;
  5407. }
  5408. else
  5409. return false;
  5410. }
  5411. StringBuffer lookupHostName(const IpAddress &ip,StringBuffer &ret)
  5412. {
  5413. // not a common routine (no Jlib function!) only support IPv4 initially
  5414. unsigned ipa;
  5415. if (ip.getNetAddress(sizeof(ipa),&ipa)==sizeof(ipa)) {
  5416. struct hostent *phostent = gethostbyaddr( (char *) &ipa, sizeof(ipa), PF_INET);
  5417. if (phostent)
  5418. ret.append(phostent->h_name);
  5419. else
  5420. ip.getIpText(ret);
  5421. }
  5422. else
  5423. ip.getIpText(ret);
  5424. return ret;
  5425. }
  5426. struct SocketEndpointHTElem
  5427. {
  5428. IInterface *ii;
  5429. SocketEndpoint ep;
  5430. SocketEndpointHTElem(const SocketEndpoint _ep,IInterface *_ii) { ep.set(_ep); ii = _ii; }
  5431. ~SocketEndpointHTElem() { ::Release(ii); }
  5432. };
  5433. class jlib_decl CSocketEndpointHashTable : public SuperHashTableOf<SocketEndpointHTElem, SocketEndpoint>, implements ISocketEndpointHashTable
  5434. {
  5435. virtual void onAdd(void *) {}
  5436. virtual void onRemove(void *e) { delete (SocketEndpointHTElem *)e; }
  5437. unsigned getHashFromElement(const void *e) const
  5438. {
  5439. return ((const SocketEndpointHTElem *)e)->ep.hash(0);
  5440. }
  5441. unsigned getHashFromFindParam(const void *fp) const
  5442. {
  5443. return ((const SocketEndpoint *)fp)->hash(0);
  5444. }
  5445. const void * getFindParam(const void *p) const
  5446. {
  5447. return &((const SocketEndpointHTElem *)p)->ep;
  5448. }
  5449. bool matchesFindParam(const void * et, const void *fp, unsigned) const
  5450. {
  5451. return ((const SocketEndpointHTElem *)et)->ep.equals(*(SocketEndpoint *)fp);
  5452. }
  5453. IMPLEMENT_SUPERHASHTABLEOF_REF_FIND(SocketEndpointHTElem,SocketEndpoint);
  5454. public:
  5455. IMPLEMENT_IINTERFACE;
  5456. CSocketEndpointHashTable() {}
  5457. ~CSocketEndpointHashTable() { kill(); }
  5458. void add(const SocketEndpoint &ep, IInterface *i)
  5459. {
  5460. SocketEndpointHTElem *e = SuperHashTableOf<SocketEndpointHTElem,SocketEndpoint>::find(&ep);
  5461. if (e) {
  5462. ::Release(e->ii);
  5463. e->ii = i;
  5464. }
  5465. else {
  5466. e = new SocketEndpointHTElem(ep,i);
  5467. SuperHashTableOf<SocketEndpointHTElem,SocketEndpoint>::add(*e);
  5468. }
  5469. }
  5470. void remove(const SocketEndpoint &ep)
  5471. {
  5472. SuperHashTableOf<SocketEndpointHTElem,SocketEndpoint>::remove(&ep);
  5473. }
  5474. IInterface *find(const SocketEndpoint &ep)
  5475. {
  5476. SocketEndpointHTElem *e = SuperHashTableOf<SocketEndpointHTElem,SocketEndpoint>::find(&ep);
  5477. if (e)
  5478. return e->ii;
  5479. return NULL;
  5480. }
  5481. };
  5482. ISocketEndpointHashTable *createSocketEndpointHashTable()
  5483. {
  5484. CSocketEndpointHashTable *ht = new CSocketEndpointHashTable;
  5485. return ht;
  5486. }
  5487. class CSocketConnectWait: public CInterface, implements ISocketConnectWait
  5488. {
  5489. Owned<CSocket> sock;
  5490. bool done;
  5491. CTimeMon connecttm;
  5492. unsigned startt;
  5493. bool oneshot;
  5494. bool isopen;
  5495. int initerr;
  5496. void successfulConnect()
  5497. {
  5498. STATS.connects++;
  5499. STATS.connecttime+=usTick()-startt;
  5500. #ifdef _TRACE
  5501. char peer[256];
  5502. peer[0] = 'C';
  5503. peer[1] = '!';
  5504. strcpy(peer+2,sock->hostname?sock->hostname:"(NULL)");
  5505. free(sock->tracename);
  5506. sock->tracename = strdup(peer);
  5507. #endif
  5508. }
  5509. void failedConnect()
  5510. {
  5511. STATS.failedconnects++;
  5512. STATS.failedconnecttime+=usTick()-startt;
  5513. const char* tracename = sock->tracename;
  5514. THROWJSOCKEXCEPTION(JSOCKERR_connection_failed);
  5515. }
  5516. public:
  5517. IMPLEMENT_IINTERFACE;
  5518. CSocketConnectWait(SocketEndpoint &ep,unsigned connecttimeoutms)
  5519. : connecttm(connecttimeoutms)
  5520. {
  5521. oneshot = (connecttimeoutms==0); // i.e. as long as one connect takes
  5522. done = false;
  5523. startt = usTick();
  5524. sock.setown(new CSocket(ep,sm_tcp,NULL));
  5525. isopen = true;
  5526. initerr = sock->pre_connect(false);
  5527. }
  5528. ISocket *wait(unsigned timems)
  5529. {
  5530. // this is a bit spagetti due to dual timeouts etc
  5531. CTimeMon waittm(timems);
  5532. unsigned refuseddelay = 1;
  5533. bool waittimedout = false;
  5534. bool connectimedout = false;
  5535. do {
  5536. bool connectdone = false;
  5537. unsigned remaining;
  5538. connectimedout = connecttm.timedout(&remaining);
  5539. unsigned waitremaining;
  5540. waittimedout = waittm.timedout(&waitremaining);
  5541. if (oneshot||(waitremaining<remaining))
  5542. remaining = waitremaining;
  5543. int err = 0;
  5544. if (!isopen||initerr) {
  5545. isopen = true;
  5546. err = initerr?initerr:sock->pre_connect(false);
  5547. initerr = 0;
  5548. if ((err == EINPROGRESS)||(err == EWOULDBLOCK))
  5549. err = 0; // continue
  5550. else {
  5551. if (err==0)
  5552. connectdone = true; // done immediately
  5553. else if(!oneshot) // probably ECONNREFUSED but treat all errors same
  5554. refused_sleep((waitremaining==remaining)?waittm:connecttm,refuseddelay); // this stops becoming cpu bound
  5555. }
  5556. }
  5557. if (!connectdone&&(err==0)) {
  5558. SOCKET s = sock->sock;
  5559. T_FD_SET fds;
  5560. struct timeval tv;
  5561. XFD_ZERO(&fds);
  5562. FD_SET((unsigned)s, &fds);
  5563. T_FD_SET except;
  5564. XFD_ZERO(&except);
  5565. FD_SET((unsigned)s, &except);
  5566. tv.tv_sec = remaining / 1000;
  5567. tv.tv_usec = (remaining % 1000)*1000;
  5568. CHECKSOCKRANGE(s);
  5569. int rc = ::select( s + 1, NULL, (fd_set *)&fds, (fd_set *)&except, &tv );
  5570. if (rc==0)
  5571. break; // timeout
  5572. done = true;
  5573. err = 0;
  5574. if (rc>0) {
  5575. // select succeeded - return error from socket (0 if connected)
  5576. socklen_t errlen = sizeof(err);
  5577. rc = getsockopt(s, SOL_SOCKET, SO_ERROR, (char *)&err, &errlen); // check for error
  5578. if ((rc!=0)&&!err)
  5579. err = ERRNO(); // some implementations of getsockopt duff
  5580. if (err&&!oneshot) // probably ECONNREFUSED but treat all errors same
  5581. refused_sleep((waitremaining==remaining)?waittm:connecttm,refuseddelay); // this stops becoming cpu bound
  5582. }
  5583. else { // select failed
  5584. err = ERRNO();
  5585. LOGERR(err,2,"CSocketConnectWait ::select");
  5586. }
  5587. }
  5588. if (err==0) {
  5589. err = sock->post_connect();
  5590. if (err==0) {
  5591. successfulConnect();
  5592. return sock.getClear();
  5593. }
  5594. }
  5595. sock->errclose();
  5596. isopen = false;
  5597. } while (!waittimedout&&!oneshot);
  5598. if (connectimedout) {
  5599. STATS.failedconnects++;
  5600. STATS.failedconnecttime+=usTick()-startt;
  5601. const char* tracename = sock->tracename;
  5602. THROWJSOCKEXCEPTION(JSOCKERR_connection_failed);
  5603. }
  5604. return NULL;
  5605. }
  5606. };
  5607. ISocketConnectWait *nonBlockingConnect(SocketEndpoint &ep,unsigned connecttimeoutms)
  5608. {
  5609. return new CSocketConnectWait(ep,connecttimeoutms);
  5610. }