eclrtl.cpp 157 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "limits.h"
  14. #ifdef _USE_BOOST_REGEX
  15. #include "boost/regex.hpp" // must precede platform.h ; n.b. this uses a #pragma comment(lib, ...) to link the appropriate .lib in MSVC
  16. #endif
  17. #include "platform.h"
  18. #include <math.h>
  19. #include <stdio.h>
  20. #include "jexcept.hpp"
  21. #include "jmisc.hpp"
  22. #include "jutil.hpp"
  23. #include "jlib.hpp"
  24. #include "jptree.hpp"
  25. #include "junicode.hpp"
  26. #include "eclrtl.hpp"
  27. #include "rtlbcd.hpp"
  28. #include "eclrtl_imp.hpp"
  29. #include "unicode/uchar.h"
  30. #include "unicode/ucol.h"
  31. #include "unicode/ustring.h"
  32. #include "unicode/ucnv.h"
  33. #include "unicode/schriter.h"
  34. #include "unicode/regex.h"
  35. #include "unicode/normlzr.h"
  36. #include "unicode/locid.h"
  37. #include "jlog.hpp"
  38. #include "jmd5.hpp"
  39. #include "rtlqstr.ipp"
  40. #include "roxiemem.hpp"
  41. #define UTF8_CODEPAGE "UTF-8"
  42. #define UTF8_MAXSIZE 4
  43. IRandomNumberGenerator * random_;
  44. static CriticalSection random_Sect;
  45. MODULE_INIT(INIT_PRIORITY_ECLRTL_ECLRTL)
  46. {
  47. random_ = createRandomNumberGenerator();
  48. random_->seed((unsigned)get_cycles_now());
  49. return true;
  50. }
  51. MODULE_EXIT()
  52. {
  53. random_->Release();
  54. }
  55. //=============================================================================
  56. // Miscellaneous string functions...
  57. ECLRTL_API void * rtlMalloc(size32_t size)
  58. {
  59. return malloc(size);
  60. }
  // Hands `ptr` straight to free(); a null pointer is therefore accepted.
  void rtlFree(void *ptr)
  {
      void * const block = ptr;
      free(block);
  }
  65. ECLRTL_API void * rtlRealloc(void * _ptr, size32_t size)
  66. {
  67. return realloc(_ptr, size);
  68. }
  69. //=============================================================================
  70. ECLRTL_API void rtlReleaseRow(const void * row)
  71. {
  72. ReleaseRoxieRow(row);
  73. }
  74. ECLRTL_API void rtlReleaseRowset(unsigned count, byte * * rowset)
  75. {
  76. ReleaseRoxieRowset(count, rowset);
  77. }
  78. ECLRTL_API void * rtlLinkRow(const void * row)
  79. {
  80. LinkRoxieRow(row);
  81. return const_cast<void *>(row);
  82. }
  83. ECLRTL_API byte * * rtlLinkRowset(byte * * rowset)
  84. {
  85. LinkRoxieRowset(rowset);
  86. return rowset;
  87. }
  88. //=============================================================================
  89. // Unicode helper classes and functions
  90. // escape
  91. void escapeUnicode(unsigned inlen, UChar const * in, StringBuffer & out)
  92. {
  93. UCharCharacterIterator iter(in, inlen);
  94. for(iter.first32(); iter.hasNext(); iter.next32())
  95. {
  96. UChar32 c = iter.current32();
  97. if(c < 0x80)
  98. out.append((char) c);
  99. else if (c < 0x10000)
  100. out.appendf("\\u%04X", c);
  101. else
  102. out.appendf("\\U%08X", c);
  103. }
  104. }
  105. // locales and collators
  106. static unsigned const unicodeStrengthLimit = 5;
  107. static UCollationStrength unicodeStrength[unicodeStrengthLimit] =
  108. {
  109. UCOL_PRIMARY,
  110. UCOL_SECONDARY,
  111. UCOL_TERTIARY,
  112. UCOL_QUATERNARY,
  113. UCOL_IDENTICAL
  114. };
  115. class RTLLocale : public CInterface
  116. {
  117. public:
  118. RTLLocale(char const * _locale) : locale(_locale)
  119. {
  120. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  121. colls[i] = NULL;
  122. UErrorCode err = U_ZERO_ERROR;
  123. colls[2] = ucol_open(locale.get(), &err);
  124. assertex(U_SUCCESS(err));
  125. }
  126. ~RTLLocale()
  127. {
  128. for(unsigned i=0; i<unicodeStrengthLimit; i++)
  129. if(colls[i]) ucol_close(colls[i]);
  130. }
  131. UCollator * queryCollator() const { return colls[2]; }
  132. UCollator * queryCollator(unsigned strength) const
  133. {
  134. if(strength == 0) strength = 1;
  135. if(strength > unicodeStrengthLimit) strength = unicodeStrengthLimit;
  136. if(!colls[strength-1])
  137. {
  138. UErrorCode err = U_ZERO_ERROR;
  139. const_cast<UCollator * *>(colls)[strength-1] = ucol_open(locale.get(), &err);
  140. assertex(U_SUCCESS(err));
  141. ucol_setStrength(colls[strength-1], unicodeStrength[strength-1]);
  142. }
  143. return colls[strength-1];
  144. }
  145. private:
  146. StringAttr locale;
  147. UCollator * colls[unicodeStrengthLimit];
  148. };
  149. typedef MapStringTo<RTLLocale, char const *> MapStrToLocale;
  150. MapStrToLocale *localeMap;
  151. CriticalSection localeCrit;
  152. MODULE_INIT(INIT_PRIORITY_STANDARD)
  153. {
  154. localeMap = new MapStrToLocale;
  155. return true;
  156. }
  157. MODULE_EXIT()
  158. {
  159. delete localeMap;
  160. }
  // Normalises the case of a locale name: letters before the first '_' are
  // lower-cased, letters after it are upper-cased, and '_' is copied as is.
  //
  //   len - number of characters to read from `in` and write to `out`
  //   in  - source name (need not be null terminated)
  //   out - destination, must have room for `len` chars; NOT null terminated here
  //
  // Returns true if every character was a letter or '_'. Any other character is
  // replaced by 0 in `out` and makes the result false; processing continues so
  // all `len` positions of `out` are always written.
  bool rtlGetNormalizedUnicodeLocaleName(unsigned len, char const * in, char * out)
  {
      bool isPrimary = true;
      bool ok = true;
      for (unsigned i = 0; i < len; i++)
      {
          // The <ctype.h> functions require a value representable as unsigned char;
          // passing a negative plain char (any byte >= 0x80) is undefined behaviour.
          const unsigned char ch = static_cast<unsigned char>(in[i]);
          if (ch == '_')
          {
              out[i] = '_';
              isPrimary = false;
          }
          else if (isalpha(ch))
          {
              out[i] = (char)(isPrimary ? tolower(ch) : toupper(ch));
          }
          else
          {
              out[i] = 0;
              ok = false;
          }
      }
      return ok;
  }
  183. RTLLocale * queryRTLLocale(char const * locale)
  184. {
  185. if (!locale) locale = "";
  186. CriticalBlock b(localeCrit);
  187. RTLLocale * loc = localeMap->getValue(locale);
  188. if(!loc)
  189. {
  190. unsigned ll = strlen(locale);
  191. StringBuffer lnorm;
  192. rtlGetNormalizedUnicodeLocaleName(ll, locale, lnorm.reserve(ll));
  193. localeMap->setValue(locale, lnorm.str());
  194. loc = localeMap->getValue(locale);
  195. }
  196. return loc;
  197. }
  198. // converters
  199. class RTLUnicodeConverter : public CInterface
  200. {
  201. public:
  202. RTLUnicodeConverter(char const * codepage)
  203. {
  204. UErrorCode err = U_ZERO_ERROR;
  205. conv = ucnv_open(codepage, &err);
  206. if (!U_SUCCESS(err))
  207. {
  208. StringBuffer msg;
  209. msg.append("Unrecognised codepage '").append(codepage).append("'");
  210. rtlFail(0, msg.str());
  211. }
  212. }
  213. ~RTLUnicodeConverter()
  214. {
  215. ucnv_close(conv);
  216. }
  217. UConverter * query() const { return conv; }
  218. private:
  219. UConverter * conv;
  220. };
  221. typedef MapStringTo<RTLUnicodeConverter, char const *> MapStrToUnicodeConverter;
  222. MapStrToUnicodeConverter *unicodeConverterMap;
  223. CriticalSection ucmCrit;
  224. MODULE_INIT(INIT_PRIORITY_STANDARD)
  225. {
  226. unicodeConverterMap = new MapStrToUnicodeConverter;
  227. return true;
  228. }
  229. MODULE_EXIT()
  230. {
  231. delete unicodeConverterMap;
  232. }
  233. RTLUnicodeConverter * queryRTLUnicodeConverter(char const * codepage)
  234. {
  235. CriticalBlock b(ucmCrit);
  236. RTLUnicodeConverter * conv = unicodeConverterMap->getValue(codepage);
  237. if(!conv)
  238. {
  239. unicodeConverterMap->setValue(codepage, codepage);
  240. conv = unicodeConverterMap->getValue(codepage);
  241. }
  242. return conv;
  243. }
  244. // normalization
  245. bool unicodeNeedsNormalize(unsigned inlen, UChar * in, UErrorCode * err)
  246. {
  247. return !unorm_isNormalized(in, inlen, UNORM_NFC, err);
  248. }
  249. bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
  250. {
  251. return !unorm_isNormalized(in, -1, UNORM_NFC, err);
  252. }
  253. void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  254. {
  255. UChar * buff = (UChar *)malloc(inlen*2);
  256. unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
  257. while(len<inlen) buff[len++] = 0x0020;
  258. memcpy(in, buff, inlen);
  259. free(buff);
  260. }
  261. void vunicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
  262. {
  263. UChar * buff = (UChar *)malloc(inlen*2);
  264. unsigned len = unorm_normalize(in, -1, UNORM_NFC, 0, buff, inlen-1, err);
  265. buff[len] = 0x0000;
  266. memcpy(in, buff, inlen);
  267. free(buff);
  268. }
  269. void unicodeGetNormalized(unsigned & outlen, UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  270. {
  271. outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  272. out = (UChar *)malloc(outlen*2);
  273. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  274. }
  275. void vunicodeGetNormalized(UChar * & out, unsigned inlen, UChar * in, UErrorCode * err)
  276. {
  277. unsigned outlen = unorm_normalize(in, inlen, UNORM_NFC, 0, 0, 0, err);
  278. out = (UChar *)malloc((outlen+1)*2);
  279. unorm_normalize(in, inlen, UNORM_NFC, 0, out, outlen, err);
  280. out[outlen] = 0x0000;
  281. }
  282. void unicodeEnsureIsNormalized(unsigned len, UChar * str)
  283. {
  284. UErrorCode err = U_ZERO_ERROR;
  285. if(unicodeNeedsNormalize(len, str, &err))
  286. unicodeReplaceNormalized(len, str, &err);
  287. }
  288. void vunicodeEnsureIsNormalized(unsigned len, UChar * str)
  289. {
  290. UErrorCode err = U_ZERO_ERROR;
  291. if(vunicodeNeedsNormalize(str, &err))
  292. vunicodeReplaceNormalized(len, str, &err);
  293. }
  294. void unicodeEnsureIsNormalizedX(unsigned & len, UChar * & str)
  295. {
  296. UErrorCode err = U_ZERO_ERROR;
  297. if(unicodeNeedsNormalize(len, str, &err))
  298. {
  299. unsigned inlen = len;
  300. UChar * in = str;
  301. unicodeGetNormalized(len, str, inlen, in, &err);
  302. free(in);
  303. }
  304. }
  305. void vunicodeEnsureIsNormalizedX(unsigned inlen, UChar * & str)
  306. {
  307. UErrorCode err = U_ZERO_ERROR;
  308. if(unicodeNeedsNormalize(inlen, str, &err))
  309. {
  310. UChar * in = str;
  311. vunicodeGetNormalized(str, inlen, in, &err);
  312. free(in);
  313. }
  314. }
  315. void unicodeNormalizedCopy(UChar * out, UChar * in, unsigned len)
  316. {
  317. UErrorCode err = U_ZERO_ERROR;
  318. if(unicodeNeedsNormalize(len, in, &err))
  319. unorm_normalize(in, len, UNORM_NFC, 0, out, len, &err);
  320. else
  321. memcpy(out, in, len);
  322. }
// Composes `in` into `out` using Normalizer::compose and asserts that ICU reported success.
void normalizeUnicodeString(UnicodeString const & in, UnicodeString & out)
{
    UErrorCode err = U_ZERO_ERROR;
    Normalizer::compose(in, false, 0, out, err);
    assertex(U_SUCCESS(err));
}
  329. // padding
  330. void multimemset(char * out, unsigned outlen, char const * in, unsigned inlen)
  331. {
  332. unsigned outpos = 0;
  333. unsigned inpos = 0;
  334. while(outpos < outlen)
  335. {
  336. out[outpos++] = in[inpos++];
  337. if(inpos == inlen)
  338. inpos = 0;
  339. }
  340. }
//Maps a codepage name to the encoded form of a blank in that codepage (populated on demand by codepageBlankFill)
typedef MapStringTo<MemoryAttr, size32_t> MemoryAttrMapping;
MemoryAttrMapping *unicodeBlankCache;
//Held by codepageBlankFill while it reads or updates unicodeBlankCache
CriticalSection ubcCrit;
//Create the cache when the module is initialised...
MODULE_INIT(INIT_PRIORITY_STANDARD)
{
    unicodeBlankCache = new MemoryAttrMapping;
    return true;
}
//...and destroy it when the module exits
MODULE_EXIT()
{
    delete unicodeBlankCache;
}
//The unicode space (U+0020) that codepageBlankFill converts to obtain a codepage's blank
UChar unicodeSpace = 0x0020;
  354. void codepageBlankFill(char const * codepage, char * out, unsigned len)
  355. {
  356. CriticalBlock b(ubcCrit);
  357. MemoryAttr * cached = unicodeBlankCache->getValue(codepage);
  358. if(cached)
  359. {
  360. char const * blank = (char const *)cached->get();
  361. size32_t blanklen = cached->length();
  362. if(blanklen==1)
  363. memset(out, *blank, len);
  364. else
  365. multimemset(out, len, blank, blanklen);
  366. }
  367. else
  368. {
  369. unsigned blanklen;
  370. char * blank;
  371. rtlUnicodeToCodepageX(blanklen, blank, 1, &unicodeSpace, codepage);
  372. unicodeBlankCache->setValue(codepage, blanklen);
  373. unicodeBlankCache->getValue(codepage)->set(blanklen, blank);
  374. if(blanklen==1)
  375. memset(out, *blank, len);
  376. else
  377. multimemset(out, len, blank, blanklen);
  378. free(blank);
  379. }
  380. }
  381. //---------------------------------------------------------------------------
  382. // floating point functions
  383. static const double smallPowers[16] = {
  384. 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  385. 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };
  386. static double powerOfTen(int x)
  387. {
  388. if (x < 0)
  389. return 1 / powerOfTen(-x);
  390. double value = smallPowers[x&15];
  391. double scale = 1e16;
  392. x >>= 4;
  393. while (x)
  394. {
  395. if (x & 1)
  396. value *= scale;
  397. scale *= scale;
  398. x >>= 1;
  399. }
  400. return value;
  401. };
  402. static double kk = (1.0 / ((unsigned __int64)1<<53));
  403. __int64 rtlRound(double x)
  404. {
  405. //a fudge to make numbers that are inexact after a division round up "correctly".
  406. //coded rather oddly as microsoft's optimizer has a habit of throwing it away otherwise...
  407. volatile double tt = x * kk;
  408. x += tt;
  409. if (x >= 0.0)
  410. return (__int64)(x + 0.5);
  411. return -(__int64)(-x + 0.5);
  412. }
  413. double rtlRoundTo(const double x, int places)
  414. {
  415. if (x < 0)
  416. return -rtlRoundTo(-x, places);
  417. volatile double tt = x * kk;
  418. double x0 = x + tt;
  419. if (places >= 0)
  420. {
  421. double scale = powerOfTen(places);
  422. return floor(x * scale + 0.5) / scale;
  423. }
  424. else
  425. {
  426. double scale = powerOfTen(-places);
  427. return floor(x / scale + 0.5) * scale;
  428. }
  429. }
  430. __int64 rtlRoundDown(double x)
  431. {
  432. if (x >= 0.0)
  433. return (__int64)floor(x);
  434. return (__int64)ceil(x);
  435. }
  436. __int64 rtlRoundUp(double x)
  437. {
  438. if (x >= 0.0)
  439. return (__int64)ceil(x);
  440. return (__int64)floor(x);
  441. }
  442. //=============================================================================
  443. // Numeric conversion functions... - fixed length target
  444. #define intToStringNBody() \
  445. unsigned len = numtostr(temp, val); \
  446. if (len > l) \
  447. memset(t,'*',l); \
  448. else \
  449. { \
  450. memcpy(t,temp,len); \
  451. memset(t+len, ' ', l-len); \
  452. }
  453. void rtlUInt4ToStr(size32_t l, char * t, unsigned val)
  454. {
  455. char temp[20];
  456. intToStringNBody();
  457. }
  458. void rtlUInt8ToStr(size32_t l, char * t, unsigned __int64 val)
  459. {
  460. char temp[40];
  461. intToStringNBody();
  462. }
  463. void rtlInt4ToStr(size32_t l, char * t, int val)
  464. {
  465. char temp[20];
  466. intToStringNBody();
  467. }
  468. void rtlInt8ToStr(size32_t l, char * t, __int64 val)
  469. {
  470. char temp[40];
  471. intToStringNBody();
  472. }
  473. //=============================================================================
  474. // Numeric conversion functions... - unknown length target
  475. #define intToUnknownStringBody() \
  476. unsigned len = numtostr(temp, val); \
  477. char * result = (char *)malloc(len); \
  478. memcpy(result, temp, len); \
  479. l = len; \
  480. t = result;
  481. void rtlUInt4ToStrX(size32_t & l, char * & t, unsigned val)
  482. {
  483. char temp[20];
  484. intToUnknownStringBody();
  485. }
  486. void rtlUInt8ToStrX(size32_t & l, char * & t, unsigned __int64 val)
  487. {
  488. char temp[40];
  489. intToUnknownStringBody();
  490. }
  491. void rtlInt4ToStrX(size32_t & l, char * & t, int val)
  492. {
  493. char temp[20];
  494. intToUnknownStringBody();
  495. }
  496. void rtlInt8ToStrX(size32_t & l, char * & t, __int64 val)
  497. {
  498. char temp[40];
  499. intToUnknownStringBody();
  500. }
  501. //=============================================================================
  502. // Numeric conversion functions... - fixed length ebcdic target
  503. // ILKA - converting ebcdic to numeric still uses string in between, for more efficiency
  504. // a function numtoebcdicstr should be implemented
  505. #define intToEbcdicStringNBody() \
  506. unsigned len = numtostr(astr, val); \
  507. rtlStrToEStr(sizeof(estr),estr,len,astr); \
  508. if (len > l) \
  509. memset(t,0x2A,l); \
  510. else \
  511. { \
  512. memcpy(t,estr,len); \
  513. memset(t+len, '@', l-len); \
  514. }
  515. void rtl_l42en(size32_t l, char * t, unsigned val)
  516. {
  517. char astr[20];
  518. char estr[20];
  519. intToEbcdicStringNBody();
  520. }
  521. void rtl_l82en(size32_t l, char * t, unsigned __int64 val)
  522. {
  523. char astr[40];
  524. char estr[40];
  525. intToEbcdicStringNBody();
  526. }
  527. void rtl_ls42en(size32_t l, char * t, int val)
  528. {
  529. char astr[20];
  530. char estr[20];
  531. intToEbcdicStringNBody();
  532. }
  533. void rtl_ls82en(size32_t l, char * t, __int64 val)
  534. {
  535. char astr[40];
  536. char estr[40];
  537. intToEbcdicStringNBody();
  538. }
  539. //=============================================================================
  540. // Numeric conversion functions... - unknown length ebcdic target
  541. #define intToUnknownEbcdicStringBody() \
  542. unsigned alen = numtostr(astr, val); \
  543. rtlStrToEStrX(elen,estr,alen,astr); \
  544. char * result = (char *)malloc(elen); \
  545. memcpy(result, estr, elen); \
  546. l = elen; \
  547. t = result;
  548. #if defined _MSC_VER
  549. #pragma warning(push)
  550. #pragma warning(disable:4700)
  551. #endif
  552. void rtl_l42ex(size32_t & l, char * & t, unsigned val)
  553. {
  554. char astr[20];
  555. char * estr;
  556. unsigned elen;
  557. intToUnknownEbcdicStringBody();
  558. }
  559. void rtl_l82ex(size32_t & l, char * & t, unsigned __int64 val)
  560. {
  561. char astr[40];
  562. char * estr;
  563. unsigned elen;
  564. intToUnknownEbcdicStringBody();
  565. }
  566. void rtl_ls42ex(size32_t & l, char * & t, int val)
  567. {
  568. char astr[20];
  569. char * estr;
  570. unsigned elen;
  571. intToUnknownEbcdicStringBody();
  572. }
  573. void rtl_ls82ex(size32_t & l, char * & t, __int64 val)
  574. {
  575. char astr[40];
  576. char * estr;
  577. unsigned elen;
  578. intToUnknownEbcdicStringBody();
  579. }
  580. #ifdef _MSC_VER
  581. #pragma warning(pop)
  582. #endif
  583. //=============================================================================
  584. // Numeric conversion functions... - fixed length variable target
  585. #define intToVarStringNBody() \
  586. unsigned len = numtostr(temp, val) + 1; \
  587. if (len > l) \
  588. { \
  589. memset(t,'*',l); \
  590. t[l-1]=0; \
  591. } \
  592. else \
  593. memcpy(t,temp,len);
  594. void rtlUInt4ToVStr(size32_t l, char * t, unsigned val)
  595. {
  596. char temp[20];
  597. intToVarStringNBody();
  598. }
  599. void rtlUInt8ToVStr(size32_t l, char * t, unsigned __int64 val)
  600. {
  601. char temp[40];
  602. intToVarStringNBody();
  603. }
  604. void rtlInt4ToVStr(size32_t l, char * t, int val)
  605. {
  606. char temp[20];
  607. intToVarStringNBody();
  608. }
  609. void rtlInt8ToVStr(size32_t l, char * t, __int64 val)
  610. {
  611. char temp[40];
  612. intToVarStringNBody();
  613. }
  614. //=============================================================================
  615. // Numeric conversion functions... - unknown length variable target
  616. #define intToVarStringXBody() \
  617. unsigned len = numtostr(temp, val); \
  618. temp[len] = 0; \
  619. return strdup(temp);
  620. char * rtlUInt4ToVStrX(unsigned val)
  621. {
  622. char temp[20];
  623. intToVarStringXBody();
  624. }
  625. char * rtlUInt8ToVStrX(unsigned __int64 val)
  626. {
  627. char temp[40];
  628. intToVarStringXBody();
  629. }
  630. char * rtlInt4ToVStrX(int val)
  631. {
  632. char temp[20];
  633. intToVarStringXBody();
  634. }
  635. char * rtlInt8ToVStrX(__int64 val)
  636. {
  637. char temp[40];
  638. intToVarStringXBody();
  639. }
  640. //---------------------------------------------------------------------------
  641. double rtlStrToReal(size32_t l, const char * t)
  642. {
  643. char * temp = (char *)alloca(l+1);
  644. memcpy(temp, t, l);
  645. temp[l] = 0;
  646. return rtlVStrToReal(temp);
  647. }
  648. double rtlEStrToReal(size32_t l, const char * t)
  649. {
  650. char * astr = (char*)alloca(l);
  651. rtlEStrToStr(l,astr,l,t);
  652. char * temp = (char *)alloca(l+1);
  653. memcpy(temp, astr, l);
  654. temp[l] = 0;
  655. return rtlVStrToReal(temp);
  656. }
  657. double rtlVStrToReal(const char * t)
  658. {
  659. char * end;
  660. return strtod(t, &end);
  661. }
  662. double rtl_ex2f(const char * t)
  663. {
  664. unsigned len = strlen(t);
  665. char * astr = (char*)alloca(len+1);
  666. rtlEStrToStr(len,astr,len,t);
  667. astr[len] = 0;
  668. return rtlVStrToReal(astr);
  669. }
  670. double rtlUnicodeToReal(size32_t l, UChar const * t)
  671. {
  672. unsigned bufflen;
  673. char * buff;
  674. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  675. double ret = rtlStrToReal(bufflen, buff);
  676. rtlFree(buff);
  677. return ret;
  678. }
  679. //---------------------------------------------------------------------------
  680. void rtlRealToStr(size32_t l, char * t, double val)
  681. {
  682. StringBuffer temp;
  683. temp.append(val);
  684. unsigned len = temp.length();
  685. if (len > l)
  686. memset(t,'*',l);
  687. else
  688. {
  689. memcpy(t,temp.str(),len);
  690. memset(t+len, ' ', l-len);
  691. }
  692. }
  693. void rtlRealToStr(size32_t l, char * t, float val)
  694. {
  695. StringBuffer temp;
  696. temp.append(val);
  697. unsigned len = temp.length();
  698. if (len > l)
  699. memset(t,'*',l);
  700. else
  701. {
  702. memcpy(t,temp.str(),len);
  703. memset(t+len, ' ', l-len);
  704. }
  705. }
  706. void rtlRealToStrX(size32_t & l, char * & t, double val)
  707. {
  708. StringBuffer temp;
  709. temp.append(val);
  710. unsigned len = temp.length();
  711. char * result = (char *)malloc(len);
  712. memcpy(result,temp.str(),len);
  713. l = len;
  714. t = result;
  715. }
  716. void rtlRealToStrX(size32_t & l, char * & t, float val)
  717. {
  718. StringBuffer temp;
  719. temp.append(val);
  720. unsigned len = temp.length();
  721. char * result = (char *)malloc(len);
  722. memcpy(result,temp.str(),len);
  723. l = len;
  724. t = result;
  725. }
  726. void rtlRealToVStr(size32_t l, char * t, double val)
  727. {
  728. StringBuffer temp;
  729. temp.append(val);
  730. unsigned len = temp.length()+1;
  731. if (len > l)
  732. {
  733. memset(t,'*',l);
  734. t[l-1]=0;
  735. }
  736. else
  737. {
  738. memcpy(t,temp.str(),len);
  739. }
  740. }
  741. void rtlRealToVStr(size32_t l, char * t, float val)
  742. {
  743. StringBuffer temp;
  744. temp.append(val);
  745. unsigned len = temp.length()+1;
  746. if (len > l)
  747. {
  748. memset(t,'*',l);
  749. t[l-1]=0;
  750. }
  751. else
  752. {
  753. memcpy(t,temp.str(),len);
  754. }
  755. }
  756. char * rtlRealToVStrX(double val)
  757. {
  758. StringBuffer temp;
  759. temp.append(val);
  760. return strdup(temp);
  761. }
  762. char * rtlRealToVStrX(float val)
  763. {
  764. StringBuffer temp;
  765. temp.append(val);
  766. return strdup(temp);
  767. }
  768. //---------------------------------------------------------------------------
  769. #define SkipSpaces(l, t) \
  770. while (l) \
  771. { \
  772. char c = *t; \
  773. switch (c) \
  774. { \
  775. case ' ': \
  776. case '\t': \
  777. case '-': \
  778. case '+': \
  779. break; \
  780. default: \
  781. goto done; \
  782. } \
  783. l--; \
  784. t++; \
  785. } \
  786. done:
  787. #define SkipSignSpaces(l, t, negate) \
  788. while (l) \
  789. { \
  790. char c = *t; \
  791. switch (c) \
  792. { \
  793. case '-': \
  794. negate = true; \
  795. break; \
  796. case ' ': \
  797. case '\t': \
  798. case '+': \
  799. break; \
  800. default: \
  801. goto done; \
  802. } \
  803. l--; \
  804. t++; \
  805. } \
  806. done:
  807. unsigned rtlStrToUInt4(size32_t l, const char * t)
  808. {
  809. SkipSpaces(l, t);
  810. unsigned v = 0;
  811. while (l--)
  812. {
  813. char c = *t++;
  814. if ((c >= '0') && (c <= '9'))
  815. v = v * 10 + (c-'0');
  816. else
  817. break;
  818. }
  819. return v;
  820. }
  821. unsigned __int64 rtlStrToUInt8(size32_t l, const char * t)
  822. {
  823. SkipSpaces(l, t);
  824. unsigned __int64 v = 0;
  825. while (l--)
  826. {
  827. char c = *t++;
  828. if ((c >= '0') && (c <= '9'))
  829. v = v * 10 + (c-'0');
  830. else
  831. break;
  832. }
  833. return v;
  834. }
  835. int rtlStrToInt4(size32_t l, const char * t)
  836. {
  837. bool negate = false;
  838. SkipSignSpaces(l, t, negate);
  839. int v = 0;
  840. while (l--)
  841. {
  842. char c = *t++;
  843. if ((c >= '0') && (c <= '9'))
  844. v = v * 10 + (c-'0');
  845. else
  846. break;
  847. }
  848. return negate ? -v : v;
  849. }
  850. __int64 rtlStrToInt8(size32_t l, const char * t)
  851. {
  852. bool negate = false;
  853. SkipSignSpaces(l, t, negate);
  854. __int64 v = 0;
  855. while (l--)
  856. {
  857. char c = *t++;
  858. if ((c >= '0') && (c <= '9'))
  859. v = v * 10 + (c-'0');
  860. else
  861. break;
  862. }
  863. return negate ? -v : v;
  864. }
  865. __int64 rtlUnicodeToInt8(size32_t l, UChar const * t)
  866. {
  867. unsigned bufflen;
  868. char * buff;
  869. rtlUnicodeToCodepageX(bufflen, buff, l, t, "ascii");
  870. __int64 ret = rtlStrToInt8(bufflen, buff);
  871. rtlFree(buff);
  872. return ret;
  873. }
  874. bool rtlStrToBool(size32_t l, const char * t)
  875. {
  876. while (l--)
  877. {
  878. char c = *t++;
  879. if (c != ' ')
  880. return true;
  881. }
  882. return false;
  883. }
  884. bool rtlUnicodeToBool(size32_t l, UChar const * t)
  885. {
  886. while(l--)
  887. if(*t++ != 0x20) return true;
  888. return false;
  889. }
// return true for "on", "true" or any non-zero constant, else false;
// The decision itself is delegated entirely to clipStrToBool.
bool rtlCsvStrToBool(size32_t l, const char * t)
{
    return clipStrToBool(l, t);
}
  895. //---------------------------------------------------------------------------
  896. unsigned rtlEStrToUInt4(size32_t l, const char * t)
  897. {
  898. char * astr = (char*)alloca(l);
  899. rtlEStrToStr(l,astr,l,t);
  900. return rtlStrToUInt4(l,astr);
  901. }
  902. unsigned __int64 rtlEStrToUInt8(size32_t l, const char * t)
  903. {
  904. char * astr = (char*)alloca(l);
  905. rtlEStrToStr(l,astr,l,t);
  906. return rtlStrToUInt8(l,astr);
  907. }
  908. int rtlEStrToInt4(size32_t l, const char * t)
  909. {
  910. char * astr = (char*)alloca(l);
  911. rtlEStrToStr(l,astr,l,t);
  912. return rtlStrToInt4(l,astr);
  913. }
  914. __int64 rtlEStrToInt8(size32_t l, const char * t)
  915. {
  916. char * astr = (char*)alloca(l);
  917. rtlEStrToStr(l,astr,l,t);
  918. return rtlStrToInt8(l,astr);
  919. }
  920. bool rtl_en2b(size32_t l, const char * t)
  921. {
  922. char * astr = (char*)alloca(l);
  923. rtlEStrToStr(l,astr,l,t);
  924. return rtlStrToBool(l,astr);
  925. }
  926. //---------------------------------------------------------------------------
// Null-terminated string -> unsigned 32-bit: measures the text with strlen and delegates to rtlStrToUInt4.
unsigned rtlVStrToUInt4(const char * t)
{
    return rtlStrToUInt4(strlen(t), t);
}
// Null-terminated string -> unsigned 64-bit: measures the text with strlen and delegates to rtlStrToUInt8.
unsigned __int64 rtlVStrToUInt8(const char * t)
{
    return rtlStrToUInt8(strlen(t), t);
}
// Null-terminated string -> signed 32-bit: measures the text with strlen and delegates to rtlStrToInt4.
int rtlVStrToInt4(const char * t)
{
    return rtlStrToInt4(strlen(t), t);
}
// Null-terminated string -> signed 64-bit: measures the text with strlen and delegates to rtlStrToInt8.
__int64 rtlVStrToInt8(const char * t)
{
    return rtlStrToInt8(strlen(t), t);
}
  943. bool rtlVStrToBool(const char * t)
  944. {
  945. char c;
  946. while ((c = *t++) != 0)
  947. {
  948. //MORE: Allow spaces if we change the semantics.
  949. return true;
  950. }
  951. return false;
  952. }
  953. //---------------------------------------------------------------------------
  954. void holeIntFormat(size32_t maxlen, char * target, __int64 value, unsigned width, unsigned flags)
  955. {
  956. StringBuffer result;
  957. if (flags & 1)
  958. result.appendf("%0*"I64F"d", width, value);
  959. else
  960. result.appendf("%*"I64F"d", width, value);
  961. size32_t written = result.length();
  962. if (written > maxlen)
  963. memset(target, '*', maxlen);
  964. else
  965. {
  966. memset(target+written, ' ', maxlen-written);
  967. memcpy(target, result.str(), written);
  968. }
  969. }
  970. void holeRealFormat(size32_t maxlen, char * target, double value, unsigned width, unsigned places)
  971. {
  972. if ((int) width < 0)
  973. return;
  974. char temp[500];
  975. if (width > sizeof(temp))
  976. {
  977. unsigned delta = width - sizeof(temp);
  978. memset(target, ' ', delta);
  979. target += delta;
  980. width = sizeof(temp);
  981. }
  982. if (places >= width) places = width-1;
  983. unsigned written = sprintf(temp, "%*.*f", width, places, value);
  984. if (written > width)
  985. {
  986. memset(target, '*', width);
  987. if (places)
  988. target[width-places-1] = '.';
  989. }
  990. else
  991. memcpy(target, temp, width);
  992. }
  993. //=============================================================================
  994. // Conversion functions...
  995. void rtlIntFormat(unsigned & len, char * & target, __int64 value, unsigned width, unsigned flags)
  996. {
  997. if ((int) width <= 0)
  998. {
  999. len = 0;
  1000. target = NULL;
  1001. return;
  1002. }
  1003. len = width;
  1004. target = (char *)malloc(width);
  1005. holeIntFormat(width, target, value, width, flags);
  1006. }
  1007. void rtlRealFormat(unsigned & len, char * & target, double value, unsigned width, unsigned places)
  1008. {
  1009. if ((int) width < 0)
  1010. {
  1011. len = 0;
  1012. target = NULL;
  1013. return;
  1014. }
  1015. len = width;
  1016. target = (char *)malloc(width);
  1017. holeRealFormat(width, target, value, width, places);
  1018. }
  1019. //=============================================================================
  1020. // String functions...
  1021. bool rtlDataToBool(unsigned len, const void * _src)
  1022. {
  1023. const char * src = (const char *)_src;
  1024. while (len--)
  1025. if (*src++)
  1026. return true;
  1027. return false;
  1028. }
  1029. void rtlBoolToData(unsigned tlen, void * tgt, bool src)
  1030. {
  1031. memset(tgt, 0, tlen);
  1032. if (src)
  1033. ((char *)tgt)[tlen-1] = 1;
  1034. }
  1035. void rtlBoolToStr(unsigned tlen, void * tgt, bool src)
  1036. {
  1037. memset(tgt, ' ', tlen);
  1038. if (src)
  1039. ((char *)tgt)[tlen-1] = '1';
  1040. }
  1041. void rtlBoolToVStr(char * tgt, bool src)
  1042. {
  1043. if (src)
  1044. *tgt++ = '1';
  1045. *tgt = 0;
  1046. }
  1047. void rtlBoolToStrX(unsigned & tlen, char * & tgt, bool src)
  1048. {
  1049. if (src)
  1050. {
  1051. char * ret = (char *)malloc(1);
  1052. ret[0] = '1';
  1053. tlen = 1;
  1054. tgt = ret;
  1055. }
  1056. else
  1057. {
  1058. tlen = 0;
  1059. tgt = NULL;
  1060. }
  1061. }
  1062. char * rtlBoolToVStrX(bool src)
  1063. {
  1064. if (src)
  1065. return strdup("1");
  1066. else
  1067. return strdup("");
  1068. }
  1069. //-----------------------------------------------------------------------------
  1070. // String copying functions....
  1071. void rtlDataToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1072. {
  1073. if (slen > tlen)
  1074. slen = tlen;
  1075. memcpy(tgt, src, slen);
  1076. if (tlen > slen)
  1077. memset((char *)tgt+slen, 0, tlen-slen);
  1078. }
  1079. void rtlStrToData(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1080. {
  1081. if (slen > tlen)
  1082. slen = tlen;
  1083. memcpy(tgt, src, slen);
  1084. if (tlen > slen)
  1085. memset((char *)tgt+slen, 0, tlen-slen);
  1086. }
  1087. void rtlStrToStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1088. {
  1089. if (slen > tlen)
  1090. slen = tlen;
  1091. memcpy(tgt, src, slen);
  1092. if (tlen > slen)
  1093. memset((char *)tgt+slen, ' ', tlen-slen);
  1094. }
  1095. void rtlStrToVStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1096. {
  1097. if ((slen >= tlen) && (tlen != 0))
  1098. slen = tlen-1;
  1099. memcpy(tgt, src, slen);
  1100. *((char *)tgt+slen)=0;
  1101. }
// Alternative name for rtlStrToEStr: the arguments are passed straight through.
void rtlStr2EStr(unsigned tlen, char * tgt, unsigned slen, const char * src)
{
    rtlStrToEStr(tlen,tgt,slen,src);
}
  1106. void rtlEStr2Data(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1107. {
  1108. if (slen > tlen)
  1109. slen = tlen;
  1110. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1111. if (tlen > slen)
  1112. memset((char *)tgt+slen, 0, tlen-slen);
  1113. }
// Alternative name for rtlEStrToStr taking a void * target: the arguments are passed straight through.
void rtlEStr2Str(unsigned tlen, void * tgt, unsigned slen, const char * src)
{
    rtlEStrToStr(tlen,(char *)tgt,slen,src);
}
  1118. void rtlEStrToVStr(unsigned tlen, void * tgt, unsigned slen, const char * src)
  1119. {
  1120. if (slen >= tlen)
  1121. slen = tlen-1;
  1122. rtlEStrToStr(slen,(char *)tgt,slen,src);
  1123. *((char *)tgt+slen)=0;
  1124. }
  1125. void rtlEStrToEStr(unsigned tlen, void * tgt, unsigned slen, const void * src)
  1126. {
  1127. if (slen > tlen)
  1128. slen = tlen;
  1129. memcpy(tgt, src, slen);
  1130. if (tlen > slen)
  1131. memset((char *)tgt+slen, '@', tlen-slen);
  1132. }
// Null-terminated source -> fixed-length data: measures src with strlen and delegates to rtlStrToData.
void rtlVStrToData(unsigned tlen, void * tgt, const char * src)
{
    rtlStrToData(tlen, tgt, strlen(src), src);
}
// Null-terminated source -> fixed-length string: measures src with strlen and delegates to rtlStrToStr.
void rtlVStrToStr(unsigned tlen, void * tgt, const char * src)
{
    rtlStrToStr(tlen, tgt, strlen(src), src);
}
// Null-terminated source -> ebcdic string: measures src with strlen and delegates to rtlStr2EStr.
void rtlVStr2EStr(unsigned tlen, char * tgt, const char * src)
{
    rtlStr2EStr(tlen, tgt, strlen(src), src);
}
// Null-terminated source -> null-terminated target: measures src with strlen and delegates to rtlStrToVStr.
void rtlVStrToVStr(unsigned tlen, void * tgt, const char * src)
{
    rtlStrToVStr(tlen, tgt, strlen(src), src);
}
  1149. char *rtlCreateQuotedString(unsigned _len_tgt,char * tgt)
  1150. {
  1151. // Add ' at start and end. MORE! also needs to handle embedded quotes
  1152. char * result = (char *)malloc(_len_tgt + 3);
  1153. result[0] = '\'';
  1154. memcpy(result+1, tgt, _len_tgt);
  1155. result[_len_tgt+1] = '\'';
  1156. result[_len_tgt+2] = 0;
  1157. return result;
  1158. }
  1159. //-----------------------------------------------------------------------------
  1160. //List of strings with length of -1 to mark the end...
  1161. void rtlConcat(unsigned & tlen, char * * tgt, ...)
  1162. {
  1163. va_list args;
  1164. unsigned totalLength = 0;
  1165. va_start(args, tgt);
  1166. for (;;)
  1167. {
  1168. unsigned len = va_arg(args, unsigned);
  1169. if (len+1==0)
  1170. break;
  1171. char * str = va_arg(args, char *);
  1172. totalLength += len;
  1173. }
  1174. va_end(args);
  1175. char * buffer = (char *)malloc(totalLength);
  1176. char * cur = buffer;
  1177. va_start(args, tgt);
  1178. for (;;)
  1179. {
  1180. unsigned len = va_arg(args, unsigned);
  1181. if (len+1==0)
  1182. break;
  1183. char * str = va_arg(args, char *);
  1184. memcpy(cur, str, len);
  1185. cur += len;
  1186. }
  1187. va_end(args);
  1188. tlen = totalLength;
  1189. *tgt = buffer;
  1190. }
  1191. void rtlConcatVStr(char * * tgt, ...)
  1192. {
  1193. va_list args;
  1194. unsigned totalLength = 0;
  1195. va_start(args, tgt);
  1196. for (;;)
  1197. {
  1198. unsigned len = va_arg(args, unsigned);
  1199. if (len+1==0)
  1200. break;
  1201. char * str = va_arg(args, char *);
  1202. totalLength += len;
  1203. }
  1204. va_end(args);
  1205. char * buffer = (char *)malloc(totalLength+1);
  1206. char * cur = buffer;
  1207. va_start(args, tgt);
  1208. for (;;)
  1209. {
  1210. unsigned len = va_arg(args, unsigned);
  1211. if (len+1==0)
  1212. break;
  1213. char * str = va_arg(args, char *);
  1214. memcpy(cur, str, len);
  1215. cur += len;
  1216. }
  1217. va_end(args);
  1218. cur[0] = 0;
  1219. *tgt = buffer;
  1220. }
  1221. void rtlConcatUnicode(unsigned & tlen, UChar * * tgt, ...)
  1222. {
  1223. va_list args;
  1224. unsigned totalLength = 0;
  1225. va_start(args, tgt);
  1226. for(;;)
  1227. {
  1228. unsigned len = va_arg(args, unsigned);
  1229. if(len+1==0)
  1230. break;
  1231. UChar * str = va_arg(args, UChar *);
  1232. totalLength += len;
  1233. }
  1234. va_end(args);
  1235. UChar * buffer = (UChar *)malloc(totalLength*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1236. unsigned idx = 0;
  1237. UErrorCode err = U_ZERO_ERROR;
  1238. va_start(args, tgt);
  1239. for(;;)
  1240. {
  1241. unsigned len = va_arg(args, unsigned);
  1242. if(len+1==0)
  1243. break;
  1244. UChar * str = va_arg(args, UChar *);
  1245. if (len)
  1246. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1247. }
  1248. va_end(args);
  1249. *tgt = buffer;
  1250. tlen = idx;
  1251. }
  1252. void rtlConcatVUnicode(UChar * * tgt, ...)
  1253. {
  1254. va_list args;
  1255. unsigned totalLength = 0;
  1256. va_start(args, tgt);
  1257. for(;;)
  1258. {
  1259. unsigned len = va_arg(args, unsigned);
  1260. if(len+1==0)
  1261. break;
  1262. UChar * str = va_arg(args, UChar *);
  1263. totalLength += len;
  1264. }
  1265. va_end(args);
  1266. UChar * buffer = (UChar *)malloc((totalLength+1)*2); //I *believe* this is a valid upper limit, as an NFC concatenation can only be shorter than the sum of its parts
  1267. unsigned idx = 0;
  1268. UErrorCode err = U_ZERO_ERROR;
  1269. va_start(args, tgt);
  1270. for(;;)
  1271. {
  1272. unsigned len = va_arg(args, unsigned);
  1273. if(len+1==0)
  1274. break;
  1275. UChar * str = va_arg(args, UChar *);
  1276. if (len)
  1277. idx = unorm_concatenate(buffer, idx, str, len, buffer, totalLength, UNORM_NFC, 0, &err);
  1278. }
  1279. va_end(args);
  1280. buffer[idx++] = 0x0000;
  1281. *tgt = buffer;
  1282. }
  1283. //List of strings with length of -1 to mark the end...
  1284. void rtlConcatStrF(unsigned tlen, void * _tgt, int fill, ...)
  1285. {
  1286. va_list args;
  1287. char * tgt = (char *)_tgt;
  1288. unsigned offset = 0;
  1289. va_start(args, fill);
  1290. while (offset != tlen)
  1291. {
  1292. unsigned len = va_arg(args, unsigned);
  1293. if (len+1==0)
  1294. break;
  1295. const char * str = va_arg(args, const char *);
  1296. unsigned copyLen = len + offset > tlen ? tlen - offset : len;
  1297. memcpy(tgt+offset, str, copyLen);
  1298. offset += copyLen;
  1299. }
  1300. va_end(args);
  1301. if (offset < tlen)
  1302. memset(tgt+offset, fill, tlen-offset);
  1303. }
  1304. void rtlConcatVStrF(unsigned tlen, char * tgt, ...)
  1305. {
  1306. va_list args;
  1307. unsigned offset = 0;
  1308. va_start(args, tgt);
  1309. while (offset != tlen)
  1310. {
  1311. unsigned len = va_arg(args, unsigned);
  1312. if (len+1==0)
  1313. break;
  1314. const char * str = va_arg(args, const char *);
  1315. unsigned copyLen = len + offset > tlen ? tlen - offset : len;
  1316. memcpy(tgt+offset, str, copyLen);
  1317. offset += copyLen;
  1318. }
  1319. va_end(args);
  1320. memset(tgt+offset, 0, (tlen+1)-offset);
  1321. }
  1322. void rtlConcatUnicodeF(unsigned tlen, UChar * tgt, ...)
  1323. {
  1324. va_list args;
  1325. unsigned idx = 0;
  1326. UErrorCode err = U_ZERO_ERROR;
  1327. va_start(args, tgt);
  1328. for(;;)
  1329. {
  1330. unsigned len = va_arg(args, unsigned);
  1331. if(len+1==0)
  1332. break;
  1333. UChar * str = va_arg(args, UChar *);
  1334. if (len)
  1335. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1336. }
  1337. va_end(args);
  1338. while (idx < tlen)
  1339. tgt[idx++] = ' ';
  1340. }
  1341. void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
  1342. {
  1343. va_list args;
  1344. unsigned idx = 0;
  1345. UErrorCode err = U_ZERO_ERROR;
  1346. va_start(args, tgt);
  1347. for(;;)
  1348. {
  1349. unsigned len = va_arg(args, unsigned);
  1350. if(len+1==0)
  1351. break;
  1352. UChar * str = va_arg(args, UChar *);
  1353. if (len)
  1354. idx = unorm_concatenate(tgt, idx, str, len, tgt, tlen, UNORM_NFC, 0, &err);
  1355. }
  1356. va_end(args);
  1357. while (idx < tlen)
  1358. tgt[idx++] = 0;
  1359. tgt[tlen] = 0;
  1360. }
  1361. //------------------------------------------------------------------------------------------------
// The following concat functions are all deprecated in favour of the versions that take a
// variable number of arguments
  1364. unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
  1365. {
  1366. unsigned len = tlen-idx;
  1367. if (len > slen)
  1368. len = slen;
  1369. memcpy(tgt+idx, src, len);
  1370. return idx+len;
  1371. }
  1372. unsigned rtlConcatVStrToStr(unsigned tlen, char * tgt, unsigned idx, const char * src)
  1373. {
  1374. while (idx != tlen)
  1375. {
  1376. char next = *src++;
  1377. if (!next)
  1378. break;
  1379. tgt[idx++] = next;
  1380. }
  1381. return idx;
  1382. }
  1383. void rtlConcatStrToVStr(unsigned tlen, void * _tgt, unsigned slen, const void * src)
  1384. {
  1385. char * tgt = (char *)_tgt;
  1386. unsigned tend = strlen(tgt);
  1387. rtlStrToVStr(tlen-tend, tgt+tend, slen, src);
  1388. }
  1389. void rtlConcatVStrToVStr(unsigned tlen, void * _tgt, const char * src)
  1390. {
  1391. char * tgt = (char *)_tgt;
  1392. unsigned tend = strlen(tgt);
  1393. rtlVStrToVStr(tlen-tend, tgt+tend, src);
  1394. }
  1395. unsigned rtlConcatUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, unsigned slen, UChar const * src)
  1396. {
  1397. UErrorCode err = U_ZERO_ERROR;
  1398. return unorm_concatenate(tgt, idx, src, slen, tgt, tlen, UNORM_NFC, 0, &err);
  1399. }
  1400. unsigned rtlConcatVUnicodeToUnicode(unsigned tlen, UChar * tgt, unsigned idx, UChar const * src)
  1401. {
  1402. return rtlConcatUnicodeToUnicode(tlen, tgt, idx, rtlUnicodeStrlen(src), src);
  1403. }
  1404. void rtlESpaceFill(unsigned tlen, char * tgt, unsigned idx)
  1405. {
  1406. if (idx < tlen)
  1407. memset(tgt+idx, '@', tlen-idx);
  1408. }
  1409. void rtlSpaceFill(unsigned tlen, char * tgt, unsigned idx)
  1410. {
  1411. if (idx < tlen)
  1412. memset(tgt+idx, ' ', tlen-idx);
  1413. }
  1414. void rtlZeroFill(unsigned tlen, char * tgt, unsigned idx)
  1415. {
  1416. if (idx < tlen)
  1417. memset(tgt+idx, 0, tlen-idx);
  1418. }
  1419. void rtlNullTerminate(unsigned tlen, char * tgt, unsigned idx)
  1420. {
  1421. if (idx >= tlen)
  1422. idx = tlen-1;
  1423. tgt[idx] = 0;
  1424. }
  1425. void rtlUnicodeSpaceFill(unsigned tlen, UChar * tgt, unsigned idx)
  1426. {
  1427. while(idx<tlen) tgt[idx++] = 0x0020;
  1428. }
  1429. void rtlUnicodeNullTerminate(unsigned tlen, UChar * tgt, unsigned idx)
  1430. {
  1431. if (idx >= tlen)
  1432. idx = tlen-1;
  1433. tgt[idx] = 0x0000;
  1434. }
  1435. void rtlUnicodeStrcpy(UChar * tgt, UChar const * src)
  1436. {
  1437. memcpy(tgt, src, rtlUnicodeStrlen(src)*2+2);
  1438. }
  1439. void rtlConcatExtend(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1440. {
  1441. unsigned len = tlen + slen;
  1442. tgt = (char *)realloc(tgt, len);
  1443. memcpy(tgt+tlen, src, slen);
  1444. tlen = len;
  1445. }
  1446. //-----------------------------------------------------------------------------
  1447. inline void normalizeFrom(unsigned & from, unsigned slen)
  1448. {
  1449. from--;
  1450. if ((int)from < 0)
  1451. from = 0;
  1452. else if (from > slen)
  1453. from = slen;
  1454. }
  1455. inline void normalizeFromTo(unsigned & from, unsigned & to)
  1456. {
  1457. from--;
  1458. if ((int)from < 0) from = 0;
  1459. if ((int)to < (int)from) to = from;
  1460. }
  1461. inline void clipFromTo(unsigned & from, unsigned & to, unsigned slen)
  1462. {
  1463. if (to > slen)
  1464. {
  1465. to = slen;
  1466. if (from > slen)
  1467. from = slen;
  1468. }
  1469. }
  1470. //NB: From and to are 1 based: Now fills to ensure the correct length.
  1471. void * doSubStrFT(unsigned & tlen, unsigned slen, const void * src, unsigned from, unsigned to, byte fillChar)
  1472. {
  1473. normalizeFromTo(from, to);
  1474. unsigned len = to - from;
  1475. clipFromTo(from, to, slen);
  1476. unsigned copylen = to - from;
  1477. char * buffer = (char *)malloc(len);
  1478. memcpy(buffer, (byte *)src+from, copylen);
  1479. if (copylen < len)
  1480. memset(buffer+copylen, fillChar, len-copylen);
  1481. tlen = len;
  1482. return buffer;
  1483. }
  1484. void rtlSubStrFX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from)
  1485. {
  1486. normalizeFrom(from, slen);
  1487. tlen = slen-from;
  1488. tgt = (char *) malloc(tlen);
  1489. memcpy(tgt, src+from, tlen);
  1490. }
  1491. void rtlSubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1492. {
  1493. tgt = (char *)doSubStrFT(tlen, slen, src, from, to, ' ');
  1494. }
  1495. void rtlSubStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1496. {
  1497. unsigned char fillChar = ' '; // More, should be passed as a parameter
  1498. normalizeFromTo(from, to);
  1499. clipFromTo(from, to, slen);
  1500. unsigned copylen = to - from;
  1501. if (copylen > tlen)
  1502. copylen = tlen;
  1503. memcpy(tgt, (const char *)src+from, copylen);
  1504. if (copylen < tlen)
  1505. memset(tgt+copylen, fillChar, tlen-copylen);
  1506. }
  1507. void rtlSubDataFT(unsigned tlen, void * tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1508. {
  1509. normalizeFromTo(from, to);
  1510. clipFromTo(from, to, slen);
  1511. unsigned copylen = to - from;
  1512. if (copylen > tlen)
  1513. copylen = tlen;
  1514. memcpy(tgt, (char *)src+from, copylen);
  1515. if (copylen < tlen)
  1516. memset((byte*)tgt+copylen, 0, tlen-copylen);
  1517. }
  1518. void rtlSubDataFTX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from, unsigned to)
  1519. {
  1520. tgt = doSubStrFT(tlen, slen, src, from, to, 0);
  1521. }
  1522. void rtlSubDataFX(unsigned & tlen, void * & tgt, unsigned slen, const void * src, unsigned from)
  1523. {
  1524. normalizeFrom(from, slen);
  1525. tlen = slen-from;
  1526. tgt = (char *) malloc(tlen);
  1527. memcpy(tgt, (const byte *)src+from, tlen);
  1528. }
  1529. void rtlUnicodeSubStrFTX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from, unsigned to)
  1530. {
  1531. normalizeFromTo(from, to);
  1532. tlen = to - from;
  1533. clipFromTo(from, to, slen);
  1534. tgt = (UChar *)malloc(tlen*2);
  1535. unsigned copylen = to - from;
  1536. memcpy(tgt, src+from, copylen*2);
  1537. while(copylen<tlen)
  1538. tgt[copylen++] = 0x0020;
  1539. }
  1540. void rtlUnicodeSubStrFX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src, unsigned from)
  1541. {
  1542. normalizeFrom(from, slen);
  1543. tlen = slen - from;
  1544. tgt = (UChar *)malloc(tlen*2);
  1545. memcpy(tgt, src+from, tlen*2);
  1546. }
  1547. void rtlSubQStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  1548. {
  1549. normalizeFromTo(from, to);
  1550. tlen = to - from;
  1551. clipFromTo(from, to, slen);
  1552. tgt = (char *)malloc(rtlQStrSize(tlen));
  1553. copyQStrRange(tlen, tgt, src, from, to);
  1554. }
  1555. void rtlSubQStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  1556. {
  1557. normalizeFrom(from, slen);
  1558. tlen = slen - from;
  1559. tgt = (char *)malloc(rtlQStrSize(tlen));
  1560. copyQStrRange(tlen, tgt, src, from, slen);
  1561. }
  1562. void rtlSubQStrFT(unsigned tlen, char * tgt, unsigned slen, const char * src, unsigned from, unsigned to)
  1563. {
  1564. normalizeFromTo(from, to);
  1565. clipFromTo(from, to, slen);
  1566. copyQStrRange(tlen, tgt, src, from ,to);
  1567. }
  1568. //-----------------------------------------------------------------------------
  1569. unsigned rtlTrimStrLen(size32_t l, const char * t)
  1570. {
  1571. while (l)
  1572. {
  1573. if (t[l-1] != ' ')
  1574. break;
  1575. l--;
  1576. }
  1577. return l;
  1578. }
  1579. unsigned rtlTrimDataLen(size32_t l, const void * _t)
  1580. {
  1581. const char * t = (const char *)_t;
  1582. while (l)
  1583. {
  1584. if (t[l-1] != 0)
  1585. break;
  1586. l--;
  1587. }
  1588. return l;
  1589. }
  1590. unsigned rtlTrimUnicodeStrLen(size32_t l, UChar const * t)
  1591. {
  1592. if (!l)
  1593. return 0;
  1594. UCharCharacterIterator iter(t, l);
  1595. for(iter.last32(); iter.hasPrevious(); iter.previous32())
  1596. if(!u_isspace(iter.current32()))
  1597. break;
  1598. if(u_isspace(iter.current32())) return iter.getIndex(); // required as the reverse iteration above doesn't hit the first character
  1599. return iter.getIndex() + 1;
  1600. }
  1601. inline size32_t rtlQuickTrimUnicode(size32_t len, UChar const * str)
  1602. {
  1603. while (len && u_isspace(str[len-1]))
  1604. len--;
  1605. return len;
  1606. }
  1607. unsigned rtlTrimVStrLen(const char * t)
  1608. {
  1609. const char * first = t;
  1610. const char * last = first;
  1611. unsigned char c;
  1612. while ((c = *t++) != 0)
  1613. {
  1614. if (c != ' ')
  1615. last = t; //nb after increment of t
  1616. }
  1617. return (last - first);
  1618. }
  1619. unsigned rtlTrimVUnicodeStrLen(UChar const * t)
  1620. {
  1621. return rtlTrimUnicodeStrLen(rtlUnicodeStrlen(t), t);
  1622. }
  1623. inline unsigned rtlLeftTrimStrStart(size32_t slen, const char * src)
  1624. {
  1625. unsigned i = 0;
  1626. while(i < slen && src[i] == ' ')
  1627. i++;
  1628. return i;
  1629. }
  1630. inline unsigned rtlLeftTrimUnicodeStrStart(size32_t slen, UChar const * src)
  1631. {
  1632. UCharCharacterIterator iter(src, slen);
  1633. for(iter.first32(); iter.hasNext(); iter.next32())
  1634. if(!u_isspace(iter.current32()))
  1635. break;
  1636. return iter.getIndex();
  1637. }
  1638. inline unsigned rtlLeftTrimVStrStart(const char * src)
  1639. {
  1640. unsigned i = 0;
  1641. while(src[i] == ' ')
  1642. i++;
  1643. return i;
  1644. }
  1645. inline void rtlTrimUtf8Len(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1646. {
  1647. const byte * start = (const byte *)t;
  1648. const byte * cur = start;
  1649. unsigned trimLength = 0;
  1650. const byte * trimEnd = cur;
  1651. for (unsigned i=0; i < len; i++)
  1652. {
  1653. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1654. if (!u_isspace(next))
  1655. {
  1656. trimLength = i+1;
  1657. trimEnd = cur;
  1658. }
  1659. }
  1660. trimLen = trimLength;
  1661. trimSize = trimEnd-start;
  1662. }
  1663. inline void rtlTrimUtf8Start(unsigned & trimLen, size32_t & trimSize, size32_t len, const char * t)
  1664. {
  1665. const byte * start = (const byte *)t;
  1666. const byte * cur = start;
  1667. for (unsigned i=0; i < len; i++)
  1668. {
  1669. const byte * prev = cur;
  1670. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1671. if (!u_isspace(next))
  1672. {
  1673. trimLen = i;
  1674. trimSize = prev-start;
  1675. return;
  1676. }
  1677. }
  1678. trimLen = len;
  1679. trimSize = cur-start;
  1680. }
  1681. inline char * rtlDupSubString(const char * src, unsigned len)
  1682. {
  1683. char * buffer = (char *)malloc(len + 1);
  1684. memcpy(buffer, src, len);
  1685. buffer[len] = 0;
  1686. return buffer;
  1687. }
  1688. inline UChar * rtlDupSubUnicode(UChar const * src, unsigned len)
  1689. {
  1690. UChar * buffer = (UChar *)malloc((len + 1) * 2);
  1691. memcpy(buffer, src, len*2);
  1692. buffer[len] = 0x00;
  1693. return buffer;
  1694. }
  1695. inline void rtlCopySubStringV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1696. {
  1697. if (slen >= tlen)
  1698. slen = tlen-1;
  1699. memcpy(tgt, src, slen);
  1700. tgt[slen] = 0;
  1701. }
  1702. //not yet used, but would be needed for assignment to string rather than vstring
  1703. inline void rtlCopySubString(size32_t tlen, char * tgt, unsigned slen, const char * src, char fill)
  1704. {
  1705. if (slen > tlen)
  1706. slen = tlen;
  1707. memcpy(tgt, src, slen);
  1708. memset(tgt + slen, fill, tlen-slen);
  1709. }
  1710. unsigned rtlTrimUtf8StrLen(size32_t len, const char * t)
  1711. {
  1712. const byte * cur = (const byte *)t;
  1713. unsigned trimLength = 0;
  1714. for (unsigned i=0; i < len; i++)
  1715. {
  1716. unsigned next = readUtf8Character(UTF8_MAXSIZE, cur);
  1717. if (!u_isspace(next))
  1718. trimLength = i+1;
  1719. }
  1720. return trimLength;
  1721. }
  1722. //-----------------------------------------------------------------------------
// Functions to trim off right side blank spaces
  1724. void rtlTrimRight(size32_t & tlen, char * & tgt, unsigned slen, const char * src)
  1725. {
  1726. tlen = rtlTrimStrLen(slen, src);
  1727. tgt = rtlDupSubString(src, tlen);
  1728. }
  1729. void rtlTrimUnicodeRight(size32_t & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1730. {
  1731. tlen = rtlTrimUnicodeStrLen(slen, src);
  1732. tgt = rtlDupSubUnicode(src, tlen);
  1733. }
  1734. void rtlTrimVRight(size32_t & tlen, char * & tgt, const char * src)
  1735. {
  1736. tlen = rtlTrimVStrLen(src);
  1737. tgt = rtlDupSubString(src, tlen);
  1738. }
  1739. void rtlTrimVUnicodeRight(size32_t & tlen, UChar * & tgt, UChar const * src)
  1740. {
  1741. rtlTrimUnicodeRight(tlen, tgt, rtlUnicodeStrlen(src), src);
  1742. }
  1743. void rtlTrimUtf8Right(unsigned &tlen, char * &tgt, unsigned slen, char const * src)
  1744. {
  1745. unsigned trimLength;
  1746. size32_t trimSize;
  1747. rtlTrimUtf8Len(trimLength, trimSize, slen, src);
  1748. tlen = trimLength;
  1749. tgt = rtlDupSubString(src, trimSize);
  1750. }
  1751. void rtlAssignTrimRightV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1752. {
  1753. unsigned len = rtlTrimStrLen(slen, src);
  1754. rtlCopySubStringV(tlen, tgt, len, src);
  1755. }
  1756. void rtlAssignTrimVRightV(size32_t tlen, char * tgt, const char * src)
  1757. {
  1758. unsigned len = rtlTrimVStrLen(src);
  1759. rtlCopySubStringV(tlen, tgt, len, src);
  1760. }
  1761. //-------------------------------------------------------------------------------
  1762. // Functions to trim off left side blank spaces
  1763. void rtlTrimLeft(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1764. {
  1765. unsigned start = rtlLeftTrimStrStart(slen, src);
  1766. unsigned len = slen - start;
  1767. tlen = len;
  1768. tgt = rtlDupSubString(src + start, len);
  1769. }
  1770. void rtlTrimUnicodeLeft(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1771. {
  1772. unsigned start = rtlLeftTrimUnicodeStrStart(slen, src);
  1773. unsigned len = slen - start;
  1774. tlen = len;
  1775. tgt = rtlDupSubUnicode(src + start, len);
  1776. }
  1777. void rtlTrimVLeft(unsigned & tlen, char * & tgt, const char * src)
  1778. {
  1779. unsigned start = rtlLeftTrimVStrStart(src);
  1780. unsigned len = strlen(src+start);
  1781. tlen = len;
  1782. tgt = rtlDupSubString(src + start, len);
  1783. }
  1784. void rtlTrimVUnicodeLeft(unsigned & tlen, UChar * & tgt, UChar const * src)
  1785. {
  1786. rtlTrimUnicodeLeft(tlen, tgt, rtlUnicodeStrlen(src), src);
  1787. }
  1788. ECLRTL_API void rtlTrimUtf8Left(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1789. {
  1790. unsigned trimLength;
  1791. size32_t trimSize;
  1792. rtlTrimUtf8Start(trimLength, trimSize, slen, src);
  1793. unsigned len = slen-trimLength;
  1794. const char * start = src+trimSize;
  1795. tlen = len;
  1796. tgt = rtlDupSubString(start, rtlUtf8Size(len, start));
  1797. }
  1798. void rtlAssignTrimLeftV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1799. {
  1800. unsigned start = rtlLeftTrimStrStart(slen, src);
  1801. unsigned len = slen - start;
  1802. rtlCopySubStringV(tlen, tgt, len, src+start);
  1803. }
  1804. void rtlAssignTrimVLeftV(size32_t tlen, char * tgt, const char * src)
  1805. {
  1806. unsigned start = rtlLeftTrimVStrStart(src);
  1807. unsigned len = strlen(src+start);
  1808. rtlCopySubStringV(tlen, tgt, len, src+start);
  1809. }
  1810. //--------------------------------------------------------------------------------
  1811. // Functions to trim off blank spaces of both sides
  1812. void rtlTrimBoth(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1813. {
  1814. unsigned len = rtlTrimStrLen(slen, src);
  1815. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1816. len -= start;
  1817. tlen = len;
  1818. tgt = rtlDupSubString(src + start, len);
  1819. }
  1820. void rtlTrimUnicodeBoth(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  1821. {
  1822. unsigned len = rtlTrimUnicodeStrLen(slen, src);
  1823. unsigned start = len ? rtlLeftTrimUnicodeStrStart(slen, src) : 0;
  1824. len -= start;
  1825. tlen = len;
  1826. tgt = rtlDupSubUnicode(src + start, len);
  1827. }
  1828. void rtlTrimVBoth(unsigned & tlen, char * & tgt, const char * src)
  1829. {
  1830. unsigned len = rtlTrimVStrLen(src);
  1831. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1832. len -= start;
  1833. tlen = len;
  1834. tgt = rtlDupSubString(src + start, len);
  1835. }
  1836. void rtlTrimVUnicodeBoth(unsigned & tlen, UChar * & tgt, UChar const * src)
  1837. {
  1838. rtlTrimUnicodeBoth(tlen, tgt, rtlUnicodeStrlen(src), src);
  1839. }
  1840. ECLRTL_API void rtlTrimUtf8Both(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1841. {
  1842. unsigned lTrimLength;
  1843. size32_t lTrimSize;
  1844. rtlTrimUtf8Start(lTrimLength, lTrimSize, slen, src);
  1845. rtlTrimUtf8Right(tlen, tgt, slen-lTrimLength, src+lTrimSize);
  1846. }
  1847. void rtlAssignTrimBothV(size32_t tlen, char * tgt, unsigned slen, const char * src)
  1848. {
  1849. unsigned len = rtlTrimStrLen(slen, src);
  1850. unsigned start = len ? rtlLeftTrimStrStart(slen, src) : 0;
  1851. len -= start;
  1852. rtlCopySubStringV(tlen, tgt, len, src+start);
  1853. }
  1854. void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
  1855. {
  1856. unsigned len = rtlTrimVStrLen(src);
  1857. unsigned start = len ? rtlLeftTrimVStrStart(src) : 0;
  1858. len -= start;
  1859. rtlCopySubStringV(tlen, tgt, len, src+start);
  1860. }
  1861. //-----------------------------------------------------------------------------
  1862. // Functions used to trim off all blank spaces in a string.
  1863. unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
  1864. {
  1865. unsigned len = 0;
  1866. while (l)
  1867. {
  1868. l--;
  1869. if (t[l] != ' ')
  1870. len++;
  1871. }
  1872. return len;
  1873. }
  1874. unsigned rtlTrimVStrLenNonBlank(const char * t)
  1875. {
  1876. unsigned len = 0;
  1877. unsigned char c;
  1878. while ((c = *t++) != 0)
  1879. {
  1880. if (c != ' ')
  1881. len++;
  1882. }
  1883. return len;
  1884. }
  1885. void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  1886. {
  1887. tlen = rtlTrimStrLenNonBlank(slen, src);
  1888. char * buffer = (char *)malloc(tlen + 1);
  1889. int ind = 0;
  1890. for(unsigned i = 0; i < slen; i++) {
  1891. if(src[i] != ' ') {
  1892. buffer[ind] = src[i];
  1893. ind++;
  1894. }
  1895. }
  1896. buffer[tlen] = 0;
  1897. tgt = buffer;
  1898. }
  1899. void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
  1900. {
  1901. UnicodeString rawStr;
  1902. UCharCharacterIterator iter(src, slen);
  1903. for(iter.first32(); iter.hasNext(); iter.next32())
  1904. if(!u_isspace(iter.current32()))
  1905. rawStr.append(iter.current32());
  1906. UnicodeString tgtStr;
  1907. normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
  1908. tlen = tgtStr.length();
  1909. tgt = (UChar *)malloc((tlen+1)*2);
  1910. tgtStr.extract(0, tlen, tgt);
  1911. tgt[tlen] = 0x0000;
  1912. }
  1913. void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
  1914. {
  1915. tlen = rtlTrimVStrLenNonBlank(src);
  1916. char * buffer = (char *)malloc(tlen + 1);
  1917. int ind = 0;
  1918. int i = 0;
  1919. while(src[i] != 0) {
  1920. if(src[i] != ' ') {
  1921. buffer[ind] = src[i];
  1922. ind++;
  1923. }
  1924. i++;
  1925. }
  1926. buffer[tlen] = 0;
  1927. tgt = buffer;
  1928. }
  1929. void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
  1930. {
  1931. rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
  1932. }
  1933. ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
  1934. {
  1935. //Go via unicode because of possibility of combining accents etc.
  1936. rtlDataAttr temp1(slen*sizeof(UChar));
  1937. rtlUtf8ToUnicode(slen, temp1.getustr(), slen, src);
  1938. unsigned trimLen;
  1939. rtlDataAttr trimText;
  1940. rtlTrimUnicodeAll(trimLen, trimText.refustr(), slen, temp1.getustr());
  1941. rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
  1942. }
  1943. void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
  1944. {
  1945. unsigned to = 0;
  1946. for (unsigned from = 0; (from < slen)&&(to+1 < tlen); from++)
  1947. {
  1948. if (src[from] != ' ')
  1949. tgt[to++] = src[from];
  1950. }
  1951. tgt[to] = 0;
  1952. }
  1953. void rtlAssignTrimVAllV(unsigned tlen, char * tgt, const char * src)
  1954. {
  1955. unsigned to = 0;
  1956. for (;(*src && (to+1 < tlen));src++)
  1957. {
  1958. if (*src != ' ')
  1959. tgt[to++] = *src;
  1960. }
  1961. tgt[to] = 0;
  1962. }
  1963. //-----------------------------------------------------------------------------
  1964. ECLRTL_API void rtlUnicodeToVAscii(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  1965. {
  1966. rtlUnicodeToVCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  1967. }
  1968. ECLRTL_API void rtlData2VUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  1969. {
  1970. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  1971. }
  1972. ECLRTL_API void rtlStrToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  1973. {
  1974. rtlCodepageToVUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  1975. }
  1976. ECLRTL_API void rtlData2Unicode(unsigned outlen, UChar * out, unsigned inlen, void const * in)
  1977. {
  1978. rtlCodepageToUnicode(outlen, out, inlen, (const char *)in, ASCII_LIKE_CODEPAGE);
  1979. }
  1980. ECLRTL_API void rtlAssignTrimUnicodeLeftV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  1981. {
  1982. unsigned len;
  1983. UChar * str;
  1984. rtlTrimUnicodeLeft(len, str, slen, src);
  1985. if (len >= tlen)
  1986. len = tlen-1;
  1987. memcpy(tgt, str, len*2);
  1988. tgt[len] = 0;
  1989. rtlFree(str);
  1990. }
  1991. ECLRTL_API void rtlAssignTrimVUnicodeLeftV(size32_t tlen, UChar * tgt, const UChar * src)
  1992. {
  1993. unsigned len;
  1994. UChar * str;
  1995. rtlTrimVUnicodeLeft(len, str, src);
  1996. if (len >= tlen)
  1997. len = tlen-1;
  1998. memcpy(tgt, str, len*2);
  1999. tgt[len] = 0;
  2000. rtlFree(str);
  2001. }
  2002. ECLRTL_API void rtlAssignTrimUnicodeRightV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2003. {
  2004. unsigned len;
  2005. UChar * str;
  2006. rtlTrimUnicodeRight(len, str, slen, src);
  2007. if (len >= tlen)
  2008. len = tlen-1;
  2009. memcpy(tgt, str, len*2);
  2010. tgt[len] = 0;
  2011. rtlFree(str);
  2012. }
  2013. ECLRTL_API void rtlAssignTrimVUnicodeRightV(size32_t tlen, UChar * tgt, const UChar * src)
  2014. {
  2015. unsigned len;
  2016. UChar * str;
  2017. rtlTrimVUnicodeRight(len, str, src);
  2018. if (len >= tlen)
  2019. len = tlen-1;
  2020. memcpy(tgt, str, len*2);
  2021. tgt[len] = 0;
  2022. rtlFree(str);
  2023. }
  2024. ECLRTL_API void rtlAssignTrimUnicodeBothV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2025. {
  2026. unsigned len;
  2027. UChar * str;
  2028. rtlTrimUnicodeBoth(len, str, slen, src);
  2029. if (len >= tlen)
  2030. len = tlen-1;
  2031. memcpy(tgt, str, len*2);
  2032. tgt[len] = 0;
  2033. rtlFree(str);
  2034. }
  2035. ECLRTL_API void rtlAssignTrimVUnicodeBothV(size32_t tlen, UChar * tgt, const UChar * src)
  2036. {
  2037. unsigned len;
  2038. UChar * str;
  2039. rtlTrimVUnicodeBoth(len, str, src);
  2040. if (len >= tlen)
  2041. len = tlen-1;
  2042. memcpy(tgt, str, len*2);
  2043. tgt[len] = 0;
  2044. rtlFree(str);
  2045. }
  2046. ECLRTL_API void rtlAssignTrimUnicodeAllV(size32_t tlen, UChar * tgt, unsigned slen, const UChar * src)
  2047. {
  2048. unsigned len;
  2049. UChar * str;
  2050. rtlTrimUnicodeAll(len, str, slen, src);
  2051. if (len >= tlen)
  2052. len = tlen-1;
  2053. memcpy(tgt, str, len*2);
  2054. tgt[len] = 0;
  2055. rtlFree(str);
  2056. }
  2057. ECLRTL_API void rtlAssignTrimVUnicodeAllV(size32_t tlen, UChar * tgt, const UChar * src)
  2058. {
  2059. unsigned len;
  2060. UChar * str;
  2061. rtlTrimVUnicodeAll(len, str, src);
  2062. if (len >= tlen)
  2063. len = tlen-1;
  2064. memcpy(tgt, str, len*2);
  2065. tgt[len] = 0;
  2066. rtlFree(str);
  2067. }
  2068. //-----------------------------------------------------------------------------
  2069. int rtlCompareStrStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2070. {
  2071. unsigned len = l1;
  2072. if (len > l2)
  2073. len = l2;
  2074. int diff = memcmp(p1, p2, len);
  2075. if (diff == 0)
  2076. {
  2077. if (len != l1)
  2078. {
  2079. for (;(diff == 0) && (len != l1);len++)
  2080. diff = ((unsigned char *)p1)[len] - ' ';
  2081. }
  2082. else if (len != l2)
  2083. {
  2084. for (;(diff == 0) && (len != l2);len++)
  2085. diff = ' ' - ((unsigned char *)p2)[len];
  2086. }
  2087. }
  2088. return diff;
  2089. }
  2090. int rtlCompareVStrVStr(const char * p1, const char * p2)
  2091. {
  2092. return rtlCompareStrStr(strlen(p1), p1, strlen(p2), p2);
  2093. }
  2094. int rtlCompareStrBlank(unsigned l1, const char * p1)
  2095. {
  2096. while (l1--)
  2097. {
  2098. int diff = (*(unsigned char *)(p1++)) - ' ';
  2099. if (diff)
  2100. return diff;
  2101. }
  2102. return 0;
  2103. }
  2104. int rtlCompareDataData(unsigned l1, const void * p1, unsigned l2, const void * p2)
  2105. {
  2106. unsigned len = l1;
  2107. if (len > l2)
  2108. len = l2;
  2109. int diff = memcmp(p1, p2, len);
  2110. if (diff == 0)
  2111. {
  2112. if (l1 > l2)
  2113. diff = +1;
  2114. else if (l1 < l2)
  2115. diff = -1;
  2116. }
  2117. return diff;
  2118. }
  2119. int rtlCompareEStrEStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2120. {
  2121. unsigned len = l1;
  2122. if (len > l2)
  2123. len = l2;
  2124. int diff = memcmp(p1, p2, len);
  2125. if (diff == 0)
  2126. {
  2127. if (len != l1)
  2128. {
  2129. for (;(diff == 0) && (len != l1);len++)
  2130. diff = ((unsigned char *)p1)[len] - '@';
  2131. }
  2132. else if (len != l2)
  2133. {
  2134. for (;(diff == 0) && (len != l2);len++)
  2135. diff = '@' - ((unsigned char *)p2)[len];
  2136. }
  2137. }
  2138. return diff;
  2139. }
  2140. int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
  2141. {
  2142. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2143. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2144. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
  2145. }
  2146. int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
  2147. {
  2148. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2149. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2150. return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
  2151. }
  2152. int rtlCompareVUnicodeVUnicode(UChar const * p1, UChar const * p2, char const * locale)
  2153. {
  2154. return rtlCompareUnicodeUnicode(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale);
  2155. }
  2156. int rtlCompareVUnicodeVUnicodeStrength(UChar const * p1, UChar const * p2, char const * locale, unsigned strength)
  2157. {
  2158. return rtlCompareUnicodeUnicodeStrength(rtlUnicodeStrlen(p1), p1, rtlUnicodeStrlen(p2), p2, locale, strength);
  2159. }
  2160. void rtlKeyUnicodeX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale)
  2161. {
  2162. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2163. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2164. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2165. tgt = malloc(tlen);
  2166. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2167. }
  2168. void rtlKeyUnicodeStrengthX(unsigned & tlen, void * & tgt, unsigned slen, const UChar * src, const char * locale, unsigned strength)
  2169. {
  2170. while(slen && u_isUWhiteSpace(src[slen-1])) slen--;
  2171. UCollator * coll = queryRTLLocale(locale)->queryCollator(strength);
  2172. tlen = ucol_getSortKey(coll, src, slen, 0, 0);
  2173. tgt = malloc(tlen);
  2174. ucol_getSortKey(coll, src, slen, (unsigned char *)tgt, tlen);
  2175. }
  2176. ECLRTL_API int rtlPrefixDiffStr(unsigned l1, const char * p1, unsigned l2, const char * p2)
  2177. {
  2178. unsigned len = l1 < l2 ? l1 : l2;
  2179. const byte * str1 = (const byte *)p1;
  2180. const byte * str2 = (const byte *)p2;
  2181. for (unsigned i=0; i<len; i++)
  2182. {
  2183. byte c1 = str1[i];
  2184. byte c2 = str2[i];
  2185. if (c1 != c2)
  2186. {
  2187. if (c1 < c2)
  2188. return -(int)(i+1);
  2189. else
  2190. return (int)(i+1);
  2191. }
  2192. }
  2193. if (l1 != l2)
  2194. return (l1 < l2) ? -(int)(len+1) : (int)(len + 1);
  2195. return 0;
  2196. }
  2197. //MORE: I'm not sure this can really be implemented....
  2198. ECLRTL_API int rtlPrefixDiffUnicode(unsigned l1, const UChar * p1, unsigned l2, const UChar * p2, char const * locale)
  2199. {
  2200. while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
  2201. while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
  2202. unsigned len = l1 < l2 ? l1 : l2;
  2203. for (unsigned i=0; i<len; i++)
  2204. {
  2205. if (p1[i] != p2[i])
  2206. {
  2207. int c = ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1+i, l1-i, p2+i, l2-i);
  2208. if (c < 0)
  2209. return -(int)(i+1);
  2210. else if (c > 0)
  2211. return (int)(i+1);
  2212. else
  2213. return 0; //weird!
  2214. }
  2215. }
  2216. if (l1 != l2)
  2217. return (l1 < l2) ? -(int)(len+1) : (int)(len + 1);
  2218. return 0;
  2219. }
  2220. //-----------------------------------------------------------------------------
  2221. void rtlStringToLower(size32_t l, char * t)
  2222. {
  2223. for (;l--;t++)
  2224. *t = tolower(*t);
  2225. }
  2226. void rtlStringToUpper(size32_t l, char * t)
  2227. {
  2228. for (;l--;t++)
  2229. *t = toupper(*t);
  2230. }
  2231. void rtlUnicodeToLower(size32_t l, UChar * t, char const * locale)
  2232. {
  2233. UChar * buff = (UChar *)malloc(l*2);
  2234. UErrorCode err = U_ZERO_ERROR;
  2235. u_strToLower(buff, l, t, l, locale, &err);
  2236. unicodeNormalizedCopy(buff, t, l);
  2237. }
  2238. void rtlUnicodeToLowerX(size32_t & lenout, UChar * & out, size32_t l, const UChar * t, char const * locale)
  2239. {
  2240. out = (UChar *)malloc(l*2);
  2241. lenout = l;
  2242. UErrorCode err = U_ZERO_ERROR;
  2243. u_strToLower(out, l, t, l, locale, &err);
  2244. }
  2245. void rtlUnicodeToUpper(size32_t l, UChar * t, char const * locale)
  2246. {
  2247. UChar * buff = (UChar *)malloc(l*2);
  2248. UErrorCode err = U_ZERO_ERROR;
  2249. u_strToUpper(buff, l, t, l, locale, &err);
  2250. unicodeNormalizedCopy(buff, t, l);
  2251. }
  2252. //=============================================================================
  2253. // Miscellaneous helper functions...
  2254. //-----------------------------------------------------------------------------
  2255. int searchTableStringN(unsigned count, const char * * table, unsigned width, const char * search)
  2256. {
  2257. int left = 0;
  2258. int right = count;
  2259. do
  2260. {
  2261. int mid = (left + right) >> 1;
  2262. int cmp = memcmp(search, table[mid], width);
  2263. if (cmp < 0)
  2264. right = mid;
  2265. else if (cmp > 0)
  2266. left = mid+1;
  2267. else
  2268. return mid;
  2269. } while (left < right);
  2270. return -1;
  2271. }
  2272. int rtlSearchTableStringN(unsigned count, char * * table, unsigned width, const char * search)
  2273. {
  2274. int left = 0;
  2275. int right = count;
  2276. do
  2277. {
  2278. int mid = (left + right) >> 1;
  2279. //we could use rtlCompareStrStr, but both source and target strings should
  2280. //be the correct length, so no point.... (unless new weird collation sequences)
  2281. //we would also need to call a different function for data
  2282. int cmp = memcmp(search, table[mid], width);
  2283. if (cmp < 0)
  2284. right = mid;
  2285. else if (cmp > 0)
  2286. left = mid+1;
  2287. else
  2288. return mid;
  2289. } while (left < right);
  2290. return -1;
  2291. }
  // Binary search for the null-terminated string `search` in a table of
  // `count` null-terminated strings sorted in strcmp order.
  // Returns the index of a match, or -1.
  int rtlSearchTableVStringN(unsigned count, char * * table, const char * search)
  {
      int left = 0;
      int right = count;
      // Test before probing: the former do/while read table[0] even when the
      // table was empty.
      while (left < right)
      {
          const int mid = left + ((right - left) >> 1);
          const int cmp = strcmp(search, table[mid]);
          if (cmp < 0)
              right = mid;
          else if (cmp > 0)
              left = mid+1;
          else
              return mid;
      }
      return -1;
  }
  2309. int rtlNewSearchDataTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2310. {
  2311. int left = 0;
  2312. int right = count;
  2313. do
  2314. {
  2315. int mid = (left + right) >> 1;
  2316. int cmp = rtlCompareDataData( width, search, elemlen, table[mid]);
  2317. if (cmp < 0)
  2318. right = mid;
  2319. else if (cmp > 0)
  2320. left = mid+1;
  2321. else {
  2322. return mid;
  2323. }
  2324. } while (left < right);
  2325. return -1;
  2326. }
  2327. int rtlNewSearchEStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2328. {
  2329. int left = 0;
  2330. int right = count;
  2331. do
  2332. {
  2333. int mid = (left + right) >> 1;
  2334. int cmp = rtlCompareEStrEStr( width, search, elemlen, table[mid]);
  2335. if (cmp < 0)
  2336. right = mid;
  2337. else if (cmp > 0)
  2338. left = mid+1;
  2339. else {
  2340. return mid;
  2341. }
  2342. } while (left < right);
  2343. return -1;
  2344. }
  2345. int rtlNewSearchQStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2346. {
  2347. int left = 0;
  2348. int right = count;
  2349. do
  2350. {
  2351. int mid = (left + right) >> 1;
  2352. int cmp = rtlCompareQStrQStr( width, search, elemlen, table[mid]);
  2353. if (cmp < 0)
  2354. right = mid;
  2355. else if (cmp > 0)
  2356. left = mid+1;
  2357. else {
  2358. return mid;
  2359. }
  2360. } while (left < right);
  2361. return -1;
  2362. }
  2363. int rtlNewSearchStringTable(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search)
  2364. {
  2365. int left = 0;
  2366. int right = count;
  2367. do
  2368. {
  2369. int mid = (left + right) >> 1;
  2370. int cmp = rtlCompareStrStr( width, search, elemlen, table[mid]);
  2371. if (cmp < 0)
  2372. right = mid;
  2373. else if (cmp > 0)
  2374. left = mid+1;
  2375. else {
  2376. return mid;
  2377. }
  2378. } while (left < right);
  2379. return -1;
  2380. }
  2381. int rtlNewSearchUnicodeTable(unsigned count, unsigned elemlen, UChar * * table, unsigned width, const UChar * search, const char * locale)
  2382. {
  2383. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2384. int left = 0;
  2385. int right = count;
  2386. size32_t trimWidth = rtlQuickTrimUnicode(width, search);
  2387. do
  2388. {
  2389. int mid = (left + right) >> 1;
  2390. size32_t elemTrimWidth = rtlQuickTrimUnicode(elemlen, table[mid]);
  2391. UCollationResult cmp = ucol_strcoll(coll, search, trimWidth, table[mid], elemTrimWidth);
  2392. if (cmp == UCOL_LESS)
  2393. right = mid;
  2394. else if (cmp == UCOL_GREATER)
  2395. left = mid+1;
  2396. else
  2397. return mid;
  2398. } while (left < right);
  2399. return -1;
  2400. }
  2401. int rtlNewSearchVUnicodeTable(unsigned count, UChar * * table, const UChar * search, const char * locale)
  2402. {
  2403. UCollator * coll = queryRTLLocale(locale)->queryCollator();
  2404. int left = 0;
  2405. int right = count;
  2406. do
  2407. {
  2408. int mid = (left + right) >> 1;
  2409. UCollationResult cmp = ucol_strcoll(coll, search, rtlUnicodeStrlen(search), table[mid], rtlUnicodeStrlen(table[mid]));
  2410. if (cmp == UCOL_LESS)
  2411. right = mid;
  2412. else if (cmp == UCOL_GREATER)
  2413. left = mid+1;
  2414. else
  2415. return mid;
  2416. } while (left < right);
  2417. return -1;
  2418. }
  2419. //-----------------------------------------------------------------------------
  // Binary search for `search` in `table`, which holds `count` values sorted
  // ascending under operator<. Returns the index of a match, or -1.
  template <class T>
  int rtlSearchIntegerTable(unsigned count, T * table, T search)
  {
      int left = 0;
      int right = count;
      // Test before probing: the former do/while read table[0] even when the
      // table was empty.
      while (left < right)
      {
          const int mid = left + ((right - left) >> 1);
          const T midValue = table[mid];
          if (search < midValue)
              right = mid;
          else if (search > midValue)
              left = mid+1;
          else
              return mid;
      }
      return -1;
  }
  2438. int rtlSearchTableInteger8(unsigned count, __int64 * table, __int64 search)
  2439. {
  2440. return rtlSearchIntegerTable(count, table, search);
  2441. }
  2442. int rtlSearchTableUInteger8(unsigned count, unsigned __int64 * table, unsigned __int64 search)
  2443. {
  2444. return rtlSearchIntegerTable(count, table, search);
  2445. }
  2446. int rtlSearchTableInteger4(unsigned count, int * table, int search)
  2447. {
  2448. return rtlSearchIntegerTable(count, table, search);
  2449. }
  2450. int rtlSearchTableUInteger4(unsigned count, unsigned * table, unsigned search)
  2451. {
  2452. return rtlSearchIntegerTable(count, table, search);
  2453. }
  2454. //-----------------------------------------------------------------------------
  2455. unsigned rtlCrc32(unsigned len, const void * buffer, unsigned crc)
  2456. {
  2457. return crc32((const char *)buffer, len, crc);
  2458. }
  2459. //=============================================================================
  2460. // EBCDIC helper functions...
  // 256-entry byte translation table, written as 16 rows of 16 octal escapes.
  // It is not referenced anywhere in the visible part of this file.
  // NOTE(review): from its name and position under "EBCDIC helper functions" this
  // is presumably an EBCDIC -> CCSID 819 (ISO-8859-1) mapping - confirm before use.
  2461. static char ccsid819[] = "\
  2462. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2463. \020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037\
  2464. \200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007\
  2465. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2466. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2467. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254\
  2468. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2469. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2470. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2471. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2472. \265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256\
  2473. \136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327\
  2474. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2475. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2476. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2477. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
  // 256-entry byte translation table, written as 16 rows of 16 octal escapes.
  // rtlEStrToStr indexes it with each input byte to obtain the output byte,
  // i.e. it maps EBCDIC (presumably CCSID 1047, going by the name) to the
  // string character set used by the runtime.
  2478. static unsigned char ccsid1047[] = "\
  2479. \000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017\
  2480. \020\021\022\023\235\012\010\207\030\031\222\217\034\035\036\037\
  2481. \200\201\202\203\204\205\027\033\210\211\212\213\214\005\006\007\
  2482. \220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032\
  2483. \040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174\
  2484. \046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\136\
  2485. \055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077\
  2486. \370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042\
  2487. \330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261\
  2488. \260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244\
  2489. \265\176\163\164\165\166\167\170\171\172\241\277\320\133\336\256\
  2490. \254\243\245\267\251\247\266\274\275\276\335\250\257\135\264\327\
  2491. \173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365\
  2492. \175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377\
  2493. \134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325\
  2494. \060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237";
  // 256-entry byte translation table, written as 16 rows of 16 octal escapes.
  // rtlStrToEStr indexes it with each input byte to obtain the EBCDIC output
  // byte, and uses the entry for ' ' as its padding character.
  // NOTE(review): the name suggests this is the inverse of ccsid1047 - not
  // verified entry by entry here.
  2495. static unsigned char ccsid1047_rev[] = "\
  2496. \000\001\002\003\067\055\056\057\026\005\025\013\014\015\016\017\
  2497. \020\021\022\023\074\075\062\046\030\031\077\047\034\035\036\037\
  2498. \100\132\177\173\133\154\120\175\115\135\134\116\153\140\113\141\
  2499. \360\361\362\363\364\365\366\367\370\371\172\136\114\176\156\157\
  2500. \174\301\302\303\304\305\306\307\310\311\321\322\323\324\325\326\
  2501. \327\330\331\342\343\344\345\346\347\350\351\255\340\275\137\155\
  2502. \171\201\202\203\204\205\206\207\210\211\221\222\223\224\225\226\
  2503. \227\230\231\242\243\244\245\246\247\250\251\300\117\320\241\007\
  2504. \040\041\042\043\044\045\006\027\050\051\052\053\054\011\012\033\
  2505. \060\061\032\063\064\065\066\010\070\071\072\073\004\024\076\377\
  2506. \101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\
  2507. \220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\
  2508. \144\145\142\146\143\147\236\150\164\161\162\163\170\165\166\167\
  2509. \254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\
  2510. \104\105\102\106\103\107\234\110\124\121\122\123\130\125\126\127\
  2511. \214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337";
  2512. void rtlEStrToStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2513. {
  2514. unsigned char *codepage = ccsid1047;
  2515. unsigned i,j;
  2516. unsigned lim = inlen;
  2517. if (lim>outlen) lim = outlen;
  2518. for (i=0;i<lim;i++)
  2519. {
  2520. j = in[i] & 0x00ff;
  2521. out[i] = codepage[j];
  2522. }
  2523. for (;i<outlen; i++)
  2524. out[i] = ' ';
  2525. }
  2526. void rtlStrToEStr(unsigned outlen, char *out, unsigned inlen, const char *in)
  2527. {
  2528. unsigned char *codepage = ccsid1047_rev;
  2529. unsigned i,j;
  2530. unsigned lim = inlen;
  2531. if (lim>outlen) lim = outlen;
  2532. for (i=0;i<lim;i++)
  2533. {
  2534. j = in[i] & 0x00ff;
  2535. out[i] = codepage[j];
  2536. }
  2537. for (;i<outlen; i++)
  2538. out[i] = codepage[' '];
  2539. }
  2540. //---------------------------------------------------------------------------
  2541. void rtlCodepageToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2542. {
  2543. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2544. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2545. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2546. UErrorCode err = U_ZERO_ERROR;
  2547. unsigned len = ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2548. while(len<outlen) out[len++] = 0x0020;
  2549. unicodeEnsureIsNormalized(outlen, out);
  2550. }
  2551. void rtlCodepageToVUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2552. {
  2553. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2554. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2555. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2556. UErrorCode err = U_ZERO_ERROR;
  2557. unsigned len = ucnv_toUChars(conv, out, outlen-1, in, inlen, &err);
  2558. if (len >= outlen) len = outlen-1;
  2559. out[len] = 0;
  2560. vunicodeEnsureIsNormalized(outlen, out);
  2561. }
  2562. void rtlVCodepageToUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2563. {
  2564. rtlCodepageToUnicode(outlen, out, strlen(in), in, codepage);
  2565. }
  2566. void rtlVCodepageToVUnicode(unsigned outlen, UChar * out, char const * in, char const * codepage)
  2567. {
  2568. rtlCodepageToVUnicode(outlen, out, strlen(in), in, codepage);
  2569. }
  2570. void rtlCodepageToUnicodeUnescape(unsigned outlen, UChar * out, unsigned inlen, char const * in, char const * codepage)
  2571. {
  2572. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2573. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2574. UnicodeString raw(in, inlen, codepage);
  2575. UnicodeString unescaped = raw.unescape();
  2576. UnicodeString normalized;
  2577. normalizeUnicodeString(unescaped, normalized);
  2578. if((unsigned)normalized.length()>outlen)
  2579. normalized.truncate(outlen);
  2580. else if((unsigned)normalized.length()<outlen)
  2581. normalized.padTrailing(outlen);
  2582. normalized.extract(0, outlen, out);
  2583. }
  2584. void rtlUnicodeToCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2585. {
  2586. //If the unicode contains a character which doesn't exist in the destination codepage,
  2587. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2588. //no telling how your terminal may display this (I've seen a divide sign and a right
  2589. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2590. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2591. UErrorCode err = U_ZERO_ERROR;
  2592. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2593. if(len<outlen)
  2594. codepageBlankFill(codepage, out+len, outlen-len);
  2595. }
  2596. void rtlUnicodeToData(unsigned outlen, void * out, unsigned inlen, UChar const * in)
  2597. {
  2598. //If the unicode contains a character which doesn't exist in the destination codepage,
  2599. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2600. //no telling how your terminal may display this (I've seen a divide sign and a right
  2601. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2602. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2603. UErrorCode err = U_ZERO_ERROR;
  2604. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen, in, inlen, &err);
  2605. if(len<outlen)
  2606. memset((char *)out+len, 0, outlen-len);
  2607. }
  2608. void rtlUnicodeToVCodepage(unsigned outlen, char * out, unsigned inlen, UChar const * in, char const * codepage)
  2609. {
  2610. //If the unicode contains a character which doesn't exist in the destination codepage,
  2611. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2612. //no telling how your terminal may display this (I've seen a divide sign and a right
  2613. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2614. UConverter * conv = queryRTLUnicodeConverter(ASCII_LIKE_CODEPAGE)->query();
  2615. UErrorCode err = U_ZERO_ERROR;
  2616. unsigned len = ucnv_fromUChars(conv, (char *)out, outlen-1, in, inlen, &err);
  2617. if (len >= outlen) len = outlen-1;
  2618. out[len] = 0;
  2619. }
  2620. void rtlVUnicodeToCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2621. {
  2622. rtlUnicodeToCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2623. }
  2624. void rtlVUnicodeToData(unsigned outlen, void * out, UChar const * in)
  2625. {
  2626. rtlUnicodeToData(outlen, out, rtlUnicodeStrlen(in), in);
  2627. }
  2628. void rtlVUnicodeToVCodepage(unsigned outlen, char * out, UChar const * in, char const * codepage)
  2629. {
  2630. rtlUnicodeToVCodepage(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2631. }
  2632. void rtlCodepageToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2633. {
  2634. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2635. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2636. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2637. UErrorCode err = U_ZERO_ERROR;
  2638. outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2639. if(err==U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2640. out = (UChar *)malloc(outlen*2);
  2641. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2642. }
  2643. UChar * rtlCodepageToVUnicodeX(unsigned inlen, char const * in, char const * codepage)
  2644. {
  2645. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2646. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2647. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2648. UErrorCode err = U_ZERO_ERROR;
  2649. unsigned outlen = ucnv_toUChars(conv, 0, 0, in, inlen, &err);
  2650. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2651. UChar * out = (UChar *)malloc((outlen+1)*2);
  2652. ucnv_toUChars(conv, out, outlen, in, inlen, &err);
  2653. out[outlen] = 0x0000;
  2654. vunicodeEnsureIsNormalizedX(outlen, out);
  2655. return out;
  2656. }
  2657. void rtlVCodepageToUnicodeX(unsigned & outlen, UChar * & out, char const * in, char const * codepage)
  2658. {
  2659. rtlCodepageToUnicodeX(outlen, out, strlen(in), in, codepage);
  2660. }
  2661. UChar * rtlVCodepageToVUnicodeX(char const * in, char const * codepage)
  2662. {
  2663. return rtlCodepageToVUnicodeX(strlen(in), in, codepage);
  2664. }
  2665. void rtlCodepageToUnicodeXUnescape(unsigned & outlen, UChar * & out, unsigned inlen, char const * in, char const * codepage)
  2666. {
  2667. //If the input contains a character which doesn't exist in its claimed codepage, this will
  2668. //generate U+FFFD (substitution character). This most likely won't be displayed.
  2669. UnicodeString raw(in, inlen, codepage);
  2670. UnicodeString unescaped = raw.unescape();
  2671. UnicodeString normalized;
  2672. normalizeUnicodeString(unescaped, normalized);
  2673. outlen = normalized.length();
  2674. out = (UChar *)malloc(outlen*2);
  2675. normalized.extract(0, outlen, out);
  2676. }
  2677. void rtlUnicodeToCodepageX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in, char const * codepage)
  2678. {
  2679. //If the unicode contains a character which doesn't exist in the destination codepage,
  2680. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2681. //no telling how your terminal may display this (I've seen a divide sign and a right
  2682. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2683. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2684. UErrorCode err = U_ZERO_ERROR;
  2685. outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2686. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2687. out = (char *)malloc(outlen);
  2688. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2689. }
  2690. void rtlUnicodeToDataX(unsigned & outlen, void * & out, unsigned inlen, UChar const * in)
  2691. {
  2692. rtlUnicodeToCodepageX(outlen, (char * &)out, inlen, in, ASCII_LIKE_CODEPAGE);
  2693. }
  2694. char * rtlUnicodeToVCodepageX(unsigned inlen, UChar const * in, char const * codepage)
  2695. {
  2696. //If the unicode contains a character which doesn't exist in the destination codepage,
  2697. //this will generate the SUBstitute control code (ASCII: 0x1A, EBCDIC-US: 0x3F). There's
  2698. //no telling how your terminal may display this (I've seen a divide sign and a right
  2699. //arrow, amongst others). Perhaps we should ensure our display tools handle it neatly.
  2700. UConverter * conv = queryRTLUnicodeConverter(codepage)->query();
  2701. UErrorCode err = U_ZERO_ERROR;
  2702. unsigned outlen = ucnv_fromUChars(conv, 0, 0, in, inlen, &err);
  2703. if(err == U_BUFFER_OVERFLOW_ERROR) err = U_ZERO_ERROR;
  2704. char * out = (char *)malloc(outlen+1);
  2705. ucnv_fromUChars(conv, out, outlen, in, inlen, &err);
  2706. out[outlen] = 0x00;
  2707. return out;
  2708. }
  2709. void rtlVUnicodeToCodepageX(unsigned & outlen, char * & out, UChar const * in, char const * codepage)
  2710. {
  2711. rtlUnicodeToCodepageX(outlen, out, rtlUnicodeStrlen(in), in, codepage);
  2712. }
  2713. char * rtlVUnicodeToVCodepageX(UChar const * in, char const * codepage)
  2714. {
  2715. return rtlUnicodeToVCodepageX(rtlUnicodeStrlen(in), in, codepage);
  2716. }
  2717. void rtlStrToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  2718. {
  2719. rtlCodepageToUnicode(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2720. }
  2721. void rtlUnicodeToStr(unsigned outlen, char * out, unsigned inlen, UChar const * in)
  2722. {
  2723. rtlUnicodeToCodepage(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2724. }
  2725. void rtlStrToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  2726. {
  2727. rtlCodepageToUnicodeX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2728. }
  2729. void rtlUnicodeToStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2730. {
  2731. rtlUnicodeToCodepageX(outlen, out, inlen, in, ASCII_LIKE_CODEPAGE);
  2732. }
  2733. void rtlUnicodeToEscapedStrX(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2734. {
  2735. StringBuffer outbuff;
  2736. escapeUnicode(inlen, in, outbuff);
  2737. outlen = outbuff.length();
  2738. out = (char *)malloc(outlen);
  2739. memcpy(out, outbuff.str(), outlen);
  2740. }
  2741. void rtlUnicodeToQuotedUTF8X(unsigned & outlen, char * & out, unsigned inlen, UChar const * in)
  2742. {
  2743. UnicodeString unicode(in, inlen);
  2744. unicode.findAndReplace("'", "\\'");
  2745. //pre-flight length - may be more efficient to guess length and only re-extract if guess no good, but what to guess?
  2746. outlen = unicode.extract(0, unicode.length(), 0, 0, UTF8_CODEPAGE);
  2747. out = (char *)malloc(outlen);
  2748. unicode.extract(0, unicode.length(), out, outlen, UTF8_CODEPAGE);
  2749. }
  2750. bool rtlCodepageToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2751. {
  2752. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2753. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2754. UErrorCode err = U_ZERO_ERROR;
  2755. char * target = out;
  2756. ucnv_convertEx(outconv, inconv, &target, out+outlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2757. unsigned len = target - out;
  2758. if(len < outlen)
  2759. codepageBlankFill(outcodepage, target, outlen-len);
  2760. return U_SUCCESS(err);
  2761. }
  2762. bool rtlCodepageToCodepageX(unsigned & outlen, char * & out, unsigned maxoutlen, unsigned inlen, char const * in, char const * outcodepage, char const * incodepage)
  2763. {
  2764. UConverter * inconv = queryRTLUnicodeConverter(incodepage)->query();
  2765. UConverter * outconv = queryRTLUnicodeConverter(outcodepage)->query();
  2766. UErrorCode err = U_ZERO_ERROR;
  2767. //GH->PG is there a better way of coding this with out temporary buffer?
  2768. char * tempBuffer = (char *)malloc(maxoutlen);
  2769. char * target = tempBuffer;
  2770. ucnv_convertEx(outconv, inconv, &target, tempBuffer+maxoutlen, &in, in+inlen, NULL, NULL, NULL, NULL, TRUE, TRUE, &err);
  2771. unsigned len = target - tempBuffer;
  2772. outlen = len;
  2773. if (len == maxoutlen)
  2774. out = tempBuffer;
  2775. else
  2776. {
  2777. out = (char *)realloc(tempBuffer, len);
  2778. if (!out)
  2779. out = tempBuffer;
  2780. }
  2781. return U_SUCCESS(err);
  2782. }
  2783. int rtlSingleUtf8ToCodepage(char * out, unsigned inlen, char const * in, char const * outcodepage)
  2784. {
  2785. if(!U8_IS_LEAD(*in))
  2786. return -1;
  2787. uint8_t trailbytes = U8_COUNT_TRAIL_BYTES(*in);
  2788. if(inlen < (unsigned)(trailbytes+1))
  2789. return -1;
  2790. if(!rtlCodepageToCodepage(1, out, trailbytes+1, in, outcodepage, UTF8_CODEPAGE))
  2791. return -1;
  2792. return static_cast<int>(trailbytes); //cast okay as is certainly 0--3
  2793. }
  2794. //---------------------------------------------------------------------------
  // Returns a malloc'd copy of the `slen` bytes at `src` in `tgt`, with its
  // length in `tlen`.
  void rtlStrToDataX(unsigned & tlen, void * & tgt, unsigned slen, const void * src)
  {
      void * copy = malloc(slen);
      tgt = memcpy(copy, src, slen);   // memcpy returns its destination
      tlen = slen;
  }
  // Returns a malloc'd copy of the `slen` bytes at `src` in `tgt` (not
  // null-terminated), with its length in `tlen`.
  void rtlStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const void * src)
  {
      void * copy = malloc(slen);
      tgt = static_cast<char *>(memcpy(copy, src, slen));   // memcpy returns its destination
      tlen = slen;
  }
  // Returns a malloc'd, null-terminated copy of the `slen` bytes at `src`.
  char * rtlStrToVStrX(unsigned slen, const void * src)
  {
      char * result = static_cast<char *>(malloc(slen+1));   // +1 for the terminator
      memcpy(result, src, slen);
      result[slen] = 0;
      return result;
  }
  2816. char * rtlEStrToVStrX(unsigned slen, const char * src)
  2817. {
  2818. char * astr = (char*)alloca(slen);
  2819. rtlEStrToStr(slen,astr,slen,src);
  2820. return rtlStrToVStrX(slen, astr);
  2821. }
  2822. void rtlEStrToStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2823. {
  2824. char * data = (char *)malloc(slen);
  2825. rtlEStrToStr(slen, data, slen, src);
  2826. tgt = data;
  2827. tlen = slen;
  2828. }
  2829. void rtlStrToEStrX(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
  2830. {
  2831. char * data = (char *)malloc(slen);
  2832. rtlStrToEStr(slen, data, slen, src);
  2833. tgt = data;
  2834. tlen = slen;
  2835. }
  2836. //---------------------------------------------------------------------------
  2837. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  2838. #define FNV1_64_INIT HASH64_INIT
  2839. #define FNV_64_PRIME I64C(0x100000001b3U)
  2840. hash64_t rtlHash64Data(size32_t len, const void *buf, hash64_t hval)
  2841. {
  2842. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  2843. const unsigned char *be = bp + len; /* beyond end of buffer */
  2844. while (bp < be)
  2845. {
  2846. hval *= FNV_64_PRIME;
  2847. hval ^= *bp++;
  2848. }
  2849. return hval;
  2850. }
  2851. hash64_t rtlHash64VStr(const char *str, hash64_t hval)
  2852. {
  2853. const unsigned char *s = (const unsigned char *)str;
  2854. unsigned char c;
  2855. while ((c = *s++) != 0)
  2856. {
  2857. hval *= FNV_64_PRIME;
  2858. hval ^= c;
  2859. }
  2860. return hval;
  2861. }
  2862. hash64_t rtlHash64Unicode(unsigned length, UChar const * k, hash64_t initval)
  2863. {
  2864. return rtlHash64Data(length*2, k, initval);
  2865. }
  2866. hash64_t rtlHash64VUnicode(UChar const * k, hash64_t initval)
  2867. {
  2868. return rtlHash64Data(rtlUnicodeStrlen(k)*2, k, initval);
  2869. }
  2870. //---------------------------------------------------------------------------
  2871. // See http://www.isthe.com/chongo/tech/comp/fnv/index.html
  2872. #define FNV1_32_INIT HASH32_INIT
  2873. #define FNV_32_PRIME 0x1000193
  2874. unsigned rtlHash32Data(size32_t len, const void *buf, unsigned hval)
  2875. {
  2876. const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */
  2877. const unsigned char *be = bp + len; /* beyond end of buffer */
  2878. while (bp < be)
  2879. {
  2880. hval *= FNV_32_PRIME;
  2881. hval ^= *bp++;
  2882. }
  2883. return hval;
  2884. }
  2885. unsigned rtlHash32VStr(const char *str, unsigned hval)
  2886. {
  2887. const unsigned char *s = (const unsigned char *)str;
  2888. unsigned char c;
  2889. while ((c = *s++) != 0)
  2890. {
  2891. hval *= FNV_32_PRIME;
  2892. hval ^= c;
  2893. }
  2894. return hval;
  2895. }
  2896. unsigned rtlHash32Unicode(unsigned length, UChar const * k, unsigned initval)
  2897. {
  2898. return rtlHash32Data(length*2, k, initval);
  2899. }
  2900. unsigned rtlHash32VUnicode(UChar const * k, unsigned initval)
  2901. {
  2902. return rtlHash32Data(rtlUnicodeStrlen(k)*2, k, initval);
  2903. }
  2904. //---------------------------------------------------------------------------
  // Hash Helper functions
  // mix(a,b,c): reversibly scrambles the three 32-bit state words in place.
  #define mix(a,b,c) \
  { \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12); \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3); \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
  }
  // GETBYTEi(n): byte n+i of the local pointer `k`, already shifted into byte
  // position i of a 32-bit word. GETWORD assembles a little-endian word.
  #define GETBYTE0(n) ((unsigned)k[n])
  #define GETBYTE1(n) ((unsigned)k[n+1]<<8)
  #define GETBYTE2(n) ((unsigned)k[n+2]<<16)
  #define GETBYTE3(n) ((unsigned)k[n+3]<<24)
  #define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
  // the above looks inefficient but the compiler optimizes well
  // this hash looks slow but is about twice as quick as using our CRC table
  // and gives better results
  // (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)
  //
  // Hashes the `length` bytes at `_k`; `initval` seeds the state (the comments
  // below call it "the previous hash value", so calls can be chained).
  // Returns the 32-bit hash.
  unsigned rtlHashData( unsigned length, const void *_k, unsigned initval)
  {
      const unsigned char * k = (const unsigned char *)_k;
      // `register` dropped: the storage class is ill-formed from C++17 on and
      // was only ever a hint.
      unsigned a,b,c,len;
      /* Set up the internal state */
      len = length;
      a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
      c = initval; /* the previous hash value */
      /*---------------------------------------- handle most of the key */
      while (len >= 12)
      {
          a += GETWORD(k,0);
          b += GETWORD(k,4);
          c += GETWORD(k,8);
          mix(a,b,c);
          k += 12; len -= 12;
      }
      /*------------------------------------- handle the last 11 bytes */
      c += length;
      switch(len) /* all the case statements fall through */
      {
      case 11: c+=GETBYTE3(7);
      case 10: c+=GETBYTE2(7);
      case 9 : c+=GETBYTE1(7);
      /* the first byte of c is reserved for the length */
      case 8 : b+=GETBYTE3(4);
      case 7 : b+=GETBYTE2(4);
      case 6 : b+=GETBYTE1(4);
      case 5 : b+=GETBYTE0(4);
      case 4 : a+=GETBYTE3(0);
      case 3 : a+=GETBYTE2(0);
      case 2 : a+=GETBYTE1(0);
      case 1 : a+=GETBYTE0(0);
      /* case 0: nothing left to add */
      }
      mix(a,b,c);
      /*-------------------------------------------- report the result */
      return c;
  }
  2966. unsigned rtlHashString( unsigned length, const char *_k, unsigned initval)
  2967. {
  2968. return rtlHashData(rtlTrimStrLen(length, _k), _k, initval);
  2969. }
  2970. unsigned rtlHashUnicode(unsigned length, UChar const * k, unsigned initval)
  2971. {
  2972. //Would make more sense to trim here.
  2973. return rtlHashData(length*2, k, initval);
  2974. }
  2975. unsigned rtlHashVStr(const char * k, unsigned initval)
  2976. {
  2977. return rtlHashData(rtlTrimVStrLen(k), k, initval);
  2978. }
  2979. unsigned rtlHashVUnicode(UChar const * k, unsigned initval)
  2980. {
  2981. return rtlHashData(rtlTrimVUnicodeStrLen(k)*2, k, initval);
  2982. }
  2983. #define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
  2984. unsigned rtlHashDataNC( unsigned length, const void * _k, unsigned initval)
  2985. {
  2986. const unsigned char * k = (const unsigned char *)_k;
  2987. register unsigned a,b,c,len;
  2988. /* Set up the internal state */
  2989. len = length;
  2990. a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
  2991. c = initval; /* the previous hash value */
  2992. /*---------------------------------------- handle most of the key */
  2993. while (len >= 12)
  2994. {
  2995. a += GETWORDNC(k,0);
  2996. b += GETWORDNC(k,4);
  2997. c += GETWORDNC(k,8);
  2998. mix(a,b,c);
  2999. k += 12; len -= 12;
  3000. }
  3001. /*------------------------------------- handle the last 11 bytes */
  3002. c += length;
  3003. switch(len) /* all the case statements fall through */
  3004. {
  3005. case 11: c+=GETBYTE3(7)&0xdf;
  3006. case 10: c+=GETBYTE2(7)&0xdf;
  3007. case 9 : c+=GETBYTE1(7)&0xdf;
  3008. /* the first byte of c is reserved for the length */
  3009. case 8 : b+=GETBYTE3(4)&0xdf;
  3010. case 7 : b+=GETBYTE2(4)&0xdf;
  3011. case 6 : b+=GETBYTE1(4)&0xdf;
  3012. case 5 : b+=GETBYTE0(4)&0xdf;
  3013. case 4 : a+=GETBYTE3(0)&0xdf;
  3014. case 3 : a+=GETBYTE2(0)&0xdf;
  3015. case 2 : a+=GETBYTE1(0)&0xdf;
  3016. case 1 : a+=GETBYTE0(0)&0xdf;
  3017. /* case 0: nothing left to add */
  3018. }
  3019. mix(a,b,c);
  3020. /*-------------------------------------------- report the result */
  3021. return c;
  3022. }
  3023. unsigned rtlHashVStrNC(const char * k, unsigned initval)
  3024. {
  3025. return rtlHashDataNC(strlen(k), k, initval);
  3026. }
  3027. //---------------------------------------------------------------------------
  3028. unsigned rtlCrcData( unsigned length, const void *_k, unsigned initval)
  3029. {
  3030. return crc32((const char *)_k, length, initval);
  3031. }
  3032. unsigned rtlCrcUnicode(unsigned length, UChar const * k, unsigned initval)
  3033. {
  3034. return crc32((char const *)k, length*2, initval);
  3035. }
  3036. unsigned rtlCrcVStr( const char * k, unsigned initval)
  3037. {
  3038. return crc32(k, strlen(k), initval);
  3039. }
  3040. unsigned rtlCrcVUnicode(UChar const * k, unsigned initval)
  3041. {
  3042. return crc32((char const *)k, rtlUnicodeStrlen(k)*2, initval);
  3043. }
  3044. //---------------------------------------------------------------------------
  3045. // MD5 processing:
  3046. void rtlHashMd5Init(size32_t sizestate, void * _state)
  3047. {
  3048. assertex(sizestate >= sizeof(md5_state_s));
  3049. md5_state_s * state = (md5_state_s *)_state;
  3050. md5_init(state);
  3051. }
  3052. void rtlHashMd5Data(size32_t len, const void *buf, size32_t sizestate, void * _state)
  3053. {
  3054. md5_state_s * state = (md5_state_s * )_state;
  3055. md5_append(state, (const md5_byte_t *)buf, len);
  3056. }
  3057. void rtlHashMd5Finish(void * out, size32_t sizestate, void * _state)
  3058. {
  3059. typedef md5_byte_t digest_t[16];
  3060. md5_state_s * state = (md5_state_s *)_state;
  3061. md5_finish(state, *(digest_t*)out);
  3062. }
  3063. //---------------------------------------------------------------------------
  3064. unsigned rtlRandom()
  3065. {
  3066. CriticalBlock block(random_Sect);
  3067. return random_->next();
  3068. }
  3069. void rtlSeedRandom(unsigned value)
  3070. {
  3071. CriticalBlock block(random_Sect);
  3072. random_->seed(value);
  3073. }
  3074. // These are all useful functions for testing - not really designed for other people to use them...
  3075. ECLRTL_API unsigned rtlTick()
  3076. {
  3077. return msTick();
  3078. }
  3079. ECLRTL_API bool rtlGPF()
  3080. {
  3081. char * x = 0;
  3082. *x = 0;
  3083. return false;
  3084. }
  3085. ECLRTL_API unsigned rtlSleep(unsigned delay)
  3086. {
  3087. MilliSleep(delay);
  3088. return 0;
  3089. }
  3090. ECLRTL_API unsigned rtlDisplay(unsigned len, const char * src)
  3091. {
  3092. LOG(MCprogress, unknownJob, "%.*s", len, src);
  3093. return 0;
  3094. }
  // Prints (at most) the first `len` characters of src to stdout, followed by a newline.
  void rtlEcho(unsigned len, const char * src)
  {
      const char * const format = "%.*s\n";
      printf(format, len, src);
  }
  3099. ECLRTL_API unsigned __int64 rtlNano()
  3100. {
  3101. return cycle_to_nanosec(get_cycles_now());
  3102. }
  3103. ECLRTL_API void rtlTestGetPrimes(unsigned & num, void * & data)
  3104. {
  3105. unsigned numPrimes = 6;
  3106. unsigned size = sizeof(unsigned) * numPrimes;
  3107. unsigned * primes = (unsigned *)malloc(size);
  3108. primes[0] = 1;
  3109. primes[1] = 2;
  3110. primes[2] = 3;
  3111. primes[3] = 5;
  3112. primes[4] = 7;
  3113. primes[5] = 11;
  3114. num = numPrimes;
  3115. data = primes;
  3116. }
  3117. ECLRTL_API void rtlTestFibList(bool & outAll, size32_t & outSize, void * & outData, bool inAll, size32_t inSize, const void * inData)
  3118. {
  3119. const unsigned * inList = (const unsigned *)inData;
  3120. unsigned * outList = (unsigned *)malloc(inSize);
  3121. unsigned * curOut = outList;
  3122. unsigned count = inSize / sizeof(*inList);
  3123. unsigned prev = 0;
  3124. for (unsigned i=0; i < count; i++)
  3125. {
  3126. unsigned next = *inList++;
  3127. *curOut++ = next + prev;
  3128. prev = next;
  3129. }
  3130. outAll = inAll;
  3131. outSize = inSize;
  3132. outData = outList;
  3133. }
  3134. unsigned rtlDelayReturn(unsigned value, unsigned sleepTime)
  3135. {
  3136. MilliSleep(sleepTime);
  3137. return value;
  3138. }
  3139. //---------------------------------------------------------------------------
  3140. class CRtlFailException : public CInterface, public IUserException
  3141. {
  3142. public:
  3143. CRtlFailException(int _code, char const * _msg) : code(_code) { msg = strdup(_msg); }
  3144. ~CRtlFailException() { free(msg); }
  3145. IMPLEMENT_IINTERFACE;
  3146. virtual int errorCode() const { return code; }
  3147. virtual StringBuffer & errorMessage(StringBuffer & buff) const { return buff.append(msg); }
  3148. virtual MessageAudience errorAudience() const { return MSGAUD_user; }
  3149. private:
  3150. int code;
  3151. char * msg;
  3152. };
  3153. void rtlFail(int code, const char *msg)
  3154. {
  3155. throw dynamic_cast<IUserException *>(new CRtlFailException(code, msg));
  3156. }
  3157. void rtlSysFail(int code, const char *msg)
  3158. {
  3159. throw MakeStringException(MSGAUD_user, code, "%s", msg);
  3160. }
  3161. void rtlReportRowOverflow(unsigned size, unsigned max)
  3162. {
  3163. throw MakeStringException(MSGAUD_user, 1000, "Row size %u exceeds the maximum size specified(%u)", size, max);
  3164. }
  3165. void rtlReportFieldOverflow(unsigned size, unsigned max, const char * name)
  3166. {
  3167. if (!name)
  3168. rtlReportRowOverflow(size, max);
  3169. else
  3170. throw MakeStringException(MSGAUD_user, 1000, "Assignment to field '%s' causes row overflow. Size %u exceeds the maximum size specified(%u)", name, size, max);
  3171. }
  3172. void rtlCheckRowOverflow(unsigned size, unsigned max)
  3173. {
  3174. if (size > max)
  3175. rtlReportRowOverflow(size, max);
  3176. }
  3177. void rtlCheckFieldOverflow(unsigned size, unsigned max, const char * field)
  3178. {
  3179. if (size > max)
  3180. rtlReportFieldOverflow(size, max, field);
  3181. }
  3182. void rtlFailUnexpected()
  3183. {
  3184. throw MakeStringException(MSGAUD_user, -1, "Unexpected code execution");
  3185. }
  3186. void rtlFailOnAssert()
  3187. {
  3188. throw MakeStringException(MSGAUD_user, -1, "Abort execution");
  3189. }
  3190. void rtlFailDivideByZero()
  3191. {
  3192. throw MakeStringException(MSGAUD_user, -1, "Division by zero");
  3193. }
  3194. //---------------------------------------------------------------------------
  3195. void deserializeRaw(unsigned recordSize, void *record, MemoryBuffer &in)
  3196. {
  3197. in.read(recordSize, record);
  3198. }
  3199. void deserializeDataX(size32_t & len, void * & data, MemoryBuffer &in)
  3200. {
  3201. free(data);
  3202. in.read(sizeof(len), &len);
  3203. data = malloc(len);
  3204. in.read(len, data);
  3205. }
  3206. void deserializeStringX(size32_t & len, char * & data, MemoryBuffer &in)
  3207. {
  3208. free(data);
  3209. in.read(sizeof(len), &len);
  3210. data = (char *)malloc(len);
  3211. in.read(len, data);
  3212. }
  3213. char * deserializeCStringX(MemoryBuffer &in)
  3214. {
  3215. unsigned len;
  3216. in.read(sizeof(len), &len);
  3217. char * data = (char *)malloc(len+1);
  3218. in.read(len, data);
  3219. data[len] = 0;
  3220. return data;
  3221. }
  3222. void deserializeUnicodeX(size32_t & len, UChar * & data, MemoryBuffer &in)
  3223. {
  3224. free(data);
  3225. in.read(sizeof(len), &len);
  3226. data = (UChar *)malloc(len*sizeof(UChar));
  3227. in.read(len*sizeof(UChar), data);
  3228. }
  3229. void deserializeUtf8X(size32_t & len, char * & data, MemoryBuffer &in)
  3230. {
  3231. free(data);
  3232. in.read(sizeof(len), &len);
  3233. unsigned size = rtlUtf8Size(len, in.readDirect(0));
  3234. data = (char *)malloc(size);
  3235. in.read(size, data);
  3236. }
  3237. UChar * deserializeVUnicodeX(MemoryBuffer &in)
  3238. {
  3239. unsigned len;
  3240. in.read(sizeof(len), &len);
  3241. UChar * data = (UChar *)malloc((len+1)*sizeof(UChar));
  3242. in.read(len*sizeof(UChar), data);
  3243. data[len] = 0;
  3244. return data;
  3245. }
  3246. void deserializeSet(bool & isAll, size32_t & len, void * & data, MemoryBuffer &in)
  3247. {
  3248. free(data);
  3249. in.read(isAll);
  3250. in.read(sizeof(len), &len);
  3251. data = malloc(len);
  3252. in.read(len, data);
  3253. }
  3254. void serializeRaw(unsigned recordSize, const void *record, MemoryBuffer &out)
  3255. {
  3256. out.append(recordSize, record);
  3257. }
  3258. void serializeDataX(size32_t len, const void * data, MemoryBuffer &out)
  3259. {
  3260. out.append(len).append(len, data);
  3261. }
  3262. void serializeStringX(size32_t len, const char * data, MemoryBuffer &out)
  3263. {
  3264. out.append(len).append(len, data);
  3265. }
  3266. void serializeCStringX(const char * data, MemoryBuffer &out)
  3267. {
  3268. unsigned len = strlen(data);
  3269. out.append(len).append(len, data);
  3270. }
  3271. void serializeUnicodeX(size32_t len, const UChar * data, MemoryBuffer &out)
  3272. {
  3273. out.append(len).append(len*sizeof(UChar), data);
  3274. }
  3275. void serializeUtf8X(size32_t len, const char * data, MemoryBuffer &out)
  3276. {
  3277. out.append(len).append(rtlUtf8Size(len, data), data);
  3278. }
  3279. void serializeSet(bool isAll, size32_t len, const void * data, MemoryBuffer &out)
  3280. {
  3281. out.append(isAll).append(len).append(len, data);
  3282. }
  3283. //---------------------------------------------------------------------------
  3284. ECLRTL_API void serializeFixedString(unsigned len, const char *field, MemoryBuffer &out)
  3285. {
  3286. out.append(len, field);
  3287. }
  3288. ECLRTL_API void serializeLPString(unsigned len, const char *field, MemoryBuffer &out)
  3289. {
  3290. out.append(len);
  3291. out.append(len, field);
  3292. }
  3293. ECLRTL_API void serializeVarString(const char *field, MemoryBuffer &out)
  3294. {
  3295. out.append(field);
  3296. }
  3297. ECLRTL_API void serializeBool(bool field, MemoryBuffer &out)
  3298. {
  3299. out.append(field);
  3300. }
  3301. ECLRTL_API void serializeFixedData(unsigned len, const void *field, MemoryBuffer &out)
  3302. {
  3303. out.append(len, field);
  3304. }
  3305. ECLRTL_API void serializeLPData(unsigned len, const void *field, MemoryBuffer &out)
  3306. {
  3307. out.append(len);
  3308. out.append(len, field);
  3309. }
  3310. ECLRTL_API void serializeInt1(signed char field, MemoryBuffer &out)
  3311. {
  3312. // MORE - why did overloading pick the int method for this???
  3313. // out.append(field);
  3314. out.appendEndian(sizeof(field), &field);
  3315. }
  3316. ECLRTL_API void serializeInt2(signed short field, MemoryBuffer &out)
  3317. {
  3318. out.appendEndian(sizeof(field), &field);
  3319. }
  3320. ECLRTL_API void serializeInt3(signed int field, MemoryBuffer &out)
  3321. {
  3322. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3323. out.appendEndian(3, &field);
  3324. #else
  3325. out.appendEndian(3, ((char *) &field) + 1);
  3326. #endif
  3327. }
  3328. ECLRTL_API void serializeInt4(signed int field, MemoryBuffer &out)
  3329. {
  3330. out.appendEndian(sizeof(field), &field);
  3331. }
  3332. ECLRTL_API void serializeInt5(signed __int64 field, MemoryBuffer &out)
  3333. {
  3334. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3335. out.appendEndian(5, &field);
  3336. #else
  3337. out.appendEndian(5, ((char *) &field) + 3);
  3338. #endif
  3339. }
  3340. ECLRTL_API void serializeInt6(signed __int64 field, MemoryBuffer &out)
  3341. {
  3342. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3343. out.appendEndian(6, &field);
  3344. #else
  3345. out.appendEndian(6, ((char *) &field) + 2);
  3346. #endif
  3347. }
  3348. ECLRTL_API void serializeInt7(signed __int64 field, MemoryBuffer &out)
  3349. {
  3350. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3351. out.appendEndian(7, &field);
  3352. #else
  3353. out.appendEndian(7, ((char *) &field) + 1);
  3354. #endif
  3355. }
  3356. ECLRTL_API void serializeInt8(signed __int64 field, MemoryBuffer &out)
  3357. {
  3358. out.appendEndian(sizeof(field), &field);
  3359. }
  3360. ECLRTL_API void serializeUInt1(unsigned char field, MemoryBuffer &out)
  3361. {
  3362. out.appendEndian(sizeof(field), &field);
  3363. }
  3364. ECLRTL_API void serializeUInt2(unsigned short field, MemoryBuffer &out)
  3365. {
  3366. out.appendEndian(sizeof(field), &field);
  3367. }
  3368. ECLRTL_API void serializeUInt3(unsigned int field, MemoryBuffer &out)
  3369. {
  3370. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3371. out.appendEndian(3, &field);
  3372. #else
  3373. out.appendEndian(3, ((char *) &field) + 1);
  3374. #endif
  3375. }
  3376. ECLRTL_API void serializeUInt4(unsigned int field, MemoryBuffer &out)
  3377. {
  3378. out.appendEndian(sizeof(field), &field);
  3379. }
  3380. ECLRTL_API void serializeUInt5(unsigned __int64 field, MemoryBuffer &out)
  3381. {
  3382. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3383. out.appendEndian(5, &field);
  3384. #else
  3385. out.appendEndian(5, ((char *) &field) + 3);
  3386. #endif
  3387. }
  3388. ECLRTL_API void serializeUInt6(unsigned __int64 field, MemoryBuffer &out)
  3389. {
  3390. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3391. out.appendEndian(6, &field);
  3392. #else
  3393. out.appendEndian(6, ((char *) &field) + 2);
  3394. #endif
  3395. }
  3396. ECLRTL_API void serializeUInt7(unsigned __int64 field, MemoryBuffer &out)
  3397. {
  3398. #if __BYTE_ORDER == __LITTLE_ENDIAN
  3399. out.appendEndian(7, &field);
  3400. #else
  3401. out.appendEndian(7, ((char *) &field) + 1);
  3402. #endif
  3403. }
  3404. ECLRTL_API void serializeUInt8(unsigned __int64 field, MemoryBuffer &out)
  3405. {
  3406. out.appendEndian(sizeof(field), &field);
  3407. }
  3408. ECLRTL_API void serializeReal4(float field, MemoryBuffer &out)
  3409. {
  3410. out.appendEndian(sizeof(field), &field);
  3411. }
  3412. ECLRTL_API void serializeReal8(double field, MemoryBuffer &out)
  3413. {
  3414. out.append(sizeof(field), &field);
  3415. }
  3416. //These maths functions can all have out of range arguments....
  3417. //---------------------------------------------------------------------------
  3418. ECLRTL_API double rtlLog10(double x)
  3419. {
  3420. if (x <= 0) return 0;
  3421. return log10(x);
  3422. }
  3423. ECLRTL_API double rtlLog(double x)
  3424. {
  3425. if (x <= 0) return 0;
  3426. return log(x);
  3427. }
  3428. ECLRTL_API double rtlSqrt(double x)
  3429. {
  3430. if (x < 0) return 0;
  3431. return sqrt(x);
  3432. }
  3433. ECLRTL_API double rtlACos(double x)
  3434. {
  3435. if (fabs(x) > 1) return 0;
  3436. return acos(x);
  3437. }
  3438. ECLRTL_API double rtlASin(double x)
  3439. {
  3440. if (fabs(x) > 1) return 0;
  3441. return asin(x);
  3442. }
  3443. //---------------------------------------------------------------------------
  3444. ECLRTL_API bool rtlIsValidReal(unsigned size, const void * data)
  3445. {
  3446. byte * bytes = (byte *)data;
  3447. //Valid unless it is a Nan, represented by exponent all 1's and non-zero mantissa (ignore the sign).
  3448. if (size == 4)
  3449. {
  3450. //sign(1) exponent(8) mantissa(23)
  3451. if (((bytes[3] & 0x7f) == 0x7f) && ((bytes[2] & 0x80) == 0x80))
  3452. {
  3453. if ((bytes[2] & 0x7f) != 0 || bytes[1] || bytes[0])
  3454. return false;
  3455. }
  3456. }
  3457. else if (size == 8)
  3458. {
  3459. //sign(1) exponent(11) mantissa(52)
  3460. if (((bytes[7] & 0x7f) == 0x7f) && ((bytes[6] & 0xF0) == 0xF0))
  3461. {
  3462. if ((bytes[6] & 0xF) || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3463. return false;
  3464. }
  3465. }
  3466. else
  3467. {
  3468. //sign(1) exponent(15) mantissa(64)
  3469. assertex(size==10);
  3470. if (((bytes[9] & 0x7f) == 0x7f) && (bytes[8] == 0xFF))
  3471. {
  3472. if (bytes[7] || bytes[6] || bytes[5] || bytes[4] || bytes[3] || bytes[2] || bytes[1] || bytes[0])
  3473. return false;
  3474. }
  3475. }
  3476. return true;
  3477. }
  // Returns the double used to represent a "null" real: a NaN whose in-memory bytes are
  // 01 00 00 00 00 00 F0 7F (all exponent bits set, lowest mantissa bit set when the bytes
  // are interpreted little-endian).
  // The bytes are copied into the double with memcpy; the previous code wrote one union member
  // and read another, which is undefined behaviour in C++.
  // NOTE(review): the old comment called this a non-signaling NaN, but the top mantissa bit
  // is clear in this pattern; the pattern itself is deliberately left unchanged.
  double rtlCreateRealNull()
  {
      static const unsigned char nullBits[8] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x7f };
      double result;
      memcpy(&result, nullBits, sizeof(result));
      return result;
  }
  3489. void rtlUnicodeToUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3490. {
  3491. if(inlen>outlen) inlen = outlen;
  3492. memcpy(out, in, inlen*2);
  3493. while(inlen<outlen)
  3494. out[inlen++] = 0x0020;
  3495. }
  3496. void rtlUnicodeToVUnicode(size32_t outlen, UChar * out, size32_t inlen, UChar const *in)
  3497. {
  3498. if((inlen>=outlen) && (outlen != 0)) inlen = outlen-1;
  3499. memcpy(out, in, inlen*2);
  3500. out[inlen] = 0x0000;
  3501. }
  3502. void rtlVUnicodeToUnicode(size32_t outlen, UChar * out, UChar const *in)
  3503. {
  3504. rtlUnicodeToUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3505. }
  3506. void rtlVUnicodeToVUnicode(size32_t outlen, UChar * out, UChar const *in)
  3507. {
  3508. rtlUnicodeToVUnicode(outlen, out, rtlUnicodeStrlen(in), in);
  3509. }
  3510. void rtlUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, unsigned slen, UChar const * src)
  3511. {
  3512. tgt = (UChar *)malloc(slen*2);
  3513. memcpy(tgt, src, slen*2);
  3514. tlen = slen;
  3515. }
  3516. UChar * rtlUnicodeToVUnicodeX(unsigned slen, UChar const * src)
  3517. {
  3518. UChar * data = (UChar *)malloc((slen+1)*2);
  3519. memcpy(data, src, slen*2);
  3520. data[slen] = 0x0000;
  3521. return data;
  3522. }
  3523. void rtlVUnicodeToUnicodeX(unsigned & tlen, UChar * & tgt, UChar const * src)
  3524. {
  3525. rtlUnicodeToUnicodeX(tlen, tgt, rtlUnicodeStrlen(src), src);
  3526. }
  3527. UChar * rtlVUnicodeToVUnicodeX(UChar const * src)
  3528. {
  3529. return rtlUnicodeToVUnicodeX(rtlUnicodeStrlen(src), src);
  3530. }
  3531. void rtlDecPushUnicode(size32_t len, UChar const * data)
  3532. {
  3533. char * buff = 0;
  3534. unsigned bufflen = 0;
  3535. rtlUnicodeToStrX(bufflen, buff, len, data);
  3536. DecPushString(bufflen, buff);
  3537. rtlFree(buff);
  3538. }
  3539. unsigned rtlUnicodeStrlen(UChar const * str)
  3540. {
  3541. return u_strlen(str);
  3542. }
  3543. //---------------------------------------------------------------------------
  3544. unsigned rtlUtf8Size(const void * data)
  3545. {
  3546. return readUtf8Size(data);
  3547. }
  3548. unsigned rtlUtf8Size(unsigned len, const void * _data)
  3549. {
  3550. const byte * data = (const byte *)_data;
  3551. size32_t offset = 0;
  3552. for (unsigned i=0; i< len; i++)
  3553. offset += readUtf8Size(data+offset);
  3554. return offset;
  3555. }
  3556. unsigned rtlUtf8Length(unsigned size, const void * _data)
  3557. {
  3558. const byte * data = (const byte *)_data;
  3559. size32_t length = 0;
  3560. for (unsigned offset=0; offset < size; offset += readUtf8Size(data+offset))
  3561. length++;
  3562. return length;
  3563. }
  3564. unsigned rtlUtf8Char(const void * data)
  3565. {
  3566. return readUtf8Char(data);
  3567. }
  3568. void rtlUtf8ToData(size32_t outlen, void * out, size32_t inlen, const char *in)
  3569. {
  3570. unsigned insize = rtlUtf8Size(inlen, in);
  3571. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3572. }
  3573. void rtlUtf8ToDataX(size32_t & outlen, void * & out, size32_t inlen, const char *in)
  3574. {
  3575. unsigned insize = rtlUtf8Size(inlen, in);
  3576. char * cout;
  3577. rtlCodepageToCodepageX(outlen, cout, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3578. out = cout;
  3579. }
  3580. void rtlUtf8ToStr(size32_t outlen, char * out, size32_t inlen, const char *in)
  3581. {
  3582. unsigned insize = rtlUtf8Size(inlen, in);
  3583. rtlCodepageToCodepage(outlen, (char *)out, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3584. }
  3585. void rtlUtf8ToStrX(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3586. {
  3587. unsigned insize = rtlUtf8Size(inlen, in);
  3588. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3589. }
  3590. char * rtlUtf8ToVStr(size32_t inlen, const char *in)
  3591. {
  3592. unsigned utfSize = rtlUtf8Size(inlen, in);
  3593. char *ret = (char *) rtlMalloc(inlen+1);
  3594. rtlCodepageToCodepage(inlen, ret, utfSize, in, ASCII_LIKE_CODEPAGE, UTF8_CODEPAGE);
  3595. ret[inlen] = 0;
  3596. return ret;
  3597. }
  3598. void rtlDataToUtf8(size32_t outlen, char * out, size32_t inlen, const void *in)
  3599. {
  3600. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3601. }
  3602. void rtlDataToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const void *in)
  3603. {
  3604. unsigned outsize;
  3605. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, (const char *)in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3606. outlen = rtlUtf8Length(outsize, out);
  3607. }
  3608. void rtlStrToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3609. {
  3610. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3611. }
  3612. void rtlStrToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3613. {
  3614. unsigned outsize;
  3615. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, ASCII_LIKE_CODEPAGE);
  3616. outlen = rtlUtf8Length(outsize, out);
  3617. }
  3618. void rtlUtf8ToUtf8(size32_t outlen, char * out, size32_t inlen, const char *in)
  3619. {
  3620. //Packs as many characaters as it can into the target, but don't include any half characters
  3621. size32_t offset = 0;
  3622. size32_t outsize = outlen*UTF8_MAXSIZE;
  3623. for (unsigned i=0; i< inlen; i++)
  3624. {
  3625. unsigned nextSize = readUtf8Size(in+offset);
  3626. if (offset + nextSize > outsize)
  3627. break;
  3628. offset += nextSize;
  3629. }
  3630. memcpy(out, in, offset);
  3631. if (offset != outsize)
  3632. memset(out+offset, ' ', outsize-offset);
  3633. }
  3634. void rtlUtf8ToUtf8X(size32_t & outlen, char * & out, size32_t inlen, const char *in)
  3635. {
  3636. unsigned insize = rtlUtf8Size(inlen, in);
  3637. char * buffer = (char *)malloc(insize);
  3638. memcpy(buffer, in, insize);
  3639. outlen = inlen;
  3640. out = buffer;
  3641. }
  3642. static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  3643. {
  3644. rtlDataAttr uleft(llen*sizeof(UChar));
  3645. rtlDataAttr uright(rlen*sizeof(UChar));
  3646. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  3647. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  3648. return rtlCompareUnicodeUnicode(llen, uleft.getustr(), rlen, uright.getustr(), locale);
  3649. }
  3650. int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
  3651. {
  3652. //MORE: Do a simple comparison as long as there are no non->0x80 characters around
  3653. // fall back to a full unicode comparison if we hit one - or in the next character to allow for accents etc.
  3654. const byte * bleft = (const byte *)left;
  3655. const byte * bright = (const byte *)right;
  3656. unsigned len = llen > rlen ? rlen : llen;
  3657. for (unsigned i = 0; i < len; i++)
  3658. {
  3659. byte nextLeft = bleft[i];
  3660. byte nextRight = bright[i];
  3661. if (nextLeft >= 0x80 || nextRight >= 0x80)
  3662. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  3663. if ((i+1 != len) && ((bleft[i+1] >= 0x80) || bright[i+1] >= 0x80))
  3664. return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
  3665. if (nextLeft != nextRight)
  3666. return nextLeft - nextRight;
  3667. }
  3668. int diff = 0;
  3669. if (len != llen)
  3670. {
  3671. for (;(diff == 0) && (len != llen);len++)
  3672. diff = bleft[len] - ' ';
  3673. }
  3674. else if (len != rlen)
  3675. {
  3676. for (;(diff == 0) && (len != rlen);len++)
  3677. diff = ' ' - bright[len];
  3678. }
  3679. return diff;
  3680. }
  3681. int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
  3682. {
  3683. //GH->PG Any better way of doing this? We could possible decide it was a binary comparison instead I guess.
  3684. rtlDataAttr uleft(llen*sizeof(UChar));
  3685. rtlDataAttr uright(rlen*sizeof(UChar));
  3686. rtlUtf8ToUnicode(llen, uleft.getustr(), llen, left);
  3687. rtlUtf8ToUnicode(rlen, uright.getustr(), rlen, right);
  3688. return rtlCompareUnicodeUnicodeStrength(llen, uleft.getustr(), rlen, uright.getustr(), locale, strength);
  3689. }
  3690. void rtlDecPushUtf8(size32_t len, const void * data)
  3691. {
  3692. DecPushString(len, (const char *)data); // good enough for the moment
  3693. }
  3694. bool rtlUtf8ToBool(size32_t inlen, const char * in)
  3695. {
  3696. return rtlStrToBool(inlen, in);
  3697. }
  3698. __int64 rtlUtf8ToInt(size32_t inlen, const char * in)
  3699. {
  3700. return rtlStrToInt8(inlen, in); // good enough for the moment
  3701. }
  3702. double rtlUtf8ToReal(size32_t inlen, const char * in)
  3703. {
  3704. return rtlStrToReal(inlen, in); // good enough for the moment
  3705. }
  3706. void rtlCodepageToUtf8(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  3707. {
  3708. rtlCodepageToCodepage(outlen*UTF8_MAXSIZE, (char *)out, inlen, in, UTF8_CODEPAGE, codepage);
  3709. }
  3710. void rtlCodepageToUtf8X(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3711. {
  3712. unsigned outsize;
  3713. rtlCodepageToCodepageX(outsize, out, inlen*UTF8_MAXSIZE, inlen, in, UTF8_CODEPAGE, codepage);
  3714. outlen = rtlUtf8Length(outsize, out);
  3715. }
  3716. void rtlUtf8ToCodepage(unsigned outlen, char * out, unsigned inlen, char const * in, char const * codepage)
  3717. {
  3718. unsigned insize = rtlUtf8Size(inlen, in);
  3719. rtlCodepageToCodepage(outlen, (char *)out, insize, in, codepage, UTF8_CODEPAGE);
  3720. }
  3721. void rtlUtf8ToCodepageX(unsigned & outlen, char * & out, unsigned inlen, char const * in, char const * codepage)
  3722. {
  3723. unsigned insize = rtlUtf8Size(inlen, in);
  3724. rtlCodepageToCodepageX(outlen, out, inlen, insize, in, codepage, UTF8_CODEPAGE);
  3725. }
  3726. void rtlUnicodeToUtf8X(unsigned & outlen, char * & out, unsigned inlen, const UChar * in)
  3727. {
  3728. unsigned outsize;
  3729. rtlUnicodeToCodepageX(outsize, out, inlen, in, UTF8_CODEPAGE);
  3730. outlen = rtlUtf8Length(outsize, out);
  3731. }
  3732. void rtlUnicodeToUtf8(unsigned outlen, char * out, unsigned inlen, const UChar * in)
  3733. {
  3734. rtlUnicodeToCodepage(outlen*UTF8_MAXSIZE, out, inlen, in, UTF8_CODEPAGE);
  3735. }
  3736. void rtlUtf8ToUnicodeX(unsigned & outlen, UChar * & out, unsigned inlen, char const * in)
  3737. {
  3738. rtlCodepageToUnicodeX(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  3739. }
  3740. void rtlUtf8ToUnicode(unsigned outlen, UChar * out, unsigned inlen, char const * in)
  3741. {
  3742. rtlCodepageToUnicode(outlen, out, rtlUtf8Size(inlen, in), in, UTF8_CODEPAGE);
  3743. }
  3744. ECLRTL_API void rtlUtf8SubStrFT(unsigned tlen, char * tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  3745. {
  3746. normalizeFromTo(from, to);
  3747. clipFromTo(from, to, slen);
  3748. unsigned copylen = to - from;
  3749. unsigned startOffset = rtlUtf8Size(from, src);
  3750. rtlUtf8ToUtf8(tlen, tgt, copylen, src+startOffset);
  3751. }
  3752. ECLRTL_API void rtlUtf8SubStrFTX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from, unsigned to)
  3753. {
  3754. normalizeFromTo(from, to);
  3755. unsigned len = to - from;
  3756. clipFromTo(from, to, slen);
  3757. unsigned copylen = to - from;
  3758. unsigned fillSize = len - copylen;
  3759. unsigned startOffset = rtlUtf8Size(from, src);
  3760. unsigned copySize = rtlUtf8Size(copylen, src+startOffset);
  3761. char * buffer = (char *)malloc(copySize + fillSize);
  3762. memcpy(buffer, (byte *)src+startOffset, copySize);
  3763. if (fillSize)
  3764. memset(buffer+copySize, ' ', fillSize);
  3765. tlen = len;
  3766. tgt = buffer;
  3767. }
  3768. ECLRTL_API void rtlUtf8SubStrFX(unsigned & tlen, char * & tgt, unsigned slen, char const * src, unsigned from)
  3769. {
  3770. normalizeFromTo(from, slen);
  3771. unsigned len = slen - from;
  3772. unsigned startOffset = rtlUtf8Size(from, src);
  3773. unsigned copySize = rtlUtf8Size(len, src+startOffset);
  3774. char * buffer = (char *)malloc(copySize);
  3775. memcpy(buffer, (byte *)src+startOffset, copySize);
  3776. tlen = len;
  3777. tgt = buffer;
  3778. }
  3779. ECLRTL_API void rtlUtf8ToLower(size32_t l, char * t, char const * locale)
  3780. {
  3781. //Convert to lower case, but only go via unicode routines if we have to...
  3782. for (unsigned i=0; i< l; i++)
  3783. {
  3784. byte next = *t;
  3785. if (next >= 0x80)
  3786. {
  3787. //yuk, go via unicode to do the convertion.
  3788. unsigned len = l-i;
  3789. unsigned size = rtlUtf8Size(len, t+i);
  3790. rtlDataAttr unicode(len*sizeof(UChar));
  3791. rtlCodepageToUnicode(len, unicode.getustr(), size, t+i, UTF8_CODEPAGE);
  3792. rtlUnicodeToLower(len, unicode.getustr(), locale);
  3793. rtlUnicodeToCodepage(size, t+i, len, unicode.getustr(), UTF8_CODEPAGE);
  3794. return;
  3795. }
  3796. *t++ = tolower(next);
  3797. }
  3798. }
  3799. ECLRTL_API void rtlConcatUtf8(unsigned & tlen, char * * tgt, ...)
  3800. {
  3801. //Going to have to go via unicode because of normalization. However, it might be worth optimizing the case where no special characters are present
  3802. va_list args;
  3803. unsigned totalLength = 0;
  3804. unsigned maxLength = 0;
  3805. va_start(args, tgt);
  3806. for(;;)
  3807. {
  3808. unsigned len = va_arg(args, unsigned);
  3809. if(len+1==0)
  3810. break;
  3811. const char * str = va_arg(args, const char *);
  3812. totalLength += len;
  3813. if (len > maxLength)
  3814. maxLength = len;
  3815. }
  3816. va_end(args);
  3817. rtlDataAttr next(maxLength*sizeof(UChar));
  3818. rtlDataAttr result(totalLength*sizeof(UChar));
  3819. unsigned idx = 0;
  3820. UErrorCode err = U_ZERO_ERROR;
  3821. va_start(args, tgt);
  3822. for(;;)
  3823. {
  3824. unsigned len = va_arg(args, unsigned);
  3825. if(len+1==0)
  3826. break;
  3827. if (len)
  3828. {
  3829. const char * str = va_arg(args, const char *);
  3830. rtlUtf8ToUnicode(len, next.getustr(), len, str);
  3831. idx = unorm_concatenate(result.getustr(), idx, next.getustr(), len, result.getustr(), totalLength, UNORM_NFC, 0, &err);
  3832. }
  3833. }
  3834. va_end(args);
  3835. rtlUnicodeToUtf8X(tlen, *tgt, idx, result.getustr());
  3836. }
  3837. ECLRTL_API unsigned rtlConcatUtf8ToUtf8(unsigned tlen, char * tgt, unsigned offset, unsigned slen, const char * src)
  3838. {
  3839. //NB: Inconsistently with the other varieties, idx is a byte offset, not a character position to make the code more efficient.....
  3840. //normalization is done in the space filling routine at the end
  3841. unsigned ssize = rtlUtf8Size(slen, src);
  3842. assertex(tlen * UTF8_MAXSIZE >= offset+ssize);
  3843. memcpy(tgt+offset, src, ssize);
  3844. return offset + ssize;
  3845. }
  3846. ECLRTL_API void rtlUtf8SpaceFill(unsigned tlen, char * tgt, unsigned offset)
  3847. {
  3848. const byte * src = (const byte *)tgt;
  3849. for (unsigned i=0; i<offset; i++)
  3850. {
  3851. if (src[i] >= 0x80)
  3852. {
  3853. unsigned idx = rtlUtf8Length(offset, tgt);
  3854. rtlDataAttr unicode(idx*sizeof(UChar));
  3855. rtlUtf8ToUnicode(idx, unicode.getustr(), idx, tgt);
  3856. unicodeEnsureIsNormalized(idx, unicode.getustr());
  3857. rtlUnicodeToUtf8(tlen, tgt, idx, unicode.getustr());
  3858. return;
  3859. }
  3860. }
  3861. //no special characters=>easy route.
  3862. memset(tgt+offset, ' ', tlen*UTF8_MAXSIZE-offset);
  3863. }
  3864. ECLRTL_API unsigned rtlHash32Utf8(unsigned length, const char * k, unsigned initval)
  3865. {
  3866. return rtlHash32Data(rtlUtf8Size(length, k), k, initval);
  3867. }
  3868. ECLRTL_API unsigned rtlHashUtf8(unsigned length, const char * k, unsigned initval)
  3869. {
  3870. return rtlHashData(rtlUtf8Size(length, k), k, initval);
  3871. }
  3872. ECLRTL_API hash64_t rtlHash64Utf8(unsigned length, const char * k, hash64_t initval)
  3873. {
  3874. return rtlHash64Data(rtlUtf8Size(length, k), k, initval);
  3875. }
  3876. unsigned rtlCrcUtf8(unsigned length, const char * k, unsigned initval)
  3877. {
  3878. return rtlCrcData(rtlUtf8Size(length, k), k, initval);
  3879. }
  3880. int rtlNewSearchUtf8Table(unsigned count, unsigned elemlen, char * * table, unsigned width, const char * search, const char * locale)
  3881. {
  3882. //MORE: Hopelessly inefficient.... Should rethink - possibly introducing a class for doing string searching, and the Utf8 variety pre-converting the
  3883. //search strings into unicode.
  3884. int left = 0;
  3885. int right = count;
  3886. do
  3887. {
  3888. int mid = (left + right) >> 1;
  3889. int cmp = rtlCompareUtf8Utf8(width, search, elemlen, table[mid], locale);
  3890. if (cmp < 0)
  3891. right = mid;
  3892. else if (cmp > 0)
  3893. left = mid+1;
  3894. else
  3895. return mid;
  3896. } while (left < right);
  3897. return -1;
  3898. }
  3899. //---------------------------------------------------------------------------
  3900. #ifdef _USE_BOOST_REGEX
  3901. class CStrRegExprFindInstance : implements IStrRegExprFindInstance
  3902. {
  3903. private:
  3904. bool matched;
  3905. const boost::regex * regEx;
  3906. boost::cmatch subs;
  3907. char * sample; //only required if findstr/findvstr will be called
  3908. public:
  3909. CStrRegExprFindInstance(const boost::regex * _regEx, const char * _str, size32_t _from, size32_t _len, bool _keep)
  3910. : regEx(_regEx)
  3911. {
  3912. matched = false;
  3913. sample = NULL;
  3914. try
  3915. {
  3916. if (_keep)
  3917. {
  3918. sample = (char *)malloc(_len + 1); //required for findstr
  3919. memcpy(sample, _str + _from, _len);
  3920. sample[_len] = (char)NULL;
  3921. matched = boost::regex_search(sample, subs, *regEx);
  3922. }
  3923. else
  3924. {
  3925. matched = boost::regex_search(_str + _from, _str + _len, subs, *regEx);
  3926. }
  3927. }
  3928. catch (const std::runtime_error & e)
  3929. {
  3930. throw MakeStringException(0, "Error in regex search: %s (regex: %s)", e.what(), regEx->str().c_str());
  3931. }
  3932. }
  3933. ~CStrRegExprFindInstance() //CAVEAT non-virtual destructor !
  3934. {
  3935. free(sample);
  3936. }
  3937. //IStrRegExprFindInstance
  3938. bool found() const { return matched; }
  3939. void getMatchX(unsigned & outlen, char * & out, unsigned n = 0) const
  3940. {
  3941. if (matched && (n < subs.size()))
  3942. {
  3943. outlen = subs[n].second - subs[n].first;
  3944. out = (char *)malloc(outlen);
  3945. memcpy(out, subs[n].first, outlen);
  3946. }
  3947. else
  3948. {
  3949. outlen = 0;
  3950. out = NULL;
  3951. }
  3952. }
  3953. char const * findvstr(unsigned outlen, char * out, unsigned n = 0)
  3954. {
  3955. if (matched && (n < subs.size()))
  3956. {
  3957. unsigned sublen = subs[n].second - subs[n].first;
  3958. if (sublen >= outlen)
  3959. sublen = outlen - 1;
  3960. memcpy(out, subs[n].first, sublen);
  3961. out[sublen] = 0;
  3962. }
  3963. else
  3964. {
  3965. out[0] = 0;
  3966. }
  3967. return out;
  3968. }
  3969. };
  3970. //---------------------------------------------------------------------------
  3971. class CCompiledStrRegExpr : implements ICompiledStrRegExpr
  3972. {
  3973. private:
  3974. boost::regex regEx;
  3975. public:
  3976. CCompiledStrRegExpr(const char * _regExp, bool _isCaseSensitive = false)
  3977. {
  3978. try
  3979. {
  3980. if (_isCaseSensitive)
  3981. regEx.assign(_regExp, boost::regbase::perl);
  3982. else
  3983. regEx.assign(_regExp, boost::regbase::perl | boost::regbase::icase);
  3984. }
  3985. catch(const boost::bad_expression & e)
  3986. {
  3987. StringBuffer msg;
  3988. msg.append("Bad regular expression: ").append(e.what()).append(": ").append(_regExp);
  3989. rtlFail(0, msg.str()); //throws
  3990. }
  3991. }
  3992. //ICompiledStrRegExpr
  3993. void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const
  3994. {
  3995. std::string src(str, str + slen);
  3996. std::string fmt(replace, replace + rlen);
  3997. std::string tgt;
  3998. try
  3999. {
  4000. // tgt = boost::regex_merge(src, cre->regEx, fmt, boost::format_perl); //Algorithm regex_merge has been renamed regex_replace, existing code will continue to compile, but new code should use regex_replace instead.
  4001. tgt = boost::regex_replace(src, regEx, fmt, boost::format_perl);
  4002. }
  4003. catch(const std::runtime_error & e)
  4004. {
  4005. throw MakeStringException(0, "Error in regex replace: %s (regex: %s)", e.what(), regEx.str().c_str());
  4006. }
  4007. outlen = tgt.length();
  4008. out = (char *)malloc(outlen);
  4009. memcpy(out, tgt.data(), outlen);
  4010. }
  4011. IStrRegExprFindInstance * find(const char * str, size32_t from, size32_t len, bool needToKeepSearchString) const
  4012. {
  4013. CStrRegExprFindInstance * findInst = new CStrRegExprFindInstance(&regEx, str, from, len, needToKeepSearchString);
  4014. return findInst;
  4015. }
  4016. };
  4017. //---------------------------------------------------------------------------
  4018. ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
  4019. {
  4020. CCompiledStrRegExpr * expr = new CCompiledStrRegExpr(regExpr, isCaseSensitive);
  4021. return expr;
  4022. }
  4023. ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
  4024. {
  4025. if (compiledExpr)
  4026. delete (CCompiledStrRegExpr*)compiledExpr;
  4027. }
  4028. ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
  4029. {
  4030. if (findInst)
  4031. delete (CStrRegExprFindInstance*)findInst;
  4032. }
  4033. //---------------------------------------------------------------------------
  4034. // RegEx Compiler for unicode strings
  4035. class CUStrRegExprFindInstance : implements IUStrRegExprFindInstance
  4036. {
  4037. private:
  4038. bool matched;
  4039. RegexMatcher * matcher;
  4040. UnicodeString sample;
  4041. unsigned matchedSize;
  4042. public:
  4043. CUStrRegExprFindInstance(RegexMatcher * _matcher, const UChar * _str, size32_t _from, size32_t _len)
  4044. : matcher(_matcher)
  4045. {
  4046. matched = false;
  4047. sample.setTo(_str + _from, _len);
  4048. matcher->reset(sample);
  4049. matched = matcher->find();
  4050. if (matched)
  4051. matchedSize = (unsigned)matcher->groupCount() + 1;
  4052. }
  4053. //IUStrRegExprFindInstance
  4054. bool found() const { return matched; }
  4055. void getMatchX(unsigned & outlen, UChar * & out, unsigned n = 0) const
  4056. {
  4057. if(matched && (n < matchedSize))
  4058. {
  4059. assertex(matcher);
  4060. UErrorCode uerr = U_ZERO_ERROR;
  4061. int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
  4062. int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
  4063. outlen = end - start;
  4064. out = (UChar *)malloc(outlen*2);
  4065. sample.extract(start, outlen, out);
  4066. }
  4067. else
  4068. {
  4069. outlen = 0;
  4070. out = NULL;
  4071. }
  4072. }
  4073. UChar const * findvstr(unsigned outlen, UChar * out, unsigned n = 0)
  4074. {
  4075. if(matched && (n < matchedSize))
  4076. {
  4077. assertex(matcher);
  4078. UErrorCode uerr = U_ZERO_ERROR;
  4079. int32_t start = n ? matcher->start(n, uerr) : matcher->start(uerr);
  4080. int32_t end = n ? matcher->end(n, uerr) : matcher->end(uerr);
  4081. unsigned sublen = end - start;
  4082. if(sublen >= outlen)
  4083. sublen = outlen - 1;
  4084. sample.extract(start, sublen, out);
  4085. out[sublen] = 0;
  4086. }
  4087. else
  4088. {
  4089. out[0] = 0;
  4090. }
  4091. return out;
  4092. }
  4093. };
  4094. //---------------------------------------------------------------------------
  4095. class CCompiledUStrRegExpr : implements ICompiledUStrRegExpr
  4096. {
  4097. private:
  4098. RegexPattern * pattern;
  4099. RegexMatcher * matcher;
  4100. public:
  4101. CCompiledUStrRegExpr(const UChar * _UregExp, bool _isCaseSensitive = false)
  4102. {
  4103. UErrorCode uerr = U_ZERO_ERROR;
  4104. UParseError uperr;
  4105. if (_isCaseSensitive)
  4106. pattern = RegexPattern::compile(_UregExp, uperr, uerr);
  4107. else
  4108. pattern = RegexPattern::compile(_UregExp, UREGEX_CASE_INSENSITIVE, uperr, uerr);
  4109. matcher = pattern->matcher(uerr);
  4110. if (U_FAILURE(uerr))
  4111. {
  4112. char * expAscii;
  4113. unsigned expAsciiLen;
  4114. rtlUnicodeToEscapedStrX(expAsciiLen, expAscii, rtlUnicodeStrlen(_UregExp), _UregExp);
  4115. StringBuffer msg;
  4116. msg.append("Bad regular expression: ").append(u_errorName(uerr)).append(": ").append(expAsciiLen, expAscii);
  4117. rtlFree(expAscii);
  4118. delete matcher;
  4119. delete pattern;
  4120. matcher = 0;
  4121. pattern = 0;
  4122. rtlFail(0, msg.str()); //throws
  4123. }
  4124. }
  4125. ~CCompiledUStrRegExpr()
  4126. {
  4127. if (matcher)
  4128. delete matcher;
  4129. if (pattern)
  4130. delete pattern;
  4131. }
  4132. void replace(size32_t & outlen, UChar * & out, size32_t slen, const UChar * str, size32_t rlen, UChar const * replace) const
  4133. {
  4134. UnicodeString const src(str, slen);
  4135. UErrorCode err = U_ZERO_ERROR;
  4136. RegexMatcher * replacer = pattern->matcher(src, err);
  4137. UnicodeString const fmt(replace, rlen);
  4138. UnicodeString const tgt = replacer->replaceAll(fmt, err);
  4139. outlen = tgt.length();
  4140. out = (UChar *)malloc(outlen*2);
  4141. tgt.extract(0, outlen, out);
  4142. delete replacer;
  4143. }
  4144. IUStrRegExprFindInstance * find(const UChar * str, size32_t from, size32_t len) const
  4145. {
  4146. CUStrRegExprFindInstance * findInst = new CUStrRegExprFindInstance(matcher, str, from, len);
  4147. return findInst;
  4148. }
  4149. };
  4150. //---------------------------------------------------------------------------
  4151. ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
  4152. {
  4153. CCompiledUStrRegExpr * expr = new CCompiledUStrRegExpr(regExpr, isCaseSensitive);
  4154. return expr;
  4155. }
  4156. ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
  4157. {
  4158. if (compiledExpr)
  4159. delete (CCompiledUStrRegExpr*)compiledExpr;
  4160. }
  4161. ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
  4162. {
  4163. if (findInst)
  4164. delete (CUStrRegExprFindInstance*)findInst;
  4165. }
  4166. #else // _USE_BOOST_REGEX not set
  4167. ECLRTL_API ICompiledStrRegExpr * rtlCreateCompiledStrRegExpr(const char * regExpr, bool isCaseSensitive)
  4168. {
  4169. UNIMPLEMENTED_X("Boost regex disabled");
  4170. }
  4171. ECLRTL_API void rtlDestroyCompiledStrRegExpr(ICompiledStrRegExpr * compiledExpr)
  4172. {
  4173. }
  4174. ECLRTL_API void rtlDestroyStrRegExprFindInstance(IStrRegExprFindInstance * findInst)
  4175. {
  4176. }
  4177. ECLRTL_API ICompiledUStrRegExpr * rtlCreateCompiledUStrRegExpr(const UChar * regExpr, bool isCaseSensitive)
  4178. {
  4179. UNIMPLEMENTED_X("Boost regex disabled");
  4180. }
  4181. ECLRTL_API void rtlDestroyCompiledUStrRegExpr(ICompiledUStrRegExpr * compiledExpr)
  4182. {
  4183. }
  4184. ECLRTL_API void rtlDestroyUStrRegExprFindInstance(IUStrRegExprFindInstance * findInst)
  4185. {
  4186. }
  4187. #endif
  4188. //---------------------------------------------------------------------------
  4189. ECLRTL_API int rtlQueryLocalFailCode(IException * e)
  4190. {
  4191. return e->errorCode();
  4192. }
  4193. ECLRTL_API void rtlGetLocalFailMessage(size32_t & len, char * & text, IException * e, const char * tag)
  4194. {
  4195. rtlExceptionExtract(len, text, e, tag);
  4196. }
  4197. ECLRTL_API void rtlFreeException(IException * e)
  4198. {
  4199. e->Release();
  4200. }
  4201. //---------------------------------------------------------------------------
  4202. //Generally any calls to this function have also checked that the length(trim(str)) <= fieldLen, so exceptions should only occur if compareLen > fieldLen
  4203. //However, function can now also handle the exception case.
  4204. ECLRTL_API void rtlCreateRange(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str, byte fill, byte pad)
  4205. {
  4206. //
  4207. if (compareLen > fieldLen)
  4208. {
  4209. if ((int)compareLen >= 0)
  4210. {
  4211. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4212. compareLen = fieldLen;
  4213. }
  4214. else
  4215. compareLen = 0; // probably m[1..-1] or something silly
  4216. }
  4217. if (len > compareLen)
  4218. {
  4219. while ((len > compareLen) && (str[len-1] == pad))
  4220. len--;
  4221. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4222. if (len > compareLen)
  4223. {
  4224. compareLen = 0;
  4225. fill = (fill == 0) ? 255 : 0;
  4226. }
  4227. }
  4228. outlen = fieldLen;
  4229. out = (char *)malloc(fieldLen);
  4230. if (len >= compareLen)
  4231. memcpy(out, str, compareLen);
  4232. else
  4233. {
  4234. memcpy(out, str, len);
  4235. memset(out+len, pad, compareLen-len);
  4236. }
  4237. memset(out + compareLen, fill, fieldLen-compareLen);
  4238. }
  4239. ECLRTL_API void rtlCreateStrRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4240. {
  4241. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4242. }
  4243. ECLRTL_API void rtlCreateStrRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4244. {
  4245. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4246. }
  4247. ECLRTL_API void rtlCreateDataRangeLow(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4248. {
  4249. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 0, 0);
  4250. }
  4251. ECLRTL_API void rtlCreateDataRangeHigh(size32_t & outlen, void * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const void * str)
  4252. {
  4253. rtlCreateRange(outlen, *(char * *)&out, fieldLen, compareLen, len, (const char *)str, 255, 0);
  4254. }
  4255. ECLRTL_API void rtlCreateRangeLow(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4256. {
  4257. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 0, ' ');
  4258. }
  4259. ECLRTL_API void rtlCreateRangeHigh(size32_t & outlen, char * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const char * str)
  4260. {
  4261. rtlCreateRange(outlen, out, fieldLen, compareLen, len, str, 255, ' ');
  4262. }
  4263. ECLRTL_API void rtlCreateUnicodeRange(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str, byte fill)
  4264. {
  4265. //Same as function above!
  4266. if (compareLen > fieldLen)
  4267. {
  4268. if ((int)compareLen >= 0)
  4269. {
  4270. //x[1..m] = y, m is larger than fieldLen, so truncate to fieldLen
  4271. compareLen = fieldLen;
  4272. }
  4273. else
  4274. compareLen = 0; // probably m[1..-1] or something silly
  4275. }
  4276. if (len > compareLen)
  4277. {
  4278. while ((len > compareLen) && (str[len-1] == ' '))
  4279. len--;
  4280. //so change the search range to FF,FF,FF .. 00.00.00 which will then never match.
  4281. if (len > compareLen)
  4282. {
  4283. compareLen = 0;
  4284. fill = (fill == 0) ? 255 : 0;
  4285. }
  4286. }
  4287. outlen = fieldLen;
  4288. out = (UChar *)malloc(fieldLen*sizeof(UChar));
  4289. if (len >= compareLen)
  4290. memcpy(out, str, compareLen*sizeof(UChar));
  4291. else
  4292. {
  4293. memcpy(out, str, len * sizeof(UChar));
  4294. while (len != compareLen)
  4295. out[len++] = ' ';
  4296. }
  4297. memset(out + compareLen, fill, (fieldLen-compareLen) * sizeof(UChar));
  4298. }
  4299. ECLRTL_API void rtlCreateUnicodeRangeLow(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4300. {
  4301. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0x00);
  4302. }
  4303. ECLRTL_API void rtlCreateUnicodeRangeHigh(size32_t & outlen, UChar * & out, unsigned fieldLen, unsigned compareLen, size32_t len, const UChar * str)
  4304. {
  4305. rtlCreateUnicodeRange(outlen, out, fieldLen, compareLen, len, str, 0xFF);
  4306. }
  4307. //---------------------------------------------------------------------------
  4308. ECLRTL_API unsigned rtlCountRows(size32_t len, const void * data, IRecordSize * rs)
  4309. {
  4310. if (rs->isFixedSize())
  4311. return len / rs->getFixedSize();
  4312. unsigned count = 0;
  4313. while (len)
  4314. {
  4315. size32_t thisLen = rs->getRecordSize(data);
  4316. data = (byte *)data + thisLen;
  4317. if (thisLen > len)
  4318. throw MakeStringException(0, "Invalid raw data");
  4319. len -= thisLen;
  4320. count++;
  4321. }
  4322. return count;
  4323. }
  4324. //---------------------------------------------------------------------------
  4325. ECLRTL_API size32_t rtlCountToSize(unsigned count, const void * data, IRecordSize * rs)
  4326. {
  4327. if (rs->isFixedSize())
  4328. return count * rs->getFixedSize();
  4329. unsigned size = 0;
  4330. for (unsigned i=0;i<count;i++)
  4331. {
  4332. size32_t thisLen = rs->getRecordSize(data);
  4333. data = (byte *)data + thisLen;
  4334. size += thisLen;
  4335. }
  4336. return size;
  4337. }
  4338. //---------------------------------------------------------------------------
  4339. class rtlCodepageConverter
  4340. {
  4341. public:
  4342. rtlCodepageConverter(char const * sourceName, char const * targetName, bool & failed) : uerr(U_ZERO_ERROR)
  4343. {
  4344. srccnv = ucnv_open(sourceName, &uerr);
  4345. tgtcnv = ucnv_open(targetName, &uerr);
  4346. tgtMaxRatio = ucnv_getMaxCharSize(tgtcnv);
  4347. failed = U_FAILURE(uerr);
  4348. }
  4349. ~rtlCodepageConverter()
  4350. {
  4351. ucnv_close(srccnv);
  4352. ucnv_close(tgtcnv);
  4353. }
  4354. void convertX(unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4355. {
  4356. //convert from source to utf-16: try to avoid preflighting by guessing upper bound
  4357. //unicode length in UChars equal source length in chars if single byte encoding, and be less for multibyte
  4358. UChar * ubuff = (UChar *)malloc(sourceLength*2);
  4359. int32_t ulen = ucnv_toUChars(srccnv, ubuff, sourceLength, source, sourceLength, &uerr);
  4360. if(ulen > (int32_t)sourceLength)
  4361. {
  4362. //okay, so our guess was wrong, and we have to reallocate
  4363. free(ubuff);
  4364. ubuff = (UChar *)malloc(ulen*2);
  4365. ucnv_toUChars(srccnv, ubuff, ulen, source, sourceLength, &uerr);
  4366. }
  4367. if(preflight)
  4368. {
  4369. //convert from utf-16 to target: preflight to get buffer of exactly the right size
  4370. UErrorCode uerr2 = uerr; //preflight has to use copy of error code, as it is considered an 'error'
  4371. int32_t tlen = ucnv_fromUChars(tgtcnv, 0, 0, ubuff, ulen, &uerr2);
  4372. target = (char *)malloc(tlen);
  4373. targetLength = ucnv_fromUChars(tgtcnv, target, tlen, ubuff, ulen, &uerr);
  4374. }
  4375. else
  4376. {
  4377. //convert from utf-16 to target: avoid preflighting by allocating buffer of maximum size
  4378. target = (char *)malloc(ulen*tgtMaxRatio);
  4379. targetLength = ucnv_fromUChars(tgtcnv, target, ulen*tgtMaxRatio, ubuff, ulen, &uerr);
  4380. }
  4381. free(ubuff);
  4382. failed = U_FAILURE(uerr);
  4383. }
  4384. unsigned convert(unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4385. {
  4386. char * tgtStart = target;
  4387. ucnv_convertEx(tgtcnv, srccnv, &target, target+targetLength, &source, source+sourceLength, 0, 0, 0, 0, true, true, &uerr);
  4388. int32_t ret = target-tgtStart;
  4389. failed = U_FAILURE(uerr);
  4390. return ret;
  4391. }
  4392. private:
  4393. UErrorCode uerr;
  4394. UConverter * srccnv;
  4395. UConverter * tgtcnv;
  4396. int8_t tgtMaxRatio;
  4397. };
  4398. void * rtlOpenCodepageConverter(char const * sourceName, char const * targetName, bool & failed)
  4399. {
  4400. return new rtlCodepageConverter(sourceName, targetName, failed);
  4401. }
  4402. void rtlCloseCodepageConverter(void * converter)
  4403. {
  4404. delete ((rtlCodepageConverter *)converter);
  4405. }
  4406. void rtlCodepageConvertX(void * converter, unsigned & targetLength, char * & target, unsigned sourceLength, char const * source, bool & failed, bool preflight)
  4407. {
  4408. ((rtlCodepageConverter *)converter)->convertX(targetLength, target, sourceLength, source, failed, preflight);
  4409. }
  4410. unsigned rtlCodepageConvert(void * converter, unsigned targetLength, char * target, unsigned sourceLength, char const * source, bool & failed)
  4411. {
  4412. return ((rtlCodepageConverter *)converter)->convert(targetLength, target, sourceLength, source, failed);
  4413. }
  4414. //---------------------------------------------------------------------------
  4415. void appendUChar(MemoryBuffer & buff, char x)
  4416. {
  4417. UChar c = x;
  4418. buff.append(sizeof(c), &c);
  4419. }
  4420. void appendUChar(MemoryBuffer & buff, UChar c)
  4421. {
  4422. buff.append(sizeof(c), &c);
  4423. }
  4424. void appendUStr(MemoryBuffer & x, const char * text)
  4425. {
  4426. while (*text)
  4427. {
  4428. UChar c = *text++;
  4429. x.append(sizeof(c), &c);
  4430. }
  4431. }
  4432. ECLRTL_API void xmlDecodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in)
  4433. {
  4434. StringBuffer input(inLen, in);
  4435. StringBuffer temp;
  4436. decodeXML(input, temp, NULL, NULL, false);
  4437. outLen = temp.length();
  4438. out = temp.detach();
  4439. }
  4440. bool hasPrefix(const UChar * ustr, const UChar * end, const char * str, unsigned len)
  4441. {
  4442. if (end - ustr < len)
  4443. return false;
  4444. while (len--)
  4445. {
  4446. if (*ustr++ != *str++)
  4447. return false;
  4448. }
  4449. return true;
  4450. }
  4451. ECLRTL_API void xmlDecodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in)
  4452. {
  4453. const UChar * cur = in;
  4454. const UChar * end = in+inLen;
  4455. MemoryBuffer ret;
  4456. while (cur<end)
  4457. {
  4458. switch(*cur)
  4459. {
  4460. case '&':
  4461. if(hasPrefix(cur+1, end, "amp;", 4))
  4462. {
  4463. cur += 4;
  4464. appendUChar(ret, '&');
  4465. }
  4466. else if(hasPrefix(cur+1, end, "lt;", 3))
  4467. {
  4468. cur += 3;
  4469. appendUChar(ret, '<');
  4470. }
  4471. else if(hasPrefix(cur+1, end, "gt;", 3))
  4472. {
  4473. cur += 3;
  4474. appendUChar(ret, '>');
  4475. }
  4476. else if(hasPrefix(cur+1, end, "quot;", 5))
  4477. {
  4478. cur += 5;
  4479. appendUChar(ret, '"');
  4480. }
  4481. else if(hasPrefix(cur+1, end, "apos;", 5))
  4482. {
  4483. cur += 5;
  4484. appendUChar(ret, '\'');
  4485. }
  4486. else if(hasPrefix(cur+1, end, "nbsp;", 5))
  4487. {
  4488. cur += 5;
  4489. appendUChar(ret, (UChar) 0xa0);
  4490. }
  4491. else if(hasPrefix(cur+1, end, "#", 1))
  4492. {
  4493. const UChar * saveCur = cur;
  4494. bool error = true; // until we have seen a digit...
  4495. cur += 2;
  4496. unsigned base = 10;
  4497. if (*cur == 'x')
  4498. {
  4499. base = 16;
  4500. cur++;
  4501. }
  4502. UChar value = 0;
  4503. while (cur < end)
  4504. {
  4505. unsigned digit;
  4506. UChar next = *cur;
  4507. if ((next >= '0') && (next <= '9'))
  4508. digit = next-'0';
  4509. else if ((next >= 'A') && (next <= 'F'))
  4510. digit = next-'A'+10;
  4511. else if ((next >= 'a') && (next <= 'f'))
  4512. digit = next-'a'+10;
  4513. else if (next==';')
  4514. break;
  4515. if (digit >= base)
  4516. {
  4517. error = true;
  4518. break;
  4519. }
  4520. error = false;
  4521. value = value * base + digit;
  4522. cur++;
  4523. }
  4524. if (error)
  4525. {
  4526. appendUChar(ret, '&');
  4527. cur = saveCur;
  4528. }
  4529. else
  4530. appendUChar(ret, value);
  4531. }
  4532. else
  4533. appendUChar(ret, *cur);
  4534. break;
  4535. default:
  4536. appendUChar(ret, *cur);
  4537. break;
  4538. }
  4539. cur++;
  4540. }
  4541. outLen = ret.length()/2;
  4542. out = (UChar *)ret.detach();
  4543. }
  4544. ECLRTL_API void xmlEncodeStrX(size32_t & outLen, char * & out, size32_t inLen, const char * in, unsigned flags)
  4545. {
  4546. StringBuffer temp;
  4547. encodeXML(in, temp, flags, inLen, false);
  4548. outLen = temp.length();
  4549. out = temp.detach();
  4550. }
  4551. ECLRTL_API void xmlEncodeUStrX(size32_t & outLen, UChar * & out, size32_t inLen, const UChar * in, unsigned flags)
  4552. {
  4553. const UChar * cur = in;
  4554. MemoryBuffer ret;
  4555. ret.ensureCapacity(inLen*2);
  4556. while (inLen)
  4557. {
  4558. UChar next = *cur;
  4559. switch(*cur)
  4560. {
  4561. case '&':
  4562. appendUStr(ret, "&amp;");
  4563. break;
  4564. case '<':
  4565. appendUStr(ret, "&lt;");
  4566. break;
  4567. case '>':
  4568. appendUStr(ret, "&gt;");
  4569. break;
  4570. case '\"':
  4571. appendUStr(ret, "&quot;");
  4572. break;
  4573. case '\'':
  4574. appendUStr(ret, "&apos;");
  4575. break;
  4576. case ' ':
  4577. appendUStr(ret, flags & ENCODE_SPACES?"&#32;":" ");
  4578. break;
  4579. case '\n':
  4580. appendUStr(ret, flags & ENCODE_NEWLINES?"&#10;":"\n");
  4581. break;
  4582. case '\r':
  4583. appendUStr(ret, flags & ENCODE_NEWLINES?"&#13;":"\r");
  4584. break;
  4585. case '\t':
  4586. appendUStr(ret, flags & ENCODE_SPACES?"&#9;":"\t");
  4587. break;
  4588. default:
  4589. appendUChar(ret, next);
  4590. break;
  4591. }
  4592. inLen--;
  4593. cur++;
  4594. }
  4595. outLen = ret.length()/2;
  4596. out = (UChar *)ret.detach();
  4597. }
  4598. //---------------------------------------------------------------------------
  4599. #define STRUCTURED_EXCEPTION_TAG "Error"
  //Returns true if text looks like a structured (xml) message.
  //  text - the message to test; NULL is never structured.
  //  tag  - the expected root tag name.  If NULL any text starting with '<' qualifies,
  //         otherwise text must start with exactly "<tag>".
  inline bool isStructuredMessage(const char * text, const char * tag)
  {
      if (!text || text[0] != '<')
          return false;
      if (!tag)
          return true;
      const size_t lenTag = strlen(tag);
      //strncmp stops at the end of text, whereas memcmp could read beyond a short message
      if (strncmp(text+1, tag, lenTag) != 0)
          return false;
      //All lenTag characters matched, so text[lenTag+1] is at worst the null terminator
      return text[lenTag+1] == '>';
  }
  4613. inline bool isStructuredError(const char * text) { return isStructuredMessage(text, STRUCTURED_EXCEPTION_TAG); }
  4614. void rtlExtractTag(size32_t & outLen, char * & out, const char * text, const char * tag, const char * rootTag)
  4615. {
  4616. if (!tag || !isStructuredMessage(text, rootTag))
  4617. {
  4618. if (text && (!tag || strcmp(tag, "text")==0))
  4619. rtlStrToStrX(outLen, out, strlen(text), text);
  4620. else
  4621. {
  4622. outLen = 0;
  4623. out = NULL;
  4624. }
  4625. }
  4626. else
  4627. {
  4628. StringBuffer startTag, endTag;
  4629. startTag.append("<").append(tag).append(">");
  4630. endTag.append("</").append(tag).append(">");
  4631. const char * start = strstr(text, startTag.str());
  4632. const char * end = strstr(text, endTag.str());
  4633. if (start && end)
  4634. {
  4635. start += startTag.length();
  4636. xmlDecodeStrX(outLen, out, end-start, start);
  4637. }
  4638. else
  4639. {
  4640. outLen = 0;
  4641. out = NULL;
  4642. }
  4643. }
  4644. }
  4645. void rtlExceptionExtract(size32_t & outLen, char * & out, const char * text, const char * tag)
  4646. {
  4647. if (!tag) tag = "text";
  4648. rtlExtractTag(outLen, out, text, tag, STRUCTURED_EXCEPTION_TAG);
  4649. }
  4650. void rtlExceptionExtract(size32_t & outLen, char * & out, IException * e, const char * tag)
  4651. {
  4652. StringBuffer text;
  4653. e->errorMessage(text);
  4654. rtlExceptionExtract(outLen, out, text.str(), tag);
  4655. }
  4656. void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char * value)
  4657. {
  4658. if (!isStructuredError(errorText.str()))
  4659. {
  4660. StringBuffer temp;
  4661. temp.append("<" STRUCTURED_EXCEPTION_TAG "><text>");
  4662. encodeXML(errorText.str(), temp, ENCODE_WHITESPACE, errorText.length(), false);
  4663. temp.append("</text></" STRUCTURED_EXCEPTION_TAG ">");
  4664. errorText.swapWith(temp);
  4665. }
  4666. StringBuffer temp;
  4667. temp.append("<").append(tag).append(">");
  4668. encodeXML(value, temp, ENCODE_WHITESPACE, (unsigned)-1, false);
  4669. temp.append("</").append(tag).append(">");
  4670. unsigned len = errorText.length();
  4671. unsigned pos = len - strlen(STRUCTURED_EXCEPTION_TAG) - 3;
  4672. errorText.insert(pos, temp);
  4673. }
  4674. //---------------------------------------------------------------------------
  4675. void rtlRowBuilder::forceAvailable(size32_t size)
  4676. {
  4677. const size32_t chunkSize = 64;
  4678. maxsize = (size + chunkSize-1) & ~(chunkSize-1);
  4679. ptr = realloc(ptr, maxsize);
  4680. }
  4681. //---------------------------------------------------------------------------
  4682. inline unsigned numExtraBytesFromValue(unsigned __int64 first)
  4683. {
  4684. if (first >= I64C(0x10000000))
  4685. if (first >= I64C(0x40000000000))
  4686. if (first >= I64C(0x2000000000000))
  4687. if (first >= I64C(0x100000000000000))
  4688. return 8;
  4689. else
  4690. return 7;
  4691. else
  4692. return 6;
  4693. else
  4694. if (first >= I64C(0x800000000))
  4695. return 5;
  4696. else
  4697. return 4;
  4698. else
  4699. if (first >= 0x4000)
  4700. if (first >= 0x200000)
  4701. return 3;
  4702. else
  4703. return 2;
  4704. else
  4705. if (first >= 0x80)
  4706. return 1;
  4707. else
  4708. return 0;
  4709. }
  4710. //An packed byte format, based on the unicode packing of utf-8.
  4711. //The number of top bits set in the leading byte indicates how many extra
  4712. //bytes follow (0..8). It gives the same compression as using a top bit to
  4713. //indicate continuation, but seems to be quicker (and requires less look ahead).
  4714. /*
  4715. byte numExtraBytesFromFirstTable[256] =
  4716. {
  4717. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4718. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4719. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4720. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  4721. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4722. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  4723. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  4724. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8
  4725. };
  4726. inline unsigned numExtraBytesFromFirst(byte first)
  4727. {
  4728. return numExtraBytesFromFirstTable(first);
  4729. }
  4730. */
  4731. //NB: This seems to be faster than using the table lookup above. Probably affects the data cache less
  4732. inline unsigned numExtraBytesFromFirst(byte first)
  4733. {
  4734. if (first >= 0xF0)
  4735. if (first >= 0xFC)
  4736. if (first >= 0xFE)
  4737. if (first >= 0xFF)
  4738. return 8;
  4739. else
  4740. return 7;
  4741. else
  4742. return 6;
  4743. else
  4744. if (first >= 0xF8)
  4745. return 5;
  4746. else
  4747. return 4;
  4748. else
  4749. if (first >= 0xC0)
  4750. if (first >= 0xE0)
  4751. return 3;
  4752. else
  4753. return 2;
  4754. else
  4755. if (first >= 0x80)
  4756. return 1;
  4757. else
  4758. return 0;
  4759. }
  4760. static byte leadingValueMask[9] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07, 0x03, 0x01, 0x00, 0x00 };
  4761. static byte leadingLengthMask[9] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
  4762. //maximum number of bytes for a packed value is size+1 bytes for size <=8 and last byte being fully used.
  4763. unsigned __int64 rtlGetPackedUnsigned(const void * _ptr)
  4764. {
  4765. const byte * ptr = (const byte *)_ptr;
  4766. byte first = *ptr++;
  4767. unsigned numExtra = numExtraBytesFromFirst(first);
  4768. unsigned __int64 value = first & leadingValueMask[numExtra];
  4769. //Loop unrolling has a negligable effect
  4770. while (numExtra--)
  4771. value = (value << 8) | *ptr++;
  4772. return value;
  4773. }
  4774. void rtlSetPackedUnsigned(void * _ptr, unsigned __int64 value)
  4775. {
  4776. byte * ptr = (byte *)_ptr;
  4777. unsigned numExtra = numExtraBytesFromValue(value);
  4778. byte firstMask = leadingLengthMask[numExtra];
  4779. while (numExtra)
  4780. {
  4781. ptr[numExtra--] = (byte)value;
  4782. value >>= 8;
  4783. }
  4784. ptr[0] = (byte)value | firstMask;
  4785. }
  4786. size32_t rtlGetPackedSize(const void * ptr)
  4787. {
  4788. return numExtraBytesFromFirst(*(byte*)ptr)+1;
  4789. }
  4790. size32_t rtlGetPackedSizeFromFirst(byte first)
  4791. {
  4792. return numExtraBytesFromFirst(first)+1;
  4793. }
  4794. //Store signed by moving the sign to the bottom bit, and inverting if negative.
  4795. //so small positive and negative numbers are stored compactly.
  4796. __int64 rtlGetPackedSigned(const void * ptr)
  4797. {
  4798. unsigned __int64 value = rtlGetPackedUnsigned(ptr);
  4799. unsigned __int64 shifted = (value >> 1);
  4800. return (__int64)((value & 1) ? ~shifted : shifted);
  4801. }
  4802. void rtlSetPackedSigned(void * ptr, __int64 value)
  4803. {
  4804. unsigned __int64 storeValue;
  4805. if (value < 0)
  4806. storeValue = (~value << 1) | 1;
  4807. else
  4808. storeValue = value << 1;
  4809. rtlSetPackedUnsigned(ptr, storeValue);
  4810. }
  4811. IAtom * rtlCreateFieldNameAtom(const char * name)
  4812. {
  4813. return createAtom(name);
  4814. }
  4815. //---------------------------------------------------------------------------
  4816. void RtlCInterface::Link() const { atomic_inc(&xxcount); }
  4817. bool RtlCInterface::Release(void) const
  4818. {
  4819. if (atomic_dec_and_test(&xxcount))
  4820. {
  4821. delete this;
  4822. return true;
  4823. }
  4824. return false;
  4825. }
  4826. //---------------------------------------------------------------------------
  4827. #if 0
  4828. void PrintExtract(StringBuffer & s, const char * tag)
  4829. {
  4830. size32_t outLen;
  4831. char * out = NULL;
  4832. rtlExceptionExtract(outLen, out, s.str(), tag);
  4833. PrintLog("%s = %.*s", tag, outLen, out);
  4834. rtlFree(out);
  4835. }
  4836. void testStructuredExceptions()
  4837. {
  4838. StringBuffer s;
  4839. s.append("This<is>some text");
  4840. PrintExtract(s, NULL);
  4841. PrintExtract(s, "text");
  4842. PrintExtract(s, "is");
  4843. rtlAddExceptionTag(s, "location", "192.168.12.1");
  4844. PrintExtract(s, NULL);
  4845. PrintExtract(s, "text");
  4846. PrintExtract(s, "is");
  4847. PrintExtract(s, "location");
  4848. rtlAddExceptionTag(s, "author", "gavin");
  4849. PrintExtract(s, NULL);
  4850. PrintExtract(s, "text");
  4851. PrintExtract(s, "is");
  4852. PrintExtract(s, "location");
  4853. PrintExtract(s, "author");
  4854. PrintLog("%s", s.str());
  4855. }
  4856. static void testPackedUnsigned()
  4857. {
  4858. unsigned __int64 values[] = { 0, 1, 2, 10, 127, 128, 16383, 16384, 32767, 32768, 0xffffff, 0x7fffffff, 0xffffffff,
  4859. I64C(0xffffffffffffff), I64C(0x100000000000000), I64C(0x7fffffffffffffff), I64C(0xffffffffffffffff) };
  4860. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 8, 9, 9, 9 };
  4861. unsigned numValues = _elements_in(values);
  4862. byte temp[9];
  4863. for (unsigned i = 0; i < numValues; i++)
  4864. {
  4865. rtlSetPackedUnsigned(temp, values[i]);
  4866. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  4867. assertex(rtlGetPackedUnsigned(temp) == values[i]);
  4868. }
  4869. for (unsigned j= 0; j < 2000000; j++)
  4870. {
  4871. unsigned __int64 value = I64C(1) << (rtlRandom() & 63);
  4872. // unsigned value = rtlRandom();
  4873. rtlSetPackedUnsigned(temp, value);
  4874. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value)+1);
  4875. assertex(rtlGetPackedUnsigned(temp) == value);
  4876. }
  4877. for (unsigned k= 0; k < 63; k++)
  4878. {
  4879. unsigned __int64 value1 = I64C(1) << k;
  4880. rtlSetPackedUnsigned(temp, value1);
  4881. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value1)+1);
  4882. assertex(rtlGetPackedUnsigned(temp) == value1);
  4883. unsigned __int64 value2 = value1-1;
  4884. rtlSetPackedUnsigned(temp, value2);
  4885. assertex(rtlGetPackedSize(temp) == numExtraBytesFromValue(value2)+1);
  4886. assertex(rtlGetPackedUnsigned(temp) == value2);
  4887. }
  4888. }
  4889. static void testPackedSigned()
  4890. {
  4891. __int64 values[] = { 0, 1, -2, 10, 63, 64, -64, -65, 8191, 8192, 0x3fffffff,
  4892. I64C(0x7fffffffffffff), I64C(0x80000000000000), I64C(0x7fffffffffffffff), I64C(0x8000000000000000) };
  4893. unsigned numBytes[] = { 1, 1, 1, 1, 1, 2, 1, 2, 2, 3, 5,
  4894. 8, 9, 9, 9 };
  4895. unsigned numValues = _elements_in(values);
  4896. byte temp[9];
  4897. for (unsigned i = 0; i < numValues; i++)
  4898. {
  4899. rtlSetPackedSigned(temp, values[i]);
  4900. assertex(rtlGetPackedSize(temp) == numBytes[i]);
  4901. assertex(rtlGetPackedSigned(temp) == values[i]);
  4902. }
  4903. }
  4904. #endif
  // Deliberately does nothing.
  // NOTE(review): presumably exists so that callers can reference a symbol in
  // this library and thereby force it to be linked/loaded - confirm.
  void ensureRtlLoaded()
  {
      // Intentionally empty.
  }
  4908. #ifdef _USE_CPPUNIT
  4909. #include "unittests.hpp"
  4910. class EclRtlTests : public CppUnit::TestFixture
  4911. {
  4912. CPPUNIT_TEST_SUITE( EclRtlTests );
  4913. CPPUNIT_TEST(RegexTest);
  4914. CPPUNIT_TEST(MultiRegexTest);
  4915. CPPUNIT_TEST_SUITE_END();
  4916. protected:
  4917. void RegexTest()
  4918. {
  4919. rtlCompiledStrRegex r;
  4920. size32_t outlen;
  4921. char * out = NULL;
  4922. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  4923. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  4924. ASSERT(outlen==6);
  4925. ASSERT(out != NULL);
  4926. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  4927. rtlFree(out);
  4928. }
  4929. void MultiRegexTest()
  4930. {
  4931. class RegexTestThread : public Thread
  4932. {
  4933. virtual int run()
  4934. {
  4935. for (int i = 0; i < 100000; i++)
  4936. {
  4937. rtlCompiledStrRegex r;
  4938. size32_t outlen;
  4939. char * out = NULL;
  4940. r.setPattern("([A-Z]+)[ ]?'(S) ", true);
  4941. r->replace(outlen, out, 7, "ABC'S ", 5, "$1$2 ");
  4942. ASSERT(outlen==6);
  4943. ASSERT(out != NULL);
  4944. ASSERT(memcmp(out, "ABCS ", outlen)==0);
  4945. rtlFree(out);
  4946. }
  4947. return 0;
  4948. }
  4949. };
  4950. RegexTestThread t1;
  4951. RegexTestThread t2;
  4952. RegexTestThread t3;
  4953. t1.start();
  4954. t2.start();
  4955. t3.start();
  4956. t1.join();
  4957. t2.join();
  4958. t3.join();
  4959. }
  4960. };
  4961. CPPUNIT_TEST_SUITE_REGISTRATION( EclRtlTests );
  4962. CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( EclRtlTests, "EclRtlTests" );
  4963. #endif