stringlib.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include <platform.h>
  14. #include <time.h>
  15. #include <stdlib.h>
  16. #include <string.h>
  17. #include <ctype.h>
  18. #include <assert.h>
  19. #include <eclrtl.hpp>
  20. #include "stringlib.hpp"
  21. #include "wildmatch.tpp"
  // Version strings of earlier STRINGLIB builds that this build declares itself compatible with.
  // The list is NULL-terminated; getECLPluginDefinition() hands it to the host through
  // ECLPluginDefinitionBlockEx::compatibleVersions when the caller supplied the extended block.
  22. static const char * compatibleVersions[] = {
  23. "STRINGLIB 1.1.06 [fd997dc3feb4ca385d59a12b9dc4beab]", // windows version
  24. "STRINGLIB 1.1.06 [f8305e66ca26a1447dee66d4a36d88dc]", // linux version
  25. "STRINGLIB 1.1.07",
  26. "STRINGLIB 1.1.08",
  27. "STRINGLIB 1.1.09",
  28. "STRINGLIB 1.1.10",
  29. "STRINGLIB 1.1.11",
  30. "STRINGLIB 1.1.12",
  31. "STRINGLIB 1.1.13",
  32. NULL };
  // Version string of this build: reported as pb->version by getECLPluginDefinition()
  // and returned (as a copy) by slGetBuildInfo().
  33. #define STRINGLIB_VERSION "STRINGLIB 1.1.14"
  // ECL source text for the StringLib service, published to the host as pb->ECL by
  // getECLPluginDefinition(). Each declaration names the ECL-visible function, its attributes,
  // and the C entrypoint in this library that implements it. Several ECL names share one
  // entrypoint (e.g. StringFind and EbcdicStringFind both map to slStringFind).
  34. static const char * EclDefinition =
  35. "export StringLib := SERVICE:fold\n"
  36. " string StringFilterOut(const string src, const string _within) : c, pure,entrypoint='slStringFilterOut'; \n"
  37. " string StringFilter(const string src, const string _within) : c, pure,entrypoint='slStringFilter'; \n"
  38. " string StringSubstituteOut(const string src, const string _within, const string _newchar) : c, pure,entrypoint='slStringSubsOut'; \n"
  39. " string StringSubstitute(const string src, const string _within, const string _newchar) : c, pure,entrypoint='slStringSubs'; \n"
  40. " string StringRepad(const string src, unsigned4 size) : c, pure,entrypoint='slStringRepad'; \n"
  41. " string StringTranslate(const string src, const string _within, const string _mapping) : c, pure,entrypoint='slStringTranslate'; \n"
  42. " unsigned integer4 StringFind(const string src, const string tofind, unsigned4 instance ) : c, pure,entrypoint='slStringFind'; \n"
  43. " unsigned integer4 StringUnboundedUnsafeFind(const string src, const string tofind ) : c,pure,nofold,entrypoint='slStringFind2'; \n"
  44. " unsigned integer4 StringFindCount(const string src, const string tofind) : c, pure,entrypoint='slStringFindCount'; \n"
  45. " unsigned integer4 EbcdicStringFind(const ebcdic string src, const ebcdic string tofind , unsigned4 instance ) : c,pure,entrypoint='slStringFind'; \n"
  46. " unsigned integer4 EbcdicStringUnboundedUnsafeFind(const ebcdic string src, const ebcdic string tofind ) : c,pure,nofold,entrypoint='slStringFind2'; \n"
  47. " string StringExtract(const string src, unsigned4 instance) : c,pure,entrypoint='slStringExtract'; \n"
  48. // NOTE - the next 2 are foldable but not pure, meaning it will only be folded if found in a #IF or similar
  49. // This is because you usually want them to be executed at runtime
  50. " string8 GetDateYYYYMMDD() : c,once,entrypoint='slGetDateYYYYMMDD2';\n"
  51. " varstring GetBuildInfo() : c,once,entrypoint='slGetBuildInfo';\n"
  52. " string Data2String(const data src) : c,pure,entrypoint='slData2String';\n"
  53. " data String2Data(const string src) : c,pure,entrypoint='slString2Data';\n"
  54. " string StringToLowerCase(const string src) : c,pure,entrypoint='slStringToLowerCase';\n"
  55. " string StringToUpperCase(const string src) : c,pure,entrypoint='slStringToUpperCase';\n"
  56. " string StringToProperCase(const string src) : c,pure,entrypoint='slStringToProperCase';\n"
  57. " string StringToCapitalCase(const string src) : c,pure,entrypoint='slStringToCapitalCase';\n"
  58. " string StringToTitleCase(const string src) : c,pure,entrypoint='slStringToTitleCase';\n"
  59. " integer4 StringCompareIgnoreCase(const string src1, const string src2) : c,pure,entrypoint='slStringCompareIgnoreCase';\n"
  60. " string StringReverse(const string src) : c,pure,entrypoint='slStringReverse';\n"
  61. " string StringFindReplace(const string src, const string stok, const string rtok) : c,pure,entrypoint='slStringFindReplace';\n"
  62. " string StringCleanSpaces(const string src) : c,pure,entrypoint='slStringCleanSpaces'; \n"
  63. " boolean StringWildMatch(const string src, const string _pattern, boolean _noCase) : c, pure,entrypoint='slStringWildMatch'; \n"
  64. " boolean StringWildExactMatch(const string src, const string _pattern, boolean _noCase) : c, pure,entrypoint='slStringWildExactMatch'; \n"
  65. " boolean StringContains(const string src, const string _pattern, boolean _noCase) : c, pure,entrypoint='slStringContains'; \n"
  66. " string StringExtractMultiple(const string src, unsigned8 mask) : c,pure,entrypoint='slStringExtractMultiple'; \n"
  67. " unsigned integer4 EditDistance(const string l, const string r) : c, time, pure,entrypoint='slEditDistanceV2'; \n"
  68. " boolean EditDistanceWithinRadius(const string l, const string r, unsigned4 radius) : c,time,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
  69. " unsigned integer4 EditDistanceV2(const string l, const string r) : c,time,pure,entrypoint='slEditDistanceV2'; \n"
  70. " unsigned integer4 EditDistanceV3(const string l, const string r, unsigned4 radius) : c,time,pure,entrypoint='slEditDistanceV3'; \n"
  71. " boolean EditDistanceWithinRadiusV2(const string l, const string r, unsigned4 radius) : c,time,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
  72. " string StringGetNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringGetNthWord'; \n"
  73. " string StringExcludeLastWord(const string src) : c, pure,entrypoint='slStringExcludeLastWord'; \n"
  74. " string StringExcludeNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringExcludeNthWord'; \n"
  75. " unsigned4 StringWordCount(const string src) : c, pure,entrypoint='slStringWordCount'; \n"
  76. " unsigned4 CountWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slCountWords'; \n"
  77. " SET OF STRING SplitWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slSplitWords'; \n"
  78. " STRING CombineWords(set of string src, const string _separator) : c, pure,entrypoint='slCombineWords'; \n"
  79. " UNSIGNED4 StringToDate(const string src, const varstring format) : c, pure,entrypoint='slStringToDate'; \n"
  80. " UNSIGNED4 StringToTimeOfDay(const string src, const varstring format) : c, pure,entrypoint='slStringToTimeOfDay'; \n"
  81. " UNSIGNED4 MatchDate(const string src, set of varstring formats) : c, pure,entrypoint='slMatchDate'; \n"
  82. " UNSIGNED4 MatchTimeOfDay(const string src, set of varstring formats) : c, pure,entrypoint='slMatchTimeOfDay'; \n"
  83. " STRING FormatDate(UNSIGNED4 date, const varstring format) : c, pure,entrypoint='slFormatDate'; \n"
  84. " STRING StringRepeat(const string src, unsigned4 n) : c, pure,entrypoint='slStringRepeat'; \n"
  85. "END;";
  86. STRINGLIB_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
  87. {
  88. if (pb->size == sizeof(ECLPluginDefinitionBlockEx))
  89. {
  90. ECLPluginDefinitionBlockEx * pbx = (ECLPluginDefinitionBlockEx *) pb;
  91. pbx->compatibleVersions = compatibleVersions;
  92. }
  93. else if (pb->size != sizeof(ECLPluginDefinitionBlock))
  94. return false;
  95. pb->magicVersion = PLUGIN_VERSION;
  96. pb->version = STRINGLIB_VERSION;
  97. pb->moduleName = "lib_stringlib";
  98. pb->ECL = EclDefinition;
  99. pb->flags = PLUGIN_IMPLICIT_MODULE | PLUGIN_MULTIPLE_VERSIONS;
  100. pb->description = "StringLib string manipulation library";
  101. return true;
  102. }
  103. namespace nsStringlib {
  // Plugin context recorded by setPluginContext(); it is the first argument given to the
  // CTXMALLOC / CTXREALLOC / CTXSTRDUP calls in this file.
  104. IPluginContext * parentCtx = NULL;
  // Number of bits in an unsigned (computed assuming 8 bits per byte); used to index the
  // 256-entry bitmaps built by slStringFilter and slStringFilterOut.
  105. enum { bitsInUnsigned = sizeof(unsigned) * 8 };
  // Upper-case hex digits, indexed by nibble value; used by slData2String.
  106. static const char hexchar[] = "0123456789ABCDEF";
  // Converts a single hexadecimal digit character to its value (0-15).
  // Letters are accepted in either case; any character that is not a hex digit yields 0.
  static unsigned hex2digit(char c)
  {
      // Decimal digits are guaranteed to be contiguous, so they can be handled arithmetically.
      if (c >= '1' && c <= '9')
          return (unsigned)(c - '0');

      switch (c)
      {
      case 'a': case 'A': return 10;
      case 'b': case 'B': return 11;
      case 'c': case 'C': return 12;
      case 'd': case 'D': return 13;
      case 'e': case 'E': return 14;
      case 'f': case 'F': return 15;
      default: break;
      }
      // '0', NUL and every non-hex character map to zero.
      return 0;
  }
  // Upper-cases a single character using the current C locale.
  // The argument is widened through unsigned char first: passing a negative plain char
  // to toupper() is undefined behaviour.
  inline char char_toupper(char c) { return (char)toupper((unsigned char)c); }
  // Shrinks len so that it no longer counts trailing space characters of s.
  //   len - in: number of characters in s; out: length with trailing spaces removed.
  //   s   - character data (not NUL-terminated); only s[0..len-1] is examined.
  inline void clip(unsigned &len, const char * s)
  {
      unsigned trimmed = len;
      while (trimmed != 0 && s[trimmed-1] == ' ')
          --trimmed;
      len = trimmed;
  }
  // Returns the smallest of the three unsigned values.
  inline unsigned min3(unsigned a, unsigned b, unsigned c)
  {
      const unsigned smallerOfAB = (b < a) ? b : a;
      return (c < smallerOfAB) ? c : smallerOfAB;
  }
  149. //--- Optimized versions of the edit distance functions
  // Maps a row index onto one of the two stripes kept by the edit distance functions
  // (0 for even rows, 1 for odd rows).
  inline unsigned mask(unsigned x)
  {
      return x % 2u;
  }
  151. unsigned editDistance(unsigned leftLen, const char * left, unsigned rightLen, const char * right)
  152. {
  153. unsigned i, j;
  154. clip(leftLen, left);
  155. clip(rightLen, right);
  156. if (leftLen > 255)
  157. leftLen = 255;
  158. if (rightLen > 255)
  159. rightLen = 255;
  160. if (leftLen == 0)
  161. return rightLen;
  162. if (rightLen == 0)
  163. return leftLen;
  164. //Optimize the storage requirements by
  165. //i) Only storing two stripes
  166. //ii) Calculate, but don't store the row comparing against the null string
  167. unsigned char da[2][256];
  168. char r_0 = right[0];
  169. char l_0 = left[0];
  170. bool matched_l0 = false;
  171. for (j = 0; j < rightLen; j++)
  172. {
  173. if (right[j] == l_0) matched_l0 = true;
  174. da[0][j] = (matched_l0) ? j : j+1;
  175. }
  176. bool matched_r0 = (l_0 == r_0);
  177. for (i = 1; i < leftLen; i++)
  178. {
  179. char l_i = left[i];
  180. if (l_i == r_0)
  181. matched_r0 = true;
  182. byte da_i_0 = matched_r0 ? i : i+1;
  183. da[mask(i)][0] = da_i_0;
  184. byte da_i_prevj = da_i_0;
  185. for (j = 1; j < rightLen; j++)
  186. {
  187. char r_j = right[j];
  188. unsigned char next = (l_i == r_j) ? da[mask(i-1)][j-1] :
  189. min3(da[mask(i-1)][j], da_i_prevj, da[mask(i-1)][j-1]) + 1;
  190. da[mask(i)][j] = next;
  191. da_i_prevj = next;
  192. }
  193. }
  194. return da[mask(leftLen-1)][rightLen-1];
  195. }
  196. //This could be further improved in the following ways:
  197. // * Only use 2*radius bytes of temporary storage - I doubt it is worth it.
  198. // * special case edit1 - you could use variables for the 6 interesting array elements, and get
  199. // rid of the array completely. You could also unwind the first (and last iterations).
  200. // * I suspect the early exit condition could be improved depending on the lengths of the strings.
  201. extern STRINGLIB_API unsigned editDistanceWithinRadius(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius)
  202. {
  203. if (radius >= 255)
  204. return 255;
  205. clip(leftLen, left);
  206. clip(rightLen, right);
  207. unsigned minED = (leftLen < rightLen)? rightLen - leftLen: leftLen - rightLen;
  208. if (minED > radius)
  209. return minED;
  210. if (leftLen > 255)
  211. leftLen = 255;
  212. if (rightLen > 255)
  213. rightLen = 255;
  214. //Checking for leading common substrings actually slows the function down.
  215. if (leftLen == 0)
  216. return rightLen;
  217. if (rightLen == 0)
  218. return leftLen;
  219. /*
  220. This function applies two optimizations over the function above.
  221. a) Adding a charcter (next row) can at most decrease the edit distance by 1, so short circuit when
  222. we there is no possiblity of getting within the distance.
  223. b) We only need to evaluate the martix da[i-radius..i+radius][j-radius..j+radius]
  224. not taking into account values outside that range [can use max value to prevent access]
  225. */
  226. //Optimize the storage requirements by
  227. //i) Only storing two stripes
  228. //ii) Calculate, but don't store the row comparing against the null string
  229. unsigned char da[2][256];
  230. char r_0 = right[0];
  231. char l_0 = left[0];
  232. bool matched_l0 = false;
  233. for (unsigned j = 0; j < rightLen; j++)
  234. {
  235. if (right[j] == l_0) matched_l0 = true;
  236. da[0][j] = (matched_l0) ? j : j+1;
  237. }
  238. bool matched_r0 = (l_0 == r_0);
  239. for (unsigned i = 1; i < leftLen; i++)
  240. {
  241. char l_i = left[i];
  242. if (l_i == r_0)
  243. matched_r0 = true;
  244. byte da_i_0 = matched_r0 ? i : i+1;
  245. da[mask(i)][0] = da_i_0;
  246. byte da_i_prevj = da_i_0;
  247. unsigned low = i-radius;
  248. unsigned high = i+radius;
  249. unsigned first = (i > radius) ? low : 1;
  250. unsigned last = (high >= rightLen) ? rightLen : high +1;
  251. for (unsigned j = first; j < last; j++)
  252. {
  253. char r_j = right[j];
  254. unsigned next = da[mask(i-1)][j-1];
  255. if (l_i != r_j)
  256. {
  257. if (j != low)
  258. {
  259. if (next > da_i_prevj)
  260. next = da_i_prevj;
  261. }
  262. if (j != high)
  263. {
  264. byte da_previ_j = da[mask(i-1)][j];
  265. if (next > da_previ_j)
  266. next = da_previ_j;
  267. }
  268. next++;
  269. }
  270. da[mask(i)][j] = next;
  271. da_i_prevj = next;
  272. }
  273. // bail out early if ed can't possibly be <= radius
  274. // Only considering a strip down the middle of the matrix, so the maximum the score can ever be adjusted is 2xradius
  275. unsigned max_valid_score = 3*radius;
  276. // But maximum is also 1 for every difference in string length - comes in to play when close to the end.
  277. //In 32bit goes slower for radius=1 I suspect because running out of registers. Retest in 64bit.
  278. if (radius > 1)
  279. {
  280. unsigned max_distance = radius + (leftLen - (i+1)) + (rightLen - last);
  281. if (max_valid_score > max_distance)
  282. max_valid_score = max_distance;
  283. }
  284. if (da_i_prevj > max_valid_score)
  285. return da_i_prevj;
  286. }
  287. return da[mask(leftLen-1)][rightLen-1];
  288. }
  289. } // namespace
  290. //-------------------------------------------------------------------------------------------------------------------------------------------
  291. // Exported functions are NOT in the namespace
  292. using namespace nsStringlib;
  293. STRINGLIB_API void setPluginContext(IPluginContext * _ctx) { parentCtx = _ctx; }
  294. STRINGLIB_API void STRINGLIB_CALL slStringFilterOut(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit)
  295. {
  296. char *temp = (char *)CTXMALLOC(parentCtx, srcLen);
  297. unsigned tlen = 0;
  298. if (hitLen==1)
  299. {
  300. char test = *hit;
  301. for ( unsigned i = 0; i < srcLen; i++ )
  302. {
  303. char c = src[i];
  304. if (c!=test)
  305. temp[tlen++] = c;
  306. }
  307. }
  308. else {
  309. unsigned filter[256/bitsInUnsigned];
  310. memset(filter,0,sizeof(filter));
  311. for (unsigned j = 0; j < hitLen; j++ )
  312. {
  313. unsigned c = (unsigned char)hit[j];
  314. filter[c/bitsInUnsigned] |= (1<<(c%bitsInUnsigned));
  315. }
  316. for ( unsigned i = 0; i < srcLen; i++ )
  317. {
  318. unsigned c = (unsigned char)src[i];
  319. if ((filter[c/bitsInUnsigned] & (1<<(c%bitsInUnsigned))) == 0)
  320. temp[tlen++] = (char)c;
  321. }
  322. }
  323. tgt = (char *)CTXREALLOC(parentCtx, temp, tlen);
  324. tgtLen = tlen;
  325. }
  326. STRINGLIB_API void STRINGLIB_CALL slStringFilter(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit)
  327. {
  328. char *temp = (char *)CTXMALLOC(parentCtx, srcLen);
  329. unsigned tlen = 0;
  330. unsigned filter[256/bitsInUnsigned];
  331. memset(filter,0,sizeof(filter));
  332. for (unsigned j = 0; j < hitLen; j++ )
  333. {
  334. unsigned c = (unsigned char)hit[j];
  335. filter[c/bitsInUnsigned] |= (1<<(c%bitsInUnsigned));
  336. }
  337. for ( unsigned i = 0; i < srcLen; i++ )
  338. {
  339. unsigned c = (unsigned char)src[i];
  340. if ((filter[c/bitsInUnsigned] & (1<<(c%bitsInUnsigned))) != 0)
  341. temp[tlen++] = (char)c;
  342. }
  343. tgt = (char *)CTXREALLOC(parentCtx, temp, tlen);
  344. tgtLen = tlen;
  345. }
  346. STRINGLIB_API void STRINGLIB_CALL slStringSubsOut(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned newCharLen, const char * newChar)
  347. {
  348. bool filter[256];
  349. memset(filter,0,sizeof(filter));
  350. for (unsigned j = 0; j < hitLen; j++ )
  351. {
  352. unsigned char c = ((unsigned char *)hit)[j];
  353. filter[c] = true;
  354. }
  355. tgt = (char *)CTXMALLOC(parentCtx, srcLen);
  356. if (newCharLen > 0)
  357. {
  358. for ( unsigned i = 0; i < srcLen; i++ )
  359. {
  360. unsigned char c = ((unsigned char *)src)[i];
  361. if (!filter[c])
  362. tgt[i] = c;
  363. else
  364. tgt[i] = ((char *)newChar)[0];
  365. }
  366. }
  367. else
  368. {
  369. memcpy_iflen(tgt, src, srcLen);
  370. }
  371. tgtLen = srcLen;
  372. }
  373. STRINGLIB_API void STRINGLIB_CALL slStringSubs(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned newCharLen, const char * newChar)
  374. {
  375. bool filter[256];
  376. memset(filter,0,sizeof(filter));
  377. for (unsigned j = 0; j < hitLen; j++ )
  378. {
  379. unsigned char c = ((unsigned char *)hit)[j];
  380. filter[c] = true;
  381. }
  382. tgt = (char *)CTXMALLOC(parentCtx, srcLen);
  383. if (newCharLen > 0)
  384. {
  385. for ( unsigned i = 0; i < srcLen; i++ )
  386. {
  387. unsigned char c = ((unsigned char *)src)[i];
  388. if (filter[c])
  389. tgt[i] = c;
  390. else
  391. tgt[i] = ((char *)newChar)[0];
  392. }
  393. }
  394. else
  395. {
  396. memcpy_iflen(tgt, src, srcLen);
  397. }
  398. tgtLen = srcLen;
  399. }
  400. STRINGLIB_API void STRINGLIB_CALL slStringTranslate(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned mappingLen, const char * mapping)
  401. {
  402. char mapped[256];
  403. for (unsigned i=0; i < sizeof(mapped); i++)
  404. mapped[i] = i;
  405. if (hitLen == mappingLen)
  406. {
  407. for (unsigned j = 0; j < hitLen; j++ )
  408. {
  409. unsigned char c = ((unsigned char *)hit)[j];
  410. mapped[c] = mapping[j];
  411. }
  412. }
  413. char * ret = (char *)CTXMALLOC(parentCtx, srcLen);
  414. for ( unsigned i = 0; i < srcLen; i++ )
  415. {
  416. unsigned char c = ((unsigned char *)src)[i];
  417. ret[i] = mapped[c];
  418. }
  419. tgt = ret;
  420. tgtLen = srcLen;
  421. }
  422. STRINGLIB_API void STRINGLIB_CALL slStringRepad(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned tLen)
  423. {
  424. char *base = (char *)src;
  425. while ( srcLen && *base == ' ' )
  426. {
  427. srcLen--;
  428. base++;
  429. }
  430. while ( srcLen && base[srcLen-1] == ' ' )
  431. srcLen--;
  432. if ( srcLen > tLen )
  433. srcLen = tLen;
  434. if ((int) tLen < 0)
  435. rtlFail(0, "Invalid parameter to StringLib.StringRepad");
  436. if (tLen)
  437. {
  438. tgt = (char *)CTXMALLOC(parentCtx, tLen);
  439. if (!tgt)
  440. rtlThrowOutOfMemory(0, "In StringLib.StringRepad");
  441. tgtLen = tLen;
  442. memcpy_iflen(tgt,base,srcLen);
  443. memset(tgt+srcLen,' ',tLen-srcLen);
  444. }
  445. else
  446. {
  447. tgt = NULL;
  448. tgtLen = 0;
  449. }
  450. }
  451. STRINGLIB_API unsigned STRINGLIB_CALL slStringFind(unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned instance)
  452. {
  453. if ( srcLen < hitLen )
  454. return 0;
  455. if (hitLen==1) { // common case optimization
  456. const char *p=src;
  457. const char *e = p+srcLen;
  458. char c = *hit;
  459. while (p!=e)
  460. if ((*(p++)==c))
  461. if (!--instance)
  462. return (unsigned)(p-src);
  463. }
  464. else
  465. {
  466. unsigned steps = srcLen-hitLen+1;
  467. for ( unsigned i = 0; i < steps; i++ )
  468. {
  469. if ( !memcmp((char *)src+i,hit,hitLen) )
  470. {
  471. if ( !--instance )
  472. return i+1;
  473. if (hitLen > 1)
  474. i += (hitLen-1);
  475. }
  476. }
  477. }
  478. return 0;
  479. }
  480. STRINGLIB_API unsigned STRINGLIB_CALL slStringFindCount(unsigned srcLen, const char * src, unsigned hitLen, const char * hit)
  481. {
  482. if ( srcLen < hitLen )
  483. return 0;
  484. unsigned matches = 0;
  485. if (hitLen==1) { // common case optimization
  486. const char *p=src;
  487. const char *e = p+srcLen;
  488. char c = *hit;
  489. while (p!=e)
  490. if ((*(p++)==c))
  491. matches++;
  492. }
  493. else
  494. {
  495. unsigned steps = srcLen-hitLen+1;
  496. for ( unsigned i = 0; i < steps; i++ )
  497. {
  498. if ( !memcmp((char *)src+i,hit,hitLen) )
  499. {
  500. matches++;
  501. if (hitLen > 1)
  502. i += (hitLen-1);
  503. }
  504. }
  505. }
  506. return matches;
  507. }
  508. STRINGLIB_API unsigned STRINGLIB_CALL slStringFind2(unsigned /*srcLen*/, const char * src, unsigned hitLen, const char * hit)
  509. {
  510. if (hitLen==1) { // common case optimization
  511. const char *p=src;
  512. char c = *hit;
  513. while (*(p++)!=c);
  514. return (unsigned)(p-src);
  515. }
  516. for ( unsigned i = 0; ; i++ )
  517. if ( !memcmp((char *)src+i,hit,hitLen) )
  518. return i+1;
  519. return 0;
  520. }
  521. STRINGLIB_API void STRINGLIB_CALL slStringExtract(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned instance)
  522. {
  523. tgtLen = 0;
  524. tgt = NULL;
  525. char * finger = (char *)src;
  526. if ( !instance )
  527. return;
  528. while ( --instance )
  529. {
  530. while ( srcLen && *finger != ',' )
  531. {
  532. srcLen--;
  533. finger++;
  534. }
  535. if ( !srcLen )
  536. return;
  537. srcLen--; // Skip ,
  538. finger++;
  539. }
  540. unsigned len = 0;
  541. for ( ; len < srcLen; len++ )
  542. if ( finger[len] == ',' )
  543. break;
  544. tgt = (char *)CTXMALLOC(parentCtx, len);
  545. memcpy_iflen(tgt,finger,len);
  546. tgtLen = len;
  547. }
  548. STRINGLIB_API void STRINGLIB_CALL slStringExtractMultiple(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned __int64 mask)
  549. {
  550. tgtLen = 0;
  551. tgt = NULL;
  552. char * finger = (char *)src;
  553. unsigned __int64 thisInstance = 1;
  554. while (mask)
  555. {
  556. while ( srcLen && *finger != ',' )
  557. {
  558. srcLen--;
  559. finger++;
  560. }
  561. if (mask & thisInstance)
  562. {
  563. mask &= ~thisInstance;
  564. unsigned matchLen = (unsigned)(finger - src);
  565. if (!tgt)
  566. tgt = (char *) CTXMALLOC(parentCtx, matchLen + srcLen);
  567. else
  568. tgt[tgtLen++] = ',';
  569. memcpy(tgt+tgtLen, src, finger - src);
  570. tgtLen += matchLen;
  571. }
  572. thisInstance <<= 1;
  573. if ( !srcLen )
  574. break;
  575. srcLen--; // Skip the ','
  576. finger++;
  577. src = finger;
  578. }
  579. }
  580. STRINGLIB_API char * STRINGLIB_CALL slGetDateYYYYMMDD(void)
  581. {
  582. char * result = (char *)CTXMALLOC(parentCtx, 9);
  583. time_t ltime;
  584. time( &ltime );
  585. tm *today = localtime( &ltime );
  586. strftime(result, 9, "%Y%m%d", today);
  587. return result;
  588. }
  589. STRINGLIB_API void STRINGLIB_CALL slGetDateYYYYMMDD2(char * ret)
  590. {
  591. char temp[9];
  592. time_t ltime;
  593. time( &ltime );
  594. tm *today = localtime( &ltime );
  595. strftime(temp, 9, "%Y%m%d", today);
  596. memcpy(ret, temp, 8);
  597. }
  598. STRINGLIB_API char * STRINGLIB_CALL slGetBuildInfo(void)
  599. {
  600. return CTXSTRDUP(parentCtx, STRINGLIB_VERSION);
  601. }
  602. STRINGLIB_API void STRINGLIB_CALL slData2String(size32_t & __ret_len,char * & __ret_str,unsigned _len_y, const void * y)
  603. {
  604. char *out = (char *)CTXMALLOC(parentCtx, _len_y * 2);
  605. char *res = out;
  606. unsigned char *yy = (unsigned char *) y;
  607. for (unsigned int i = 0; i < _len_y; i++)
  608. {
  609. *out++ = hexchar[yy[i] >> 4];
  610. *out++ = hexchar[yy[i] & 0x0f];
  611. }
  612. __ret_len = _len_y * 2;
  613. __ret_str = res;
  614. }
  615. STRINGLIB_API void STRINGLIB_CALL slString2Data(size32_t & __ret_len,void * & __ret_str,unsigned _len_src,const char * src)
  616. {
  617. // trailing nibbles are ignored
  618. // embedded spaces are ignored
  619. // illegal hex values are treated as zero
  620. // we could do a stricter one if it was considered desirable.
  621. char *out = (char *)CTXMALLOC(parentCtx, _len_src / 2);
  622. char *target = out;
  623. for (;;)
  624. {
  625. while (_len_src > 1 && isspace(*src))
  626. {
  627. src++;
  628. _len_src--;
  629. }
  630. if (_len_src < 2)
  631. break;
  632. *target++ = (hex2digit(src[0]) << 4) | hex2digit(src[1]);
  633. _len_src -= 2;
  634. src += 2;
  635. }
  636. __ret_len = (size32_t)(target - out);
  637. __ret_str = out;
  638. }
  639. // -----------------------------------------------------------------
  640. STRINGLIB_API void STRINGLIB_CALL slStringToLowerCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  641. {
  642. char * res = (char *)CTXMALLOC(parentCtx, srcLen);
  643. for (unsigned int i=0;i<srcLen;i++)
  644. res[i] = tolower(src[i]);
  645. tgt = res;
  646. tgtLen = srcLen;
  647. }
  648. // -----------------------------------------------------------------
  649. STRINGLIB_API void STRINGLIB_CALL slStringToUpperCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  650. {
  651. char * res = (char *)CTXMALLOC(parentCtx, srcLen);
  652. for (unsigned int i=0;i<srcLen;i++)
  653. res[i] = toupper(src[i]);
  654. tgt = res;
  655. tgtLen = srcLen;
  656. }
  657. // -----------------------------------------------------------------
  658. STRINGLIB_API void STRINGLIB_CALL slStringToProperCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  659. {
  660. tgt = (char *)CTXMALLOC(parentCtx, srcLen);
  661. char * res = tgt;
  662. bool seenSpace = true;
  663. for (unsigned int i=0;i<srcLen;i++)
  664. {
  665. char c = src[i];
  666. *tgt++ = seenSpace ? toupper(c) : c;
  667. seenSpace = (c==' ');
  668. }
  669. tgt = res;
  670. tgtLen = srcLen;
  671. }
  672. // -----------------------------------------------------------------
  673. STRINGLIB_API void STRINGLIB_CALL slStringToCapitalCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  674. {
  675. char * const result = (char *)CTXMALLOC(parentCtx, srcLen);
  676. bool upperPending = true;
  677. for (unsigned int i=0;i<srcLen;i++)
  678. {
  679. byte c = src[i];
  680. result[i] = upperPending ? toupper(c) : c;
  681. upperPending = !isalnum(c);
  682. }
  683. tgt = result;
  684. tgtLen = srcLen;
  685. }
  686. // -----------------------------------------------------------------
  687. STRINGLIB_API void STRINGLIB_CALL slStringToTitleCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  688. {
  689. char * const result = (char *)CTXMALLOC(parentCtx, srcLen);
  690. bool upperPending = true;
  691. for (unsigned int i=0;i<srcLen;i++)
  692. {
  693. byte c = src[i];
  694. result[i] = upperPending ? toupper(c) : tolower(c);
  695. upperPending = !isalnum(c);
  696. }
  697. tgt = result;
  698. tgtLen = srcLen;
  699. }
  700. // -----------------------------------------------------------------
  701. STRINGLIB_API int STRINGLIB_CALL slStringCompareIgnoreCase (unsigned src1Len, const char * src1, unsigned src2Len, const char * src2)
  702. {
  703. unsigned int i;
  704. for (i=0;i < src1Len && i < src2Len;i++)
  705. {
  706. byte lc = src1[i];
  707. byte rc = src2[i];
  708. if (lc != rc)
  709. {
  710. lc = tolower(lc);
  711. rc = tolower(rc);
  712. if (lc != rc)
  713. return lc > rc ? 1 : -1;
  714. }
  715. }
  716. while (i < src1Len)
  717. {
  718. if (src1[i++] != ' ')
  719. return 1;
  720. }
  721. while (i < src2Len)
  722. {
  723. if (src2[i++] != ' ')
  724. return -1;
  725. }
  726. return 0;
  727. }
  728. // -----------------------------------------------------------------
  729. STRINGLIB_API void STRINGLIB_CALL slStringReverse (unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  730. {
  731. char * res = (char *)CTXMALLOC(parentCtx, srcLen);
  732. unsigned int n = srcLen - 1;
  733. for (unsigned int i=0;i<srcLen;i++)
  734. res[i] = src[n-i];
  735. tgt = res;
  736. tgtLen = srcLen;
  737. }
  738. // -----------------------------------------------------------------
  739. STRINGLIB_API void STRINGLIB_CALL slStringFindReplace (unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned stokLen, const char * stok, unsigned rtokLen, const char * rtok)
  740. {
  741. if ( srcLen < stokLen || stokLen == 0)
  742. {
  743. tgt = (char *) CTXMALLOC(parentCtx, srcLen);
  744. memcpy_iflen(tgt, src, srcLen);
  745. tgtLen = srcLen;
  746. }
  747. else
  748. {
  749. unsigned steps = srcLen-stokLen+1;
  750. unsigned tgtmax = rtokLen > stokLen ? srcLen + steps * (rtokLen - stokLen) : srcLen;
  751. // This is the upper limit on target size - not a problem if we allocate a bit too much
  752. char * res = (char *)CTXMALLOC(parentCtx, tgtmax);
  753. tgt = res;
  754. unsigned i;
  755. for ( i = 0; i < steps; )
  756. {
  757. if ( !memcmp(src+i,stok,stokLen) )
  758. {
  759. memcpy(res, rtok, rtokLen);
  760. res += rtokLen;
  761. i += stokLen;
  762. }
  763. else
  764. *res++ = src[i++];
  765. }
  766. while (i <srcLen)
  767. *res++ = src[i++];
  768. tgtLen = (size32_t)(res - tgt);
  769. }
  770. }
  771. // -----------------------------------------------------------------
  772. STRINGLIB_API void STRINGLIB_CALL slStringCleanSpaces(size32_t & __ret_len,char * & __ret_str,unsigned _len_instr,const char * instr)
  773. {
  774. // remove double spaces
  775. char *out = (char *) CTXMALLOC(parentCtx, _len_instr);
  776. char *origout = out;
  777. bool spacePending = false;
  778. bool atStart = true;
  779. for(unsigned idx = 0; idx < _len_instr; idx++)
  780. {
  781. char c = *instr++;
  782. switch (c)
  783. {
  784. case ' ':
  785. case '\t':
  786. spacePending = true;
  787. break;
  788. default:
  789. if (spacePending && !atStart)
  790. *out++ = ' ';
  791. spacePending = false;
  792. atStart = false;
  793. *out++ = c;
  794. break;
  795. }
  796. }
  797. __ret_str = origout;
  798. __ret_len = (size32_t)(out - origout);
  799. }
  800. STRINGLIB_API bool STRINGLIB_CALL slStringWildMatch(unsigned srcLen, const char * src, unsigned patLen, const char * pat, bool noCase)
  801. {
  802. return wildTrimMatch<char, char_toupper, '?', '*', ' '>(src, srcLen, pat, patLen, noCase);
  803. }
  804. STRINGLIB_API bool STRINGLIB_CALL slStringWildExactMatch(unsigned srcLen, const char * src, unsigned patLen, const char * pat, bool noCase)
  805. {
  806. return wildMatch<char, char_toupper, '?', '*'>(src, srcLen, pat, patLen, noCase);
  807. }
  808. STRINGLIB_API bool STRINGLIB_CALL slStringContains(unsigned srcLen, const char * src, unsigned patLen, const char * pat, bool noCase)
  809. {
  810. unsigned char srcCount[256];
  811. memset(srcCount, 0, 256);
  812. while (srcLen && src[srcLen-1]==' ')
  813. srcLen--;
  814. while(srcLen-- > 0)
  815. {
  816. byte c = *src++;
  817. if (noCase)
  818. c = toupper(c);
  819. srcCount[c]++;
  820. }
  821. while (patLen && pat[patLen-1]==' ')
  822. patLen--;
  823. while(patLen-- > 0)
  824. {
  825. byte c = *pat++;
  826. if (noCase)
  827. c = toupper(c);
  828. if (srcCount[c] == 0)
  829. return false;
  830. else
  831. srcCount[c]--;
  832. }
  833. return true;
  834. }
  835. STRINGLIB_API unsigned STRINGLIB_CALL slEditDistanceV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right)
  836. {
  837. return nsStringlib::editDistance(leftLen, left, rightLen, right);
  838. }
  839. STRINGLIB_API unsigned STRINGLIB_CALL slEditDistanceV3(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius)
  840. {
  841. if (radius == 0)
  842. return nsStringlib::editDistance(leftLen, left, rightLen, right);
  843. else
  844. return nsStringlib::editDistanceWithinRadius(leftLen, left, rightLen, right, radius);
  845. }
  846. STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius)
  847. {
  848. return nsStringlib::editDistanceWithinRadius(leftLen, left, rightLen, right, radius) <= radius;
  849. }
  850. inline bool isWordSeparator(char x)
  851. {
  852. return (unsigned char)x <= 0x20;
  853. }
  854. STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
  855. {
  856. const char* start = 0;
  857. const char* end = 0;
  858. // skip any leading white space
  859. while (srcLen>0 && isWordSeparator(*src)) {
  860. src++;
  861. srcLen--;
  862. }
  863. while (srcLen>0 && n>0) {
  864. start = src;
  865. n--;
  866. // go to the next white space
  867. while (srcLen>0 && !isWordSeparator(*src)) {
  868. src++;
  869. srcLen--;
  870. }
  871. end = src;
  872. // skip white space again
  873. while (srcLen>0 && isWordSeparator(*src)) {
  874. src++;
  875. srcLen--;
  876. }
  877. }
  878. if (!n && (end-start)) {
  879. tgt = (char *)CTXMALLOC(parentCtx, end-start);
  880. memcpy(tgt,start,end-start);
  881. tgtLen = end-start;
  882. } else {
  883. tgt = 0;
  884. tgtLen = 0;
  885. }
  886. }
  887. STRINGLIB_API void STRINGLIB_CALL slStringRepeat(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
  888. {
  889. char * buffer = NULL;
  890. if ((int) n < 0)
  891. rtlFail(0, "Invalid parameter to StringLib.StringRepeat");
  892. if (n == 0 || (srcLen == 0))
  893. {
  894. tgtLen = 0;
  895. }
  896. else
  897. {
  898. tgtLen = srcLen*n;
  899. if (tgtLen/n != srcLen) // Check did not overflow
  900. rtlFail(0, "Invalid parameter to StringLib.StringRepeat");
  901. buffer = (char *)CTXMALLOC(parentCtx, tgtLen);
  902. if (!buffer)
  903. rtlThrowOutOfMemory(0, "In StringLib.StringRepeat");
  904. if (srcLen == 1)
  905. {
  906. memset(buffer, *src, n);
  907. }
  908. else
  909. {
  910. for (unsigned i = 0; i < n; ++i)
  911. {
  912. memcpy(buffer + i*srcLen, src, srcLen);
  913. }
  914. }
  915. }
  916. tgt = buffer;
  917. }
  918. STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const char * src)
  919. {
  920. // skip any leading white space
  921. unsigned word_count = 0;
  922. while (srcLen>0 && isWordSeparator(*src)) {
  923. src++;
  924. srcLen--;
  925. }
  926. while (srcLen>0) {
  927. word_count++;
  928. // go to the next white space
  929. while (srcLen>0 && !isWordSeparator(*src)) {
  930. src++;
  931. srcLen--;
  932. }
  933. // skip white space again
  934. while (srcLen>0 && isWordSeparator(*src)) {
  935. src++;
  936. srcLen--;
  937. }
  938. }
  939. return word_count;
  940. }
  941. STRINGLIB_API void STRINGLIB_CALL slStringExcludeLastWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  942. {
  943. //Remove first word also removes leading whitespace, otherwise just remove trailing whitespace
  944. unsigned idx = 0;
  945. unsigned startLast = 0;
  946. while (idx < srcLen && isWordSeparator(src[idx]))
  947. idx++;
  948. for (;;)
  949. {
  950. while (idx < srcLen && !isWordSeparator(src[idx]))
  951. idx++;
  952. while (idx < srcLen && isWordSeparator(src[idx]))
  953. idx++;
  954. if (idx == srcLen)
  955. break;
  956. startLast = idx;
  957. }
  958. unsigned len = startLast;
  959. tgtLen = len;
  960. if (len)
  961. {
  962. tgt = (char *)CTXMALLOC(parentCtx, len);
  963. memcpy(tgt,src,len);
  964. }
  965. else
  966. tgt = NULL;
  967. }
  968. STRINGLIB_API void STRINGLIB_CALL slStringExcludeNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
  969. {
  970. unsigned idx = 0;
  971. unsigned startLast = 0;
  972. while (idx < srcLen && isWordSeparator(src[idx]))
  973. idx++;
  974. unsigned matchIndex = 0;
  975. //Remove first word also removes leading whitespace, otherwise just remove trailing whitespace
  976. //No matching words returns a blank string
  977. if (idx != srcLen)
  978. {
  979. for (;;)
  980. {
  981. while (idx < srcLen && !isWordSeparator(src[idx]))
  982. idx++;
  983. while (idx < srcLen && isWordSeparator(src[idx]))
  984. idx++;
  985. if (++matchIndex == n)
  986. break;
  987. startLast = idx;
  988. if (idx == srcLen)
  989. break;
  990. }
  991. }
  992. unsigned len = startLast + (srcLen - idx);
  993. tgtLen = len;
  994. if (len)
  995. {
  996. tgt = (char *)CTXMALLOC(parentCtx, len);
  997. memcpy_iflen(tgt,src,startLast);
  998. memcpy_iflen(tgt+startLast,src+idx,(srcLen - idx));
  999. }
  1000. else
  1001. tgt = NULL;
  1002. }
  1003. //--------------------------------------------------------------------------------------------------------------------
  1004. STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1005. {
  1006. if (lenSrc == 0)
  1007. return 0;
  1008. if ((lenSeparator == 0) || (lenSrc < lenSeparator))
  1009. return 1;
  1010. unsigned numWords=0;
  1011. const char * end = src + lenSrc;
  1012. const char * max = end - (lenSeparator - 1);
  1013. const char * cur = src;
  1014. const char * startWord = NULL;
  1015. //MORE: optimize lenSeparator == 1!
  1016. while (cur < max)
  1017. {
  1018. if (memcmp(cur, separator, lenSeparator) == 0)
  1019. {
  1020. if (startWord || allowBlankItems)
  1021. {
  1022. numWords++;
  1023. startWord = NULL;
  1024. }
  1025. cur += lenSeparator;
  1026. }
  1027. else
  1028. {
  1029. if (!startWord)
  1030. startWord = cur;
  1031. cur++;
  1032. }
  1033. }
  1034. if (startWord || (cur != end) || allowBlankItems)
  1035. numWords++;
  1036. return numWords;
  1037. }
  1038. static unsigned calcWordSetSize(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1039. {
  1040. if (lenSrc == 0)
  1041. return 0;
  1042. if ((lenSeparator == 0) || (lenSrc < lenSeparator))
  1043. return sizeof(size32_t) + lenSrc;
  1044. unsigned sizeWords=0;
  1045. const char * end = src + lenSrc;
  1046. const char * max = end - (lenSeparator - 1);
  1047. const char * cur = src;
  1048. const char * startWord = NULL;
  1049. //MORE: optimize lenSeparator == 1!
  1050. while (cur < max)
  1051. {
  1052. if (memcmp(cur, separator, lenSeparator) == 0)
  1053. {
  1054. if (startWord)
  1055. {
  1056. sizeWords += sizeof(size32_t) + (cur - startWord);
  1057. startWord = NULL;
  1058. }
  1059. else if (allowBlankItems)
  1060. sizeWords += sizeof(size32_t);
  1061. cur += lenSeparator;
  1062. }
  1063. else
  1064. {
  1065. if (!startWord)
  1066. startWord = cur;
  1067. cur++;
  1068. }
  1069. }
  1070. if (startWord || (cur != end) || allowBlankItems)
  1071. {
  1072. if (!startWord)
  1073. startWord = cur;
  1074. sizeWords += sizeof(size32_t) + (end - startWord);
  1075. }
  1076. return sizeWords;
  1077. }
  1078. STRINGLIB_API void STRINGLIB_CALL slSplitWords(bool & __isAllResult, size32_t & __lenResult, void * & __result, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1079. {
  1080. unsigned sizeRequired = calcWordSetSize(lenSrc, src, lenSeparator, separator, allowBlankItems);
  1081. char * const result = static_cast<char *>(CTXMALLOC(parentCtx, sizeRequired));
  1082. __isAllResult = false;
  1083. __lenResult = sizeRequired;
  1084. __result = result;
  1085. if (lenSrc == 0)
  1086. return;
  1087. if ((lenSeparator == 0) || (lenSrc < lenSeparator))
  1088. {
  1089. *((size32_t *)result) = lenSrc;
  1090. memcpy_iflen(result+sizeof(size32_t), src, lenSrc);
  1091. return;
  1092. }
  1093. unsigned sizeWords=0;
  1094. char * target = result;
  1095. const char * end = src + lenSrc;
  1096. const char * max = end - (lenSeparator - 1);
  1097. const char * cur = src;
  1098. const char * startWord = NULL;
  1099. //MORE: optimize lenSeparator == 1!
  1100. while (cur < max)
  1101. {
  1102. if (memcmp(cur, separator, lenSeparator) == 0)
  1103. {
  1104. if (startWord || allowBlankItems)
  1105. {
  1106. size32_t len = startWord ? (cur - startWord) : 0;
  1107. memcpy(target, &len, sizeof(len));
  1108. memcpy_iflen(target+sizeof(size32_t), startWord, len);
  1109. target += sizeof(size32_t) + len;
  1110. startWord = NULL;
  1111. }
  1112. cur += lenSeparator;
  1113. }
  1114. else
  1115. {
  1116. if (!startWord)
  1117. startWord = cur;
  1118. cur++;
  1119. }
  1120. }
  1121. if (startWord || (cur != end) || allowBlankItems)
  1122. {
  1123. if (!startWord)
  1124. startWord = cur;
  1125. size32_t len = (end - startWord);
  1126. memcpy(target, &len, sizeof(len));
  1127. memcpy_iflen(target+sizeof(size32_t), startWord, len);
  1128. target += sizeof(size32_t) + len;
  1129. }
  1130. assert(target == result + sizeRequired);
  1131. // ctx->fail(1, "Size mismatch in StringLib.SplitWords");
  1132. }
  1133. static unsigned countWords(size32_t lenSrc, const char * src)
  1134. {
  1135. unsigned count = 0;
  1136. unsigned offset = 0;
  1137. while (offset < lenSrc)
  1138. {
  1139. size32_t len;
  1140. memcpy(&len, src+offset, sizeof(len));
  1141. offset += sizeof(len) + len;
  1142. count++;
  1143. }
  1144. return count;
  1145. }
  1146. STRINGLIB_API void STRINGLIB_CALL slCombineWords(size32_t & __lenResult, void * & __result, bool isAllSrc, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1147. {
  1148. if (lenSrc == 0)
  1149. {
  1150. __lenResult = 0;
  1151. __result = NULL;
  1152. return;
  1153. }
  1154. unsigned numWords = countWords(lenSrc, src);
  1155. size32_t sizeRequired = lenSrc - numWords * sizeof(size32_t) + (numWords-1) * lenSeparator;
  1156. char * const result = static_cast<char *>(CTXMALLOC(parentCtx, sizeRequired));
  1157. __lenResult = sizeRequired;
  1158. __result = result;
  1159. char * target = result;
  1160. unsigned offset = 0;
  1161. while (offset < lenSrc)
  1162. {
  1163. if ((offset != 0) && lenSeparator)
  1164. {
  1165. memcpy(target, separator, lenSeparator);
  1166. target += lenSeparator;
  1167. }
  1168. size32_t len;
  1169. memcpy(&len, src+offset, sizeof(len));
  1170. offset += sizeof(len);
  1171. memcpy_iflen(target, src+offset, len);
  1172. target += len;
  1173. offset += len;
  1174. }
  1175. assert(target == result + sizeRequired);
  1176. }
  1177. //--------------------------------------------------------------------------------------------------------------------
  1178. inline unsigned makeDate(const tm & tm)
  1179. {
  1180. return (tm.tm_year + 1900) * 10000 + (tm.tm_mon + 1) * 100 + tm.tm_mday;
  1181. }
  1182. inline unsigned makeTimeOfDay(const tm & tm)
  1183. {
  1184. return (tm.tm_hour * 10000) + (tm.tm_min * 100) + tm.tm_sec;
  1185. }
  1186. inline void extractDate(tm & tm, unsigned date)
  1187. {
  1188. tm.tm_year = (date / 10000) - 1900;
  1189. tm.tm_mon = ((date / 100) % 100) - 1;
  1190. tm.tm_mday = (date % 100);
  1191. // To proper initialisation of tm
  1192. mktime(&tm);
  1193. }
  1194. STRINGLIB_API unsigned STRINGLIB_CALL slStringToDate(size32_t lenS, const char * s, const char * fmtin)
  1195. {
  1196. struct tm tm;
  1197. memset(&tm, 0, sizeof(tm));
  1198. tm.tm_mday = 1;
  1199. if (simple_strptime(lenS, s, fmtin, &tm))
  1200. return makeDate(tm);
  1201. return 0;
  1202. }
  1203. STRINGLIB_API unsigned STRINGLIB_CALL slStringToTimeOfDay(size32_t lenS, const char * s, const char * fmtin)
  1204. {
  1205. struct tm tm;
  1206. memset(&tm, 0, sizeof(tm));
  1207. if (simple_strptime(lenS, s, fmtin, &tm))
  1208. return makeTimeOfDay(tm);
  1209. return 0;
  1210. }
  1211. STRINGLIB_API unsigned STRINGLIB_CALL slMatchDate(size32_t lenS, const char * s, bool isAllFormats, unsigned lenFormats, const void * _formats)
  1212. {
  1213. struct tm tm;
  1214. const char * formats = (const char *)_formats;
  1215. for (unsigned off=0; off < lenFormats; )
  1216. {
  1217. const char * curFormat = formats+off;
  1218. memset(&tm, 0, sizeof(tm));
  1219. tm.tm_mday = 1;
  1220. if (simple_strptime(lenS, s, curFormat, &tm))
  1221. return makeDate(tm);
  1222. off += strlen(curFormat) + 1;
  1223. }
  1224. return 0;
  1225. }
  1226. STRINGLIB_API unsigned STRINGLIB_CALL slMatchTimeOfDay(size32_t lenS, const char * s, bool isAllFormats, unsigned lenFormats, const void * _formats)
  1227. {
  1228. struct tm tm;
  1229. const char * formats = (const char *)_formats;
  1230. for (unsigned off=0; off < lenFormats; )
  1231. {
  1232. const char * curFormat = formats+off;
  1233. memset(&tm, 0, sizeof(tm));
  1234. if (simple_strptime(lenS, s, curFormat, &tm))
  1235. return makeTimeOfDay(tm);
  1236. off += strlen(curFormat) + 1;
  1237. }
  1238. return 0;
  1239. }
  1240. STRINGLIB_API void STRINGLIB_CALL slFormatDate(size32_t & __lenResult, char * & __result, unsigned date, const char * format)
  1241. {
  1242. size32_t len = 0;
  1243. char * out = NULL;
  1244. if (date)
  1245. {
  1246. struct tm tm;
  1247. memset(&tm, 0, sizeof(tm));
  1248. extractDate(tm, date);
  1249. char buf[255];
  1250. #if defined(__clang__) || defined(__GNUC__)
  1251. #pragma GCC diagnostic push
  1252. #pragma GCC diagnostic ignored "-Wformat-nonliteral"
  1253. #endif
  1254. strftime(buf, sizeof(buf), format, &tm);
  1255. #if defined(__clang__) || defined(__GNUC__)
  1256. #pragma GCC diagnostic pop
  1257. #endif
  1258. len = strlen(buf);
  1259. out = static_cast<char *>(CTXMALLOC(parentCtx, len));
  1260. memcpy_iflen(out, buf, len);
  1261. }
  1262. __lenResult = len;
  1263. __result = out;
  1264. }
  1265. //--------------------------------------------------------------------------------------------------------------------
  1266. //--------------------------------------------------------------------------------------------------------------------
  1267. //--------------------------------------------------------------------------------------------------------------------
  1268. // Legacy functions that only work on fixed length strings
  1269. //--------------------------------------------------------------------------------------------------------------------
  1270. //--------------------------------------------------------------------------------------------------------------------
  1271. //--------------------------------------------------------------------------------------------------------------------
  1272. STRINGLIB_API void STRINGLIB_CALL slStringExtract50(char *tgt, unsigned srcLen, const char * src, unsigned instance)
  1273. {
  1274. unsigned lenret;
  1275. char * resret;
  1276. slStringExtract(lenret,resret,srcLen,src,instance);
  1277. if (lenret >= 50)
  1278. memcpy(tgt,resret,50);
  1279. else
  1280. {
  1281. memcpy_iflen(tgt,resret,lenret);
  1282. memset(tgt+lenret,' ',50-lenret);
  1283. }
  1284. CTXFREE(parentCtx, resret);
  1285. }
  1286. STRINGLIB_API void STRINGLIB_CALL slGetBuildInfo100(char *tgt)
  1287. {
  1288. size32_t len = (size32_t) strlen(STRINGLIB_VERSION);
  1289. if (len >= 100)
  1290. len = 100;
  1291. memcpy(tgt, STRINGLIB_VERSION, len);
  1292. memset(tgt+len, ' ', 100-len);
  1293. }
  1294. // -----------------------------------------------------------------
  1295. STRINGLIB_API void STRINGLIB_CALL slStringToLowerCase80(char *tgt, unsigned srcLen, const char * src)
  1296. {
  1297. unsigned int i;
  1298. for (i=0;i<srcLen && i < 80;i++)
  1299. *tgt++ = tolower(src[i]);
  1300. while (i < 80)
  1301. {
  1302. *tgt++=' ';
  1303. i++;
  1304. }
  1305. }
  1306. // -----------------------------------------------------------------
  1307. STRINGLIB_API void STRINGLIB_CALL slStringToUpperCase80(char *tgt, unsigned srcLen, const char * src)
  1308. {
  1309. unsigned int i;
  1310. for (i=0;i<srcLen && i < 80;i++)
  1311. *tgt++ = toupper(src[i]);
  1312. while (i < 80)
  1313. {
  1314. *tgt++=' ';
  1315. i++;
  1316. }
  1317. }
  1318. // -----------------------------------------------------------------
  1319. STRINGLIB_API void STRINGLIB_CALL slStringFindReplace80(char * tgt, unsigned srcLen, const char * src, unsigned stokLen, const char * stok, unsigned rtokLen, const char * rtok)
  1320. {
  1321. if ( srcLen < stokLen )
  1322. {
  1323. if (srcLen > 80)
  1324. srcLen = 80;
  1325. memcpy_iflen(tgt, src, srcLen);
  1326. if (srcLen < 80)
  1327. memset(tgt+srcLen, ' ', 80 - srcLen);
  1328. }
  1329. else
  1330. {
  1331. unsigned steps = srcLen-stokLen+1;
  1332. unsigned i;
  1333. unsigned lim = 80;
  1334. for ( i = 0; i < steps && lim > 0; )
  1335. {
  1336. if ( !memcmp(src+i,stok,stokLen) )
  1337. {
  1338. if (rtokLen > lim)
  1339. rtokLen = lim;
  1340. memcpy_iflen(tgt, rtok, rtokLen);
  1341. tgt += rtokLen;
  1342. i += stokLen;
  1343. lim -= rtokLen;
  1344. }
  1345. else
  1346. {
  1347. *tgt++ = src[i++];
  1348. lim--;
  1349. }
  1350. }
  1351. while (i < srcLen && lim > 0)
  1352. {
  1353. *tgt++ = src[i++];
  1354. lim--;
  1355. }
  1356. if (lim)
  1357. memset(tgt, ' ', lim);
  1358. }
  1359. }
  1360. STRINGLIB_API void STRINGLIB_CALL slStringCleanSpaces25(char *__ret_str,unsigned _len_instr,const char * instr)
  1361. {
  1362. // remove double spaces
  1363. // Fixed width version for Hole
  1364. unsigned outlen = _len_instr;
  1365. if (outlen < 25)
  1366. outlen = 25;
  1367. char *out = (char *) alloca(outlen);
  1368. char *origout = out;
  1369. bool spacePending = false;
  1370. bool atStart = true;
  1371. for(unsigned idx = 0; idx < _len_instr; idx++)
  1372. {
  1373. char c = *instr++;
  1374. switch (c)
  1375. {
  1376. case ' ':
  1377. case '\t':
  1378. spacePending = true;
  1379. break;
  1380. default:
  1381. if (spacePending && !atStart)
  1382. *out++ = ' ';
  1383. spacePending = false;
  1384. atStart = false;
  1385. *out++ = c;
  1386. break;
  1387. }
  1388. }
  1389. unsigned len = (size32_t)(out-origout);
  1390. if (len < 25)
  1391. memset(out, ' ', 25 - len);
  1392. memcpy(__ret_str, origout, 25);
  1393. }
  1394. STRINGLIB_API void STRINGLIB_CALL slStringCleanSpaces80(char *__ret_str,unsigned _len_instr,const char * instr)
  1395. {
  1396. // remove double spaces
  1397. // Another fixed width version for Hole
  1398. unsigned outlen = _len_instr;
  1399. if (outlen < 80)
  1400. outlen = 80;
  1401. char *out = (char *) alloca(outlen);
  1402. char *origout = out;
  1403. bool spacePending = false;
  1404. bool atStart = true;
  1405. for(unsigned idx = 0; idx < _len_instr; idx++)
  1406. {
  1407. char c = *instr++;
  1408. switch (c)
  1409. {
  1410. case ' ':
  1411. case '\t':
  1412. spacePending = true;
  1413. break;
  1414. default:
  1415. if (spacePending && !atStart)
  1416. *out++ = ' ';
  1417. spacePending = false;
  1418. atStart = false;
  1419. *out++ = c;
  1420. break;
  1421. }
  1422. }
  1423. unsigned len = (unsigned)(out-origout);
  1424. if (len < 80)
  1425. memset(out, ' ', 80 - len);
  1426. memcpy(__ret_str, origout, 80);
  1427. }