stringlib.cpp 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include <time.h>
  14. #include <stdlib.h>
  15. #include <string.h>
  16. #include <ctype.h>
  17. #include <assert.h>
  18. #include "stringlib.hpp"
  19. #include "wildmatch.tpp"
  // Version strings of earlier STRINGLIB builds. getECLPluginDefinition()
  // publishes this table through ECLPluginDefinitionBlockEx::compatibleVersions.
  // The table is terminated by a NULL entry.
  static const char * compatibleVersions[] =
  {
      "STRINGLIB 1.1.06 [fd997dc3feb4ca385d59a12b9dc4beab]", // windows version
      "STRINGLIB 1.1.06 [f8305e66ca26a1447dee66d4a36d88dc]", // linux version
      "STRINGLIB 1.1.07",
      "STRINGLIB 1.1.08",
      "STRINGLIB 1.1.09",
      "STRINGLIB 1.1.10",
      "STRINGLIB 1.1.11",
      "STRINGLIB 1.1.12",
      "STRINGLIB 1.1.13",
      NULL
  };

  // Version string reported for the current build (see getECLPluginDefinition
  // and slGetBuildInfo).
  #define STRINGLIB_VERSION "STRINGLIB 1.1.14"
  // ECL SERVICE definition text for this plugin. Each line declares one ECL
  // function and names the C entry point that implements it.
  // getECLPluginDefinition() hands this text out through pb->ECL.
  static const char * EclDefinition =
      "export StringLib := SERVICE\n"
      " string StringFilterOut(const string src, const string _within) : c, pure,entrypoint='slStringFilterOut'; \n"
      " string StringFilter(const string src, const string _within) : c, pure,entrypoint='slStringFilter'; \n"
      " string StringSubstituteOut(const string src, const string _within, const string _newchar) : c, pure,entrypoint='slStringSubsOut'; \n"
      " string StringSubstitute(const string src, const string _within, const string _newchar) : c, pure,entrypoint='slStringSubs'; \n"
      " string StringRepad(const string src, unsigned4 size) : c, pure,entrypoint='slStringRepad'; \n"
      " string StringTranslate(const string src, const string _within, const string _mapping) : c, pure,entrypoint='slStringTranslate'; \n"
      " unsigned integer4 StringFind(const string src, const string tofind, unsigned4 instance ) : c, pure,entrypoint='slStringFind'; \n"
      " unsigned integer4 StringUnboundedUnsafeFind(const string src, const string tofind ) : c, pure,entrypoint='slStringFind2'; \n"
      " unsigned integer4 StringFindCount(const string src, const string tofind) : c, pure,entrypoint='slStringFindCount'; \n"
      " unsigned integer4 EbcdicStringFind(const ebcdic string src, const ebcdic string tofind , unsigned4 instance ) : c,pure,entrypoint='slStringFind'; \n"
      " unsigned integer4 EbcdicStringUnboundedUnsafeFind(const ebcdic string src, const ebcdic string tofind ) : c,pure,entrypoint='slStringFind2'; \n"
      " string StringExtract(const string src, unsigned4 instance) : c,pure,entrypoint='slStringExtract'; \n"
      " string8 GetDateYYYYMMDD() : c,once,entrypoint='slGetDateYYYYMMDD2';\n"
      " varstring GetBuildInfo() : c,once,entrypoint='slGetBuildInfo';\n"
      " string Data2String(const data src) : c,pure,entrypoint='slData2String';\n"
      " data String2Data(const string src) : c,pure,entrypoint='slString2Data';\n"
      " string StringToLowerCase(const string src) : c,pure,entrypoint='slStringToLowerCase';\n"
      " string StringToUpperCase(const string src) : c,pure,entrypoint='slStringToUpperCase';\n"
      " string StringToProperCase(const string src) : c,pure,entrypoint='slStringToProperCase';\n"
      " string StringToCapitalCase(const string src) : c,pure,entrypoint='slStringToCapitalCase';\n"
      " string StringToTitleCase(const string src) : c,pure,entrypoint='slStringToTitleCase';\n"
      " integer4 StringCompareIgnoreCase(const string src1, string src2) : c,pure,entrypoint='slStringCompareIgnoreCase';\n"
      " string StringReverse(const string src) : c,pure,entrypoint='slStringReverse';\n"
      " string StringFindReplace(const string src, const string stok, const string rtok) : c,pure,entrypoint='slStringFindReplace';\n"
      " string StringCleanSpaces(const string src) : c,pure,entrypoint='slStringCleanSpaces'; \n"
      " boolean StringWildMatch(const string src, const string _pattern, boolean _noCase) : c, pure,entrypoint='slStringWildMatch'; \n"
      " boolean StringWildExactMatch(const string src, const string _pattern, boolean _noCase) : c, pure,entrypoint='slStringWildExactMatch'; \n"
      " boolean StringContains(const string src, const string _pattern, boolean _noCase) : c, pure,entrypoint='slStringContains'; \n"
      " string StringExtractMultiple(const string src, unsigned8 mask) : c,pure,entrypoint='slStringExtractMultiple'; \n"
      " unsigned integer4 EditDistance(const string l, const string r) : c, pure,entrypoint='slEditDistanceV2'; \n"
      " boolean EditDistanceWithinRadius(const string l, const string r, unsigned4 radius) : c,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
      " unsigned integer4 EditDistanceV2(const string l, const string r) : c, pure,entrypoint='slEditDistanceV2'; \n"
      " boolean EditDistanceWithinRadiusV2(const string l, const string r, unsigned4 radius) : c,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
      " string StringGetNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringGetNthWord'; \n"
      " string StringExcludeLastWord(const string src) : c, pure,entrypoint='slStringExcludeLastWord'; \n"
      " string StringExcludeNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringExcludeNthWord'; \n"
      " unsigned4 StringWordCount(const string src) : c, pure,entrypoint='slStringWordCount'; \n"
      " unsigned4 CountWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slCountWords'; \n"
      " SET OF STRING SplitWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slSplitWords'; \n"
      " STRING CombineWords(set of string src, const string _separator) : c, pure,entrypoint='slCombineWords'; \n"
      " UNSIGNED4 StringToDate(const string src, const varstring format) : c, pure,entrypoint='slStringToDate'; \n"
      " UNSIGNED4 MatchDate(const string src, set of varstring formats) : c, pure,entrypoint='slMatchDate'; \n"
      " STRING FormatDate(UNSIGNED4 date, const varstring format) : c, pure,entrypoint='slFormatDate'; \n"
      " STRING StringRepeat(const string src, unsigned4 n) : c, pure,entrypoint='slStringRepeat'; \n"
      "END;";
  79. STRINGLIB_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
  80. {
  81. if (pb->size == sizeof(ECLPluginDefinitionBlockEx))
  82. {
  83. ECLPluginDefinitionBlockEx * pbx = (ECLPluginDefinitionBlockEx *) pb;
  84. pbx->compatibleVersions = compatibleVersions;
  85. }
  86. else if (pb->size != sizeof(ECLPluginDefinitionBlock))
  87. return false;
  88. pb->magicVersion = PLUGIN_VERSION;
  89. pb->version = STRINGLIB_VERSION;
  90. pb->moduleName = "lib_stringlib";
  91. pb->ECL = EclDefinition;
  92. pb->flags = PLUGIN_IMPLICIT_MODULE | PLUGIN_MULTIPLE_VERSIONS;
  93. pb->description = "StringLib string manipulation library";
  94. return true;
  95. }
  96. namespace nsStringlib {
  97. IPluginContext * parentCtx = NULL;
  98. enum { bitsInUnsigned = sizeof(unsigned) * 8 };
  99. static const char hexchar[] = "0123456789ABCDEF";
  // Converts one hexadecimal digit character to its numeric value (0..15).
  // Upper- and lower-case letters are both accepted. Any character that is
  // not a hex digit (including '\0') yields 0, the same value as '0'.
  static unsigned hex2digit(char c)
  {
      if (c >= '1' && c <= '9')
          return (unsigned)(c - '0');
      if (c >= 'a' && c <= 'f')
          return (unsigned)(c - 'a') + 10;
      if (c >= 'A' && c <= 'F')
          return (unsigned)(c - 'A') + 10;
      return 0;
  }
  // Upper-cases a single character and returns it as a char.
  // The argument is converted to unsigned char first: passing a negative
  // plain-char value straight to toupper() is undefined behaviour.
  inline char char_toupper(char c)
  {
      return (char)toupper((unsigned char)c);
  }
  // Reduces len so that s[0..len) no longer ends in space characters.
  // Only the length is changed; the buffer itself is not touched.
  inline void clip(unsigned &len, const char * s)
  {
      for (; len != 0; --len)
      {
          if (s[len-1] != ' ')
              break;
      }
  }
  // Returns the smallest of the three unsigned values.
  inline unsigned min3(unsigned a, unsigned b, unsigned c)
  {
      const unsigned smallerOfAB = (b < a) ? b : a;
      return (c < smallerOfAB) ? c : smallerOfAB;
  }
  142. //--- Optimized versions of the edit distance functions
  // Maps a row number onto one of the two stripes kept by the edit distance
  // functions: 0 for even rows, 1 for odd rows.
  inline unsigned mask(unsigned x)
  {
      return x % 2;
  }
  // Computes the edit distance between two length-delimited strings.
  //
  // Trailing spaces on either string are ignored, and each string is truncated
  // to its first 255 characters, so the result is at most 255.
  //
  // Storage is kept small by
  //  i)  only holding two rows (stripes) of the distance matrix, and
  //  ii) calculating, but never storing, the row/column that compares against
  //      the empty string.
  unsigned editDistance(unsigned leftLen, const char * left, unsigned rightLen, const char * right)
  {
      // Trailing blanks are not significant.
      while (leftLen != 0 && left[leftLen-1] == ' ')
          --leftLen;
      while (rightLen != 0 && right[rightLen-1] == ' ')
          --rightLen;

      // Distances are held in single bytes, so cap the lengths.
      const unsigned maxLen = 255;
      if (leftLen > maxLen)
          leftLen = maxLen;
      if (rightLen > maxLen)
          rightLen = maxLen;

      if (leftLen == 0)
          return rightLen;
      if (rightLen == 0)
          return leftLen;

      unsigned char rows[2][256];
      const char firstLeft = left[0];
      const char firstRight = right[0];

      // Row 0: distance between left[0] and each prefix of right. Once left[0]
      // has been seen in right the distance is col, before that it is col+1.
      bool seenFirstLeft = false;
      for (unsigned col = 0; col < rightLen; ++col)
      {
          if (right[col] == firstLeft)
              seenFirstLeft = true;
          rows[0][col] = (unsigned char)(seenFirstLeft ? col : col + 1);
      }

      bool seenFirstRight = (firstLeft == firstRight);
      for (unsigned row = 1; row < leftLen; ++row)
      {
          unsigned char * cur = rows[row & 1];
          const unsigned char * prev = rows[(row - 1) & 1];
          const char leftCh = left[row];

          // Column 0 follows the same rule as row 0, with the roles swapped.
          if (leftCh == firstRight)
              seenFirstRight = true;
          unsigned char cellToLeft = (unsigned char)(seenFirstRight ? row : row + 1);
          cur[0] = cellToLeft;

          for (unsigned col = 1; col < rightLen; ++col)
          {
              unsigned char cell;
              if (leftCh == right[col])
              {
                  // Characters match: carry the diagonal value over unchanged.
                  cell = prev[col-1];
              }
              else
              {
                  // Cheapest of delete / insert / substitute, plus one.
                  unsigned best = prev[col];
                  if (cellToLeft < best)
                      best = cellToLeft;
                  if (prev[col-1] < best)
                      best = prev[col-1];
                  cell = (unsigned char)(best + 1);
              }
              cur[col] = cell;
              cellToLeft = cell;
          }
      }
      return rows[(leftLen - 1) & 1][rightLen - 1];
  }
  189. //This could be further improved in the following ways:
  190. // * Only use 2*radius bytes of temporary storage - I doubt it is worth it.
  191. // * special case edit1 - you could use variables for the 6 interesting array elements, and get
  192. // rid of the array completely. You could also unwind the first (and last) iterations.
  193. // * I suspect the early exit condition could be improved depending on the lengths of the strings.
  194. extern STRINGLIB_API unsigned editDistanceWithinRadius(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius)
  195. {
  196. if (radius >= 255)
  197. return 255;
  198. clip(leftLen, left);
  199. clip(rightLen, right);
  200. unsigned minED = (leftLen < rightLen)? rightLen - leftLen: leftLen - rightLen;
  201. if (minED > radius)
  202. return minED;
  203. if (leftLen > 255)
  204. leftLen = 255;
  205. if (rightLen > 255)
  206. rightLen = 255;
  207. //Checking for leading common substrings actually slows the function down.
  208. if (leftLen == 0)
  209. return rightLen;
  210. if (rightLen == 0)
  211. return leftLen;
  212. /*
  213. This function applies two optimizations over the function above.
  214. a) Adding a charcter (next row) can at most decrease the edit distance by 1, so short circuit when
  215. we there is no possiblity of getting within the distance.
  216. b) We only need to evaluate the martix da[i-radius..i+radius][j-radius..j+radius]
  217. not taking into account values outside that range [can use max value to prevent access]
  218. */
  219. //Optimize the storage requirements by
  220. //i) Only storing two stripes
  221. //ii) Calculate, but don't store the row comparing against the null string
  222. unsigned char da[2][256];
  223. char r_0 = right[0];
  224. char l_0 = left[0];
  225. bool matched_l0 = false;
  226. for (unsigned j = 0; j < rightLen; j++)
  227. {
  228. if (right[j] == l_0) matched_l0 = true;
  229. da[0][j] = (matched_l0) ? j : j+1;
  230. }
  231. bool matched_r0 = (l_0 == r_0);
  232. for (unsigned i = 1; i < leftLen; i++)
  233. {
  234. char l_i = left[i];
  235. if (l_i == r_0)
  236. matched_r0 = true;
  237. byte da_i_0 = matched_r0 ? i : i+1;
  238. da[mask(i)][0] = da_i_0;
  239. byte da_i_prevj = da_i_0;
  240. unsigned low = i-radius;
  241. unsigned high = i+radius;
  242. unsigned first = (i > radius) ? low : 1;
  243. unsigned last = (high >= rightLen) ? rightLen : high +1;
  244. for (unsigned j = first; j < last; j++)
  245. {
  246. char r_j = right[j];
  247. unsigned next = da[mask(i-1)][j-1];
  248. if (l_i != r_j)
  249. {
  250. if (j != low)
  251. {
  252. if (next > da_i_prevj)
  253. next = da_i_prevj;
  254. }
  255. if (j != high)
  256. {
  257. byte da_previ_j = da[mask(i-1)][j];
  258. if (next > da_previ_j)
  259. next = da_previ_j;
  260. }
  261. next++;
  262. }
  263. da[mask(i)][j] = next;
  264. da_i_prevj = next;
  265. }
  266. // bail out early if ed can't possibly be <= radius
  267. // Only considering a strip down the middle of the matrix, so the maximum the score can ever be adjusted is 2xradius
  268. unsigned max_valid_score = 3*radius;
  269. // But maximum is also 1 for every difference in string length - comes in to play when close to the end.
  270. //In 32bit goes slower for radius=1 I suspect because running out of registers. Retest in 64bit.
  271. if (radius > 1)
  272. {
  273. unsigned max_distance = radius + (leftLen - (i+1)) + (rightLen - last);
  274. if (max_valid_score > max_distance)
  275. max_valid_score = max_distance;
  276. }
  277. if (da_i_prevj > max_valid_score)
  278. return da_i_prevj;
  279. }
  280. return da[mask(leftLen-1)][rightLen-1];
  281. }
  282. } // namespace
  283. //-------------------------------------------------------------------------------------------------------------------------------------------
  284. // Exported functions are NOT in the namespace
  285. using namespace nsStringlib;
  286. STRINGLIB_API void setPluginContext(IPluginContext * _ctx) { parentCtx = _ctx; }
  287. STRINGLIB_API void STRINGLIB_CALL slStringFilterOut(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit)
  288. {
  289. char *temp = (char *)CTXMALLOC(parentCtx, srcLen);
  290. unsigned tlen = 0;
  291. if (hitLen==1)
  292. {
  293. char test = *hit;
  294. for ( unsigned i = 0; i < srcLen; i++ )
  295. {
  296. char c = src[i];
  297. if (c!=test)
  298. temp[tlen++] = c;
  299. }
  300. }
  301. else {
  302. unsigned filter[256/bitsInUnsigned];
  303. memset(filter,0,sizeof(filter));
  304. for (unsigned j = 0; j < hitLen; j++ )
  305. {
  306. unsigned c = (unsigned char)hit[j];
  307. filter[c/bitsInUnsigned] |= (1<<(c%bitsInUnsigned));
  308. }
  309. for ( unsigned i = 0; i < srcLen; i++ )
  310. {
  311. unsigned c = (unsigned char)src[i];
  312. if ((filter[c/bitsInUnsigned] & (1<<(c%bitsInUnsigned))) == 0)
  313. temp[tlen++] = (char)c;
  314. }
  315. }
  316. tgt = (char *)CTXREALLOC(parentCtx, temp, tlen);
  317. tgtLen = tlen;
  318. }
  319. STRINGLIB_API void STRINGLIB_CALL slStringFilter(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit)
  320. {
  321. char *temp = (char *)CTXMALLOC(parentCtx, srcLen);
  322. unsigned tlen = 0;
  323. unsigned filter[256/bitsInUnsigned];
  324. memset(filter,0,sizeof(filter));
  325. for (unsigned j = 0; j < hitLen; j++ )
  326. {
  327. unsigned c = (unsigned char)hit[j];
  328. filter[c/bitsInUnsigned] |= (1<<(c%bitsInUnsigned));
  329. }
  330. for ( unsigned i = 0; i < srcLen; i++ )
  331. {
  332. unsigned c = (unsigned char)src[i];
  333. if ((filter[c/bitsInUnsigned] & (1<<(c%bitsInUnsigned))) != 0)
  334. temp[tlen++] = (char)c;
  335. }
  336. tgt = (char *)CTXREALLOC(parentCtx, temp, tlen);
  337. tgtLen = tlen;
  338. }
  339. STRINGLIB_API void STRINGLIB_CALL slStringSubsOut(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned newCharLen, const char * newChar)
  340. {
  341. bool filter[256];
  342. memset(filter,0,sizeof(filter));
  343. for (unsigned j = 0; j < hitLen; j++ )
  344. {
  345. unsigned char c = ((unsigned char *)hit)[j];
  346. filter[c] = true;
  347. }
  348. tgt = (char *)CTXMALLOC(parentCtx, srcLen);
  349. if (newCharLen > 0)
  350. {
  351. for ( unsigned i = 0; i < srcLen; i++ )
  352. {
  353. unsigned char c = ((unsigned char *)src)[i];
  354. if (!filter[c])
  355. tgt[i] = c;
  356. else
  357. tgt[i] = ((char *)newChar)[0];
  358. }
  359. }
  360. else
  361. {
  362. memcpy(tgt, src, srcLen);
  363. }
  364. tgtLen = srcLen;
  365. }
  366. STRINGLIB_API void STRINGLIB_CALL slStringSubs(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned newCharLen, const char * newChar)
  367. {
  368. bool filter[256];
  369. memset(filter,0,sizeof(filter));
  370. for (unsigned j = 0; j < hitLen; j++ )
  371. {
  372. unsigned char c = ((unsigned char *)hit)[j];
  373. filter[c] = true;
  374. }
  375. tgt = (char *)CTXMALLOC(parentCtx, srcLen);
  376. if (newCharLen > 0)
  377. {
  378. for ( unsigned i = 0; i < srcLen; i++ )
  379. {
  380. unsigned char c = ((unsigned char *)src)[i];
  381. if (filter[c])
  382. tgt[i] = c;
  383. else
  384. tgt[i] = ((char *)newChar)[0];
  385. }
  386. }
  387. else
  388. {
  389. memcpy(tgt, src, srcLen);
  390. }
  391. tgtLen = srcLen;
  392. }
  393. STRINGLIB_API void STRINGLIB_CALL slStringTranslate(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned mappingLen, const char * mapping)
  394. {
  395. char mapped[256];
  396. for (unsigned i=0; i < sizeof(mapped); i++)
  397. mapped[i] = i;
  398. if (hitLen == mappingLen)
  399. {
  400. for (unsigned j = 0; j < hitLen; j++ )
  401. {
  402. unsigned char c = ((unsigned char *)hit)[j];
  403. mapped[c] = mapping[j];
  404. }
  405. }
  406. char * ret = (char *)CTXMALLOC(parentCtx, srcLen);
  407. for ( unsigned i = 0; i < srcLen; i++ )
  408. {
  409. unsigned char c = ((unsigned char *)src)[i];
  410. ret[i] = mapped[c];
  411. }
  412. tgt = ret;
  413. tgtLen = srcLen;
  414. }
  415. STRINGLIB_API void STRINGLIB_CALL slStringRepad(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned tLen)
  416. {
  417. char *base = (char *)src;
  418. while ( srcLen && *base == ' ' )
  419. {
  420. srcLen--;
  421. base++;
  422. }
  423. while ( srcLen && base[srcLen-1] == ' ' )
  424. srcLen--;
  425. if ( srcLen > tLen )
  426. srcLen = tLen;
  427. tgt = (char *)CTXMALLOC(parentCtx, tLen);
  428. tgtLen = tLen;
  429. memcpy(tgt,base,srcLen);
  430. memset(tgt+srcLen,' ',tLen-srcLen);
  431. }
  432. STRINGLIB_API unsigned STRINGLIB_CALL slStringFind(unsigned srcLen, const char * src, unsigned hitLen, const char * hit, unsigned instance)
  433. {
  434. if ( srcLen < hitLen )
  435. return 0;
  436. if (hitLen==1) { // common case optimization
  437. const char *p=src;
  438. const char *e = p+srcLen;
  439. char c = *hit;
  440. while (p!=e)
  441. if ((*(p++)==c))
  442. if (!--instance)
  443. return (unsigned)(p-src);
  444. }
  445. else
  446. {
  447. unsigned steps = srcLen-hitLen+1;
  448. for ( unsigned i = 0; i < steps; i++ )
  449. {
  450. if ( !memcmp((char *)src+i,hit,hitLen) )
  451. {
  452. if ( !--instance )
  453. return i+1;
  454. if (hitLen > 1)
  455. i += (hitLen-1);
  456. }
  457. }
  458. }
  459. return 0;
  460. }
  461. STRINGLIB_API unsigned STRINGLIB_CALL slStringFindCount(unsigned srcLen, const char * src, unsigned hitLen, const char * hit)
  462. {
  463. if ( srcLen < hitLen )
  464. return 0;
  465. unsigned matches = 0;
  466. if (hitLen==1) { // common case optimization
  467. const char *p=src;
  468. const char *e = p+srcLen;
  469. char c = *hit;
  470. while (p!=e)
  471. if ((*(p++)==c))
  472. matches++;
  473. }
  474. else
  475. {
  476. unsigned steps = srcLen-hitLen+1;
  477. for ( unsigned i = 0; i < steps; i++ )
  478. {
  479. if ( !memcmp((char *)src+i,hit,hitLen) )
  480. {
  481. matches++;
  482. if (hitLen > 1)
  483. i += (hitLen-1);
  484. }
  485. }
  486. }
  487. return matches;
  488. }
  489. STRINGLIB_API unsigned STRINGLIB_CALL slStringFind2(unsigned /*srcLen*/, const char * src, unsigned hitLen, const char * hit)
  490. {
  491. if (hitLen==1) { // common case optimization
  492. const char *p=src;
  493. char c = *hit;
  494. while (*(p++)!=c);
  495. return (unsigned)(p-src);
  496. }
  497. for ( unsigned i = 0; ; i++ )
  498. if ( !memcmp((char *)src+i,hit,hitLen) )
  499. return i+1;
  500. return 0;
  501. }
  502. STRINGLIB_API void STRINGLIB_CALL slStringExtract(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned instance)
  503. {
  504. tgtLen = 0;
  505. tgt = NULL;
  506. char * finger = (char *)src;
  507. if ( !instance )
  508. return;
  509. while ( --instance )
  510. {
  511. while ( srcLen && *finger != ',' )
  512. {
  513. srcLen--;
  514. finger++;
  515. }
  516. if ( !srcLen )
  517. return;
  518. srcLen--; // Skip ,
  519. finger++;
  520. }
  521. unsigned len = 0;
  522. for ( ; len < srcLen; len++ )
  523. if ( finger[len] == ',' )
  524. break;
  525. tgt = (char *)CTXMALLOC(parentCtx, len);
  526. memcpy(tgt,finger,len);
  527. tgtLen = len;
  528. }
  529. STRINGLIB_API void STRINGLIB_CALL slStringExtractMultiple(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned __int64 mask)
  530. {
  531. tgtLen = 0;
  532. tgt = NULL;
  533. char * finger = (char *)src;
  534. unsigned __int64 thisInstance = 1;
  535. while (mask)
  536. {
  537. while ( srcLen && *finger != ',' )
  538. {
  539. srcLen--;
  540. finger++;
  541. }
  542. if (mask & thisInstance)
  543. {
  544. mask &= ~thisInstance;
  545. unsigned matchLen = (unsigned)(finger - src);
  546. if (!tgt)
  547. tgt = (char *) CTXMALLOC(parentCtx, matchLen + srcLen);
  548. else
  549. tgt[tgtLen++] = ',';
  550. memcpy(tgt+tgtLen, src, finger - src);
  551. tgtLen += matchLen;
  552. }
  553. thisInstance <<= 1;
  554. if ( !srcLen )
  555. break;
  556. srcLen--; // Skip the ','
  557. finger++;
  558. src = finger;
  559. }
  560. }
  561. STRINGLIB_API char * STRINGLIB_CALL slGetDateYYYYMMDD(void)
  562. {
  563. char * result = (char *)CTXMALLOC(parentCtx, 9);
  564. time_t ltime;
  565. time( &ltime );
  566. tm *today = localtime( &ltime );
  567. strftime(result, 9, "%Y%m%d", today);
  568. return result;
  569. }
  570. STRINGLIB_API void STRINGLIB_CALL slGetDateYYYYMMDD2(char * ret)
  571. {
  572. char temp[9];
  573. time_t ltime;
  574. time( &ltime );
  575. tm *today = localtime( &ltime );
  576. strftime(temp, 9, "%Y%m%d", today);
  577. memcpy(ret, temp, 8);
  578. }
  579. STRINGLIB_API char * STRINGLIB_CALL slGetBuildInfo(void)
  580. {
  581. return CTXSTRDUP(parentCtx, STRINGLIB_VERSION);
  582. }
  583. STRINGLIB_API void STRINGLIB_CALL slData2String(size32_t & __ret_len,char * & __ret_str,unsigned _len_y, const void * y)
  584. {
  585. char *out = (char *)CTXMALLOC(parentCtx, _len_y * 2);
  586. char *res = out;
  587. unsigned char *yy = (unsigned char *) y;
  588. for (unsigned int i = 0; i < _len_y; i++)
  589. {
  590. *out++ = hexchar[yy[i] >> 4];
  591. *out++ = hexchar[yy[i] & 0x0f];
  592. }
  593. __ret_len = _len_y * 2;
  594. __ret_str = res;
  595. }
  596. STRINGLIB_API void STRINGLIB_CALL slString2Data(size32_t & __ret_len,void * & __ret_str,unsigned _len_src,const char * src)
  597. {
  598. // trailing nibbles are ignored
  599. // embedded spaces are ignored
  600. // illegal hex values are treated as zero
  601. // we could do a stricter one if it was considered desirable.
  602. char *out = (char *)CTXMALLOC(parentCtx, _len_src / 2);
  603. char *target = out;
  604. for (;;)
  605. {
  606. while (_len_src > 1 && isspace(*src))
  607. {
  608. src++;
  609. _len_src--;
  610. }
  611. if (_len_src < 2)
  612. break;
  613. *target++ = (hex2digit(src[0]) << 4) | hex2digit(src[1]);
  614. _len_src -= 2;
  615. src += 2;
  616. }
  617. __ret_len = (size32_t)(target - out);
  618. __ret_str = out;
  619. }
  620. // -----------------------------------------------------------------
  621. STRINGLIB_API void STRINGLIB_CALL slStringToLowerCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  622. {
  623. char * res = (char *)CTXMALLOC(parentCtx, srcLen);
  624. for (unsigned int i=0;i<srcLen;i++)
  625. res[i] = tolower(src[i]);
  626. tgt = res;
  627. tgtLen = srcLen;
  628. }
  629. // -----------------------------------------------------------------
  630. STRINGLIB_API void STRINGLIB_CALL slStringToUpperCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  631. {
  632. char * res = (char *)CTXMALLOC(parentCtx, srcLen);
  633. for (unsigned int i=0;i<srcLen;i++)
  634. res[i] = toupper(src[i]);
  635. tgt = res;
  636. tgtLen = srcLen;
  637. }
  638. // -----------------------------------------------------------------
  639. STRINGLIB_API void STRINGLIB_CALL slStringToProperCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  640. {
  641. tgt = (char *)CTXMALLOC(parentCtx, srcLen);
  642. char * res = tgt;
  643. bool seenSpace = true;
  644. for (unsigned int i=0;i<srcLen;i++)
  645. {
  646. char c = src[i];
  647. *tgt++ = seenSpace ? toupper(c) : c;
  648. seenSpace = (c==' ');
  649. }
  650. tgt = res;
  651. tgtLen = srcLen;
  652. }
  653. // -----------------------------------------------------------------
  654. STRINGLIB_API void STRINGLIB_CALL slStringToCapitalCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  655. {
  656. char * const result = (char *)CTXMALLOC(parentCtx, srcLen);
  657. bool upperPending = true;
  658. for (unsigned int i=0;i<srcLen;i++)
  659. {
  660. byte c = src[i];
  661. result[i] = upperPending ? toupper(c) : c;
  662. upperPending = !isalnum(c);
  663. }
  664. tgt = result;
  665. tgtLen = srcLen;
  666. }
  667. // -----------------------------------------------------------------
  668. STRINGLIB_API void STRINGLIB_CALL slStringToTitleCase(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  669. {
  670. char * const result = (char *)CTXMALLOC(parentCtx, srcLen);
  671. bool upperPending = true;
  672. for (unsigned int i=0;i<srcLen;i++)
  673. {
  674. byte c = src[i];
  675. result[i] = upperPending ? toupper(c) : tolower(c);
  676. upperPending = !isalnum(c);
  677. }
  678. tgt = result;
  679. tgtLen = srcLen;
  680. }
  681. // -----------------------------------------------------------------
  682. STRINGLIB_API int STRINGLIB_CALL slStringCompareIgnoreCase (unsigned src1Len, const char * src1, unsigned src2Len, const char * src2)
  683. {
  684. unsigned int i;
  685. for (i=0;i < src1Len && i < src2Len;i++)
  686. {
  687. byte lc = src1[i];
  688. byte rc = src2[i];
  689. if (lc != rc)
  690. {
  691. lc = tolower(lc);
  692. rc = tolower(rc);
  693. if (lc != rc)
  694. return lc > rc ? 1 : -1;
  695. }
  696. }
  697. while (i < src1Len)
  698. {
  699. if (src1[i++] != ' ')
  700. return 1;
  701. }
  702. while (i < src2Len)
  703. {
  704. if (src2[i++] != ' ')
  705. return -1;
  706. }
  707. return 0;
  708. }
  709. // -----------------------------------------------------------------
  710. STRINGLIB_API void STRINGLIB_CALL slStringReverse (unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  711. {
  712. char * res = (char *)CTXMALLOC(parentCtx, srcLen);
  713. unsigned int n = srcLen - 1;
  714. for (unsigned int i=0;i<srcLen;i++)
  715. res[i] = src[n-i];
  716. tgt = res;
  717. tgtLen = srcLen;
  718. }
  719. // -----------------------------------------------------------------
  720. STRINGLIB_API void STRINGLIB_CALL slStringFindReplace (unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned stokLen, const char * stok, unsigned rtokLen, const char * rtok)
  721. {
  722. if ( srcLen < stokLen || stokLen == 0)
  723. {
  724. tgt = (char *) CTXMALLOC(parentCtx, srcLen);
  725. memcpy(tgt, src, srcLen);
  726. tgtLen = srcLen;
  727. }
  728. else
  729. {
  730. unsigned steps = srcLen-stokLen+1;
  731. unsigned tgtmax = rtokLen > stokLen ? srcLen + steps * (rtokLen - stokLen) : srcLen;
  732. // This is the upper limit on target size - not a problem if we allocate a bit too much
  733. char * res = (char *)CTXMALLOC(parentCtx, tgtmax);
  734. tgt = res;
  735. unsigned i;
  736. for ( i = 0; i < steps; )
  737. {
  738. if ( !memcmp(src+i,stok,stokLen) )
  739. {
  740. memcpy(res, rtok, rtokLen);
  741. res += rtokLen;
  742. i += stokLen;
  743. }
  744. else
  745. *res++ = src[i++];
  746. }
  747. while (i <srcLen)
  748. *res++ = src[i++];
  749. tgtLen = (size32_t)(res - tgt);
  750. }
  751. }
  752. // -----------------------------------------------------------------
  753. STRINGLIB_API void STRINGLIB_CALL slStringCleanSpaces(size32_t & __ret_len,char * & __ret_str,unsigned _len_instr,const char * instr)
  754. {
  755. // remove double spaces
  756. char *out = (char *) CTXMALLOC(parentCtx, _len_instr);
  757. char *origout = out;
  758. bool spacePending = false;
  759. bool atStart = true;
  760. for(unsigned idx = 0; idx < _len_instr; idx++)
  761. {
  762. char c = *instr++;
  763. switch (c)
  764. {
  765. case ' ':
  766. case '\t':
  767. spacePending = true;
  768. break;
  769. default:
  770. if (spacePending && !atStart)
  771. *out++ = ' ';
  772. spacePending = false;
  773. atStart = false;
  774. *out++ = c;
  775. break;
  776. }
  777. }
  778. __ret_str = origout;
  779. __ret_len = (size32_t)(out - origout);
  780. }
  781. STRINGLIB_API bool STRINGLIB_CALL slStringWildMatch(unsigned srcLen, const char * src, unsigned patLen, const char * pat, bool noCase)
  782. {
  783. return wildTrimMatch<char, char_toupper, '?', '*', ' '>(src, srcLen, pat, patLen, noCase);
  784. }
  785. STRINGLIB_API bool STRINGLIB_CALL slStringWildExactMatch(unsigned srcLen, const char * src, unsigned patLen, const char * pat, bool noCase)
  786. {
  787. return wildMatch<char, char_toupper, '?', '*'>(src, srcLen, pat, patLen, noCase);
  788. }
  789. STRINGLIB_API bool STRINGLIB_CALL slStringContains(unsigned srcLen, const char * src, unsigned patLen, const char * pat, bool noCase)
  790. {
  791. unsigned char srcCount[256];
  792. memset(srcCount, 0, 256);
  793. while (srcLen && src[srcLen-1]==' ')
  794. srcLen--;
  795. while(srcLen-- > 0)
  796. {
  797. byte c = *src++;
  798. if (noCase)
  799. c = toupper(c);
  800. srcCount[c]++;
  801. }
  802. while (patLen && pat[patLen-1]==' ')
  803. patLen--;
  804. while(patLen-- > 0)
  805. {
  806. byte c = *pat++;
  807. if (noCase)
  808. c = toupper(c);
  809. if (srcCount[c] == 0)
  810. return false;
  811. else
  812. srcCount[c]--;
  813. }
  814. return true;
  815. }
  816. STRINGLIB_API unsigned STRINGLIB_CALL slEditDistanceV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right)
  817. {
  818. return nsStringlib::editDistance(leftLen, left, rightLen, right);
  819. }
  820. STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius)
  821. {
  822. return nsStringlib::editDistanceWithinRadius(leftLen, left, rightLen, right, radius) <= radius;
  823. }
  824. inline bool isWordSeparator(char x)
  825. {
  826. return (unsigned char)x <= 0x20;
  827. }
  828. STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
  829. {
  830. const char* start = 0;
  831. const char* end = 0;
  832. // skip any leading white space
  833. while (srcLen>0 && isWordSeparator(*src)) {
  834. src++;
  835. srcLen--;
  836. }
  837. while (srcLen>0 && n>0) {
  838. start = src;
  839. n--;
  840. // go to the next white space
  841. while (srcLen>0 && !isWordSeparator(*src)) {
  842. src++;
  843. srcLen--;
  844. }
  845. end = src;
  846. // skip white space again
  847. while (srcLen>0 && isWordSeparator(*src)) {
  848. src++;
  849. srcLen--;
  850. }
  851. }
  852. if (!n && (end-start)) {
  853. tgt = (char *)CTXMALLOC(parentCtx, end-start);
  854. memcpy(tgt,start,end-start);
  855. tgtLen = end-start;
  856. } else {
  857. tgt = 0;
  858. tgtLen = 0;
  859. }
  860. }
  861. STRINGLIB_API void STRINGLIB_CALL slStringRepeat(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
  862. {
  863. char * buffer = NULL;
  864. if (n == 0 || (srcLen == 0))
  865. {
  866. tgtLen = 0;
  867. }
  868. else
  869. {
  870. tgtLen = srcLen*n;
  871. buffer = (char *)CTXMALLOC(parentCtx, tgtLen);
  872. if (srcLen == 1)
  873. {
  874. memset(buffer, *src, n);
  875. }
  876. else
  877. {
  878. for (unsigned i = 0; i < n; ++i)
  879. {
  880. memcpy(buffer + i*srcLen, src, srcLen);
  881. }
  882. }
  883. }
  884. tgt = buffer;
  885. }
  886. STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const char * src)
  887. {
  888. // skip any leading white space
  889. unsigned word_count = 0;
  890. while (srcLen>0 && isWordSeparator(*src)) {
  891. src++;
  892. srcLen--;
  893. }
  894. while (srcLen>0) {
  895. word_count++;
  896. // go to the next white space
  897. while (srcLen>0 && !isWordSeparator(*src)) {
  898. src++;
  899. srcLen--;
  900. }
  901. // skip white space again
  902. while (srcLen>0 && isWordSeparator(*src)) {
  903. src++;
  904. srcLen--;
  905. }
  906. }
  907. return word_count;
  908. }
  909. STRINGLIB_API void STRINGLIB_CALL slStringExcludeLastWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
  910. {
  911. //Remove first word also removes leading whitespace, otherwise just remove trailing whitespace
  912. unsigned idx = 0;
  913. unsigned startLast = 0;
  914. while (idx < srcLen && isWordSeparator(src[idx]))
  915. idx++;
  916. for (;;)
  917. {
  918. while (idx < srcLen && !isWordSeparator(src[idx]))
  919. idx++;
  920. while (idx < srcLen && isWordSeparator(src[idx]))
  921. idx++;
  922. if (idx == srcLen)
  923. break;
  924. startLast = idx;
  925. }
  926. unsigned len = startLast;
  927. tgtLen = len;
  928. if (len)
  929. {
  930. tgt = (char *)CTXMALLOC(parentCtx, len);
  931. memcpy(tgt,src,len);
  932. }
  933. else
  934. tgt = NULL;
  935. }
  936. STRINGLIB_API void STRINGLIB_CALL slStringExcludeNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
  937. {
  938. unsigned idx = 0;
  939. unsigned startLast = 0;
  940. while (idx < srcLen && isWordSeparator(src[idx]))
  941. idx++;
  942. unsigned matchIndex = 0;
  943. //Remove first word also removes leading whitespace, otherwise just remove trailing whitespace
  944. //No matching words returns a blank string
  945. if (idx != srcLen)
  946. {
  947. for (;;)
  948. {
  949. while (idx < srcLen && !isWordSeparator(src[idx]))
  950. idx++;
  951. while (idx < srcLen && isWordSeparator(src[idx]))
  952. idx++;
  953. if (++matchIndex == n)
  954. break;
  955. startLast = idx;
  956. if (idx == srcLen)
  957. break;
  958. }
  959. }
  960. unsigned len = startLast + (srcLen - idx);
  961. tgtLen = len;
  962. if (len)
  963. {
  964. tgt = (char *)CTXMALLOC(parentCtx, len);
  965. memcpy(tgt,src,startLast);
  966. memcpy(tgt+startLast,src+idx,(srcLen - idx));
  967. }
  968. else
  969. tgt = NULL;
  970. }
  971. //--------------------------------------------------------------------------------------------------------------------
  972. STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  973. {
  974. if (lenSrc == 0)
  975. return 0;
  976. if ((lenSeparator == 0) || (lenSrc < lenSeparator))
  977. return 1;
  978. unsigned numWords=0;
  979. const char * end = src + lenSrc;
  980. const char * max = end - (lenSeparator - 1);
  981. const char * cur = src;
  982. const char * startWord = NULL;
  983. //MORE: optimize lenSeparator == 1!
  984. while (cur < max)
  985. {
  986. if (memcmp(cur, separator, lenSeparator) == 0)
  987. {
  988. if (startWord || allowBlankItems)
  989. {
  990. numWords++;
  991. startWord = NULL;
  992. }
  993. cur += lenSeparator;
  994. }
  995. else
  996. {
  997. if (!startWord)
  998. startWord = cur;
  999. cur++;
  1000. }
  1001. }
  1002. if (startWord || (cur != end) || allowBlankItems)
  1003. numWords++;
  1004. return numWords;
  1005. }
  1006. static unsigned calcWordSetSize(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1007. {
  1008. if (lenSrc == 0)
  1009. return 0;
  1010. if ((lenSeparator == 0) || (lenSrc < lenSeparator))
  1011. return sizeof(size32_t) + lenSrc;
  1012. unsigned sizeWords=0;
  1013. const char * end = src + lenSrc;
  1014. const char * max = end - (lenSeparator - 1);
  1015. const char * cur = src;
  1016. const char * startWord = NULL;
  1017. //MORE: optimize lenSeparator == 1!
  1018. while (cur < max)
  1019. {
  1020. if (memcmp(cur, separator, lenSeparator) == 0)
  1021. {
  1022. if (startWord)
  1023. {
  1024. sizeWords += sizeof(size32_t) + (cur - startWord);
  1025. startWord = NULL;
  1026. }
  1027. else if (allowBlankItems)
  1028. sizeWords += sizeof(size32_t);
  1029. cur += lenSeparator;
  1030. }
  1031. else
  1032. {
  1033. if (!startWord)
  1034. startWord = cur;
  1035. cur++;
  1036. }
  1037. }
  1038. if (startWord || (cur != end) || allowBlankItems)
  1039. {
  1040. if (!startWord)
  1041. startWord = cur;
  1042. sizeWords += sizeof(size32_t) + (end - startWord);
  1043. }
  1044. return sizeWords;
  1045. }
  1046. STRINGLIB_API void STRINGLIB_CALL slSplitWords(bool & __isAllResult, size32_t & __lenResult, void * & __result, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1047. {
  1048. unsigned sizeRequired = calcWordSetSize(lenSrc, src, lenSeparator, separator, allowBlankItems);
  1049. char * const result = static_cast<char *>(CTXMALLOC(parentCtx, sizeRequired));
  1050. __isAllResult = false;
  1051. __lenResult = sizeRequired;
  1052. __result = result;
  1053. if (lenSrc == 0)
  1054. return;
  1055. if ((lenSeparator == 0) || (lenSrc < lenSeparator))
  1056. {
  1057. *((size32_t *)result) = lenSrc;
  1058. memcpy(result+sizeof(size32_t), src, lenSrc);
  1059. return;
  1060. }
  1061. unsigned sizeWords=0;
  1062. char * target = result;
  1063. const char * end = src + lenSrc;
  1064. const char * max = end - (lenSeparator - 1);
  1065. const char * cur = src;
  1066. const char * startWord = NULL;
  1067. //MORE: optimize lenSeparator == 1!
  1068. while (cur < max)
  1069. {
  1070. if (memcmp(cur, separator, lenSeparator) == 0)
  1071. {
  1072. if (startWord || allowBlankItems)
  1073. {
  1074. size32_t len = startWord ? (cur - startWord) : 0;
  1075. memcpy(target, &len, sizeof(len));
  1076. memcpy(target+sizeof(size32_t), startWord, len);
  1077. target += sizeof(size32_t) + len;
  1078. startWord = NULL;
  1079. }
  1080. cur += lenSeparator;
  1081. }
  1082. else
  1083. {
  1084. if (!startWord)
  1085. startWord = cur;
  1086. cur++;
  1087. }
  1088. }
  1089. if (startWord || (cur != end) || allowBlankItems)
  1090. {
  1091. if (!startWord)
  1092. startWord = cur;
  1093. size32_t len = (end - startWord);
  1094. memcpy(target, &len, sizeof(len));
  1095. memcpy(target+sizeof(size32_t), startWord, len);
  1096. target += sizeof(size32_t) + len;
  1097. }
  1098. assert(target == result + sizeRequired);
  1099. // ctx->fail(1, "Size mismatch in StringLib.SplitWords");
  1100. }
  1101. static unsigned countWords(size32_t lenSrc, const char * src)
  1102. {
  1103. unsigned count = 0;
  1104. unsigned offset = 0;
  1105. while (offset < lenSrc)
  1106. {
  1107. size32_t len;
  1108. memcpy(&len, src+offset, sizeof(len));
  1109. offset += sizeof(len) + len;
  1110. count++;
  1111. }
  1112. return count;
  1113. }
  1114. STRINGLIB_API void STRINGLIB_CALL slCombineWords(size32_t & __lenResult, void * & __result, bool isAllSrc, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
  1115. {
  1116. if (lenSrc == 0)
  1117. {
  1118. __lenResult = 0;
  1119. __result = NULL;
  1120. return;
  1121. }
  1122. unsigned numWords = countWords(lenSrc, src);
  1123. size32_t sizeRequired = lenSrc - numWords * sizeof(size32_t) + (numWords-1) * lenSeparator;
  1124. char * const result = static_cast<char *>(CTXMALLOC(parentCtx, sizeRequired));
  1125. __lenResult = sizeRequired;
  1126. __result = result;
  1127. char * target = result;
  1128. unsigned offset = 0;
  1129. while (offset < lenSrc)
  1130. {
  1131. if ((offset != 0) && lenSeparator)
  1132. {
  1133. memcpy(target, separator, lenSeparator);
  1134. target += lenSeparator;
  1135. }
  1136. size32_t len;
  1137. memcpy(&len, src+offset, sizeof(len));
  1138. offset += sizeof(len);
  1139. memcpy(target, src+offset, len);
  1140. target += len;
  1141. offset += len;
  1142. }
  1143. assert(target == result + sizeRequired);
  1144. }
  1145. //--------------------------------------------------------------------------------------------------------------------
  1146. inline bool readValue(unsigned & value, size32_t & _offset, size32_t lenStr, const char * str, unsigned max)
  1147. {
  1148. unsigned total = 0;
  1149. unsigned offset = _offset;
  1150. if (lenStr - offset < max)
  1151. max = lenStr - offset;
  1152. unsigned i=0;
  1153. for (; i < max; i++)
  1154. {
  1155. char next = str[offset+i];
  1156. if (next >= '0' && next <= '9')
  1157. total = total * 10 + (next - '0');
  1158. else
  1159. break;
  1160. }
  1161. if (i == 0)
  1162. return false;
  1163. value = total;
  1164. _offset = offset+i;
  1165. return true;
  1166. }
  1167. const char * const monthNames[12] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" };
  1168. inline bool matchString(unsigned & value, size32_t & strOffset, size32_t lenStr, const byte * str, unsigned num, const char * const * strings, unsigned minMatch)
  1169. {
  1170. unsigned startOffset = strOffset;
  1171. for (unsigned i =0; i < num; i++)
  1172. {
  1173. const char * cur = strings[i];
  1174. unsigned offset = startOffset;
  1175. while (offset < lenStr)
  1176. {
  1177. byte next = *cur++;
  1178. if (!next || toupper(next) != toupper(str[offset]))
  1179. break;
  1180. offset++;
  1181. }
  1182. if (offset - startOffset >= minMatch)
  1183. {
  1184. value = i;
  1185. strOffset = offset;
  1186. return true;
  1187. }
  1188. }
  1189. return false;
  1190. }
  1191. //This implements a subset of the specifiers allowed for strptime
  1192. //Another difference is it works on a string with a separate length
  1193. static const char * simple_strptime(size32_t lenStr, const char * str, const char * format, struct tm * tm)
  1194. {
  1195. const char * curFormat = format;
  1196. size32_t offset = 0;
  1197. const byte * src = (const byte *)str;
  1198. unsigned value;
  1199. byte next;
  1200. while ((next = *curFormat++) != '\0')
  1201. {
  1202. if (next == '%')
  1203. {
  1204. switch (*curFormat++)
  1205. {
  1206. case 't':
  1207. while ((offset < lenStr) && isspace(src[offset]))
  1208. offset++;
  1209. break;
  1210. case 'Y':
  1211. if (!readValue(value, offset, lenStr, str, 4))
  1212. return NULL;
  1213. tm->tm_year = value-1900;
  1214. break;
  1215. case 'y':
  1216. if (!readValue(value, offset, lenStr, str, 2))
  1217. return NULL;
  1218. tm->tm_year = value > 68 ? value : value + 100;
  1219. break;
  1220. case 'm':
  1221. if (!readValue(value, offset, lenStr, str, 2) || (value < 1) || (value > 12))
  1222. return NULL;
  1223. tm->tm_mon = value-1;
  1224. break;
  1225. case 'd':
  1226. if (!readValue(value, offset, lenStr, str, 2) || (value < 1) || (value > 31))
  1227. return NULL;
  1228. tm->tm_mday = value;
  1229. break;
  1230. case 'b':
  1231. case 'B':
  1232. case 'h':
  1233. if (!matchString(value, offset, lenStr, src, sizeof(monthNames)/sizeof(*monthNames), monthNames, 3))
  1234. return NULL;
  1235. tm->tm_mon = value;
  1236. break;
  1237. case 'H':
  1238. if (!readValue(value, offset, lenStr, str, 2)|| (value > 24))
  1239. return NULL;
  1240. tm->tm_hour = value;
  1241. break;
  1242. case 'M':
  1243. if (!readValue(value, offset, lenStr, str, 2)|| (value > 59))
  1244. return NULL;
  1245. tm->tm_min = value;
  1246. break;
  1247. case 'S':
  1248. if (!readValue(value, offset, lenStr, str, 2)|| (value > 59))
  1249. return NULL;
  1250. tm->tm_sec = value;
  1251. break;
  1252. default:
  1253. return NULL;
  1254. }
  1255. }
  1256. else
  1257. {
  1258. if (isspace(next))
  1259. {
  1260. while ((offset < lenStr) && isspace(src[offset]))
  1261. offset++;
  1262. }
  1263. else
  1264. {
  1265. if ((offset >= lenStr) || (src[offset++] != next))
  1266. return NULL;
  1267. }
  1268. }
  1269. }
  1270. return str+offset;
  1271. }
  1272. inline unsigned makeDate(const tm & tm)
  1273. {
  1274. return (tm.tm_year + 1900) * 10000 + (tm.tm_mon + 1) * 100 + tm.tm_mday;
  1275. }
  1276. inline void extractDate(tm & tm, unsigned date)
  1277. {
  1278. tm.tm_year = (date / 10000) - 1900;
  1279. tm.tm_mon = ((date / 100) % 100) - 1;
  1280. tm.tm_mday = (date % 100);
  1281. }
  1282. STRINGLIB_API unsigned STRINGLIB_CALL slStringToDate(size32_t lenS, const char * s, const char * fmtin)
  1283. {
  1284. struct tm tm;
  1285. memset(&tm, 0, sizeof(tm));
  1286. if (simple_strptime(lenS, s, fmtin, &tm))
  1287. return makeDate(tm);
  1288. return 0;
  1289. }
  1290. STRINGLIB_API unsigned STRINGLIB_CALL slMatchDate(size32_t lenS, const char * s, bool isAllFormats, unsigned lenFormats, const void * _formats)
  1291. {
  1292. struct tm tm;
  1293. memset(&tm, 0, sizeof(tm));
  1294. const char * formats = (const char *)_formats;
  1295. for (unsigned off=0; off < lenFormats; )
  1296. {
  1297. const char * curFormat = formats+off;
  1298. if (simple_strptime(lenS, s, curFormat, &tm))
  1299. return makeDate(tm);
  1300. off += strlen(curFormat) + 1;
  1301. }
  1302. return 0;
  1303. }
  1304. STRINGLIB_API void STRINGLIB_CALL slFormatDate(size32_t & __lenResult, char * & __result, unsigned date, const char * format)
  1305. {
  1306. size32_t len = 0;
  1307. char * out = NULL;
  1308. if (date)
  1309. {
  1310. struct tm tm;
  1311. memset(&tm, 0, sizeof(tm));
  1312. extractDate(tm, date);
  1313. char buf[255];
  1314. strftime(buf, sizeof(buf), format, &tm);
  1315. len = strlen(buf);
  1316. out = static_cast<char *>(CTXMALLOC(parentCtx, len));
  1317. memcpy(out, buf, len);
  1318. }
  1319. __lenResult = len;
  1320. __result = out;
  1321. }
  1322. //--------------------------------------------------------------------------------------------------------------------
  1323. //--------------------------------------------------------------------------------------------------------------------
  1324. //--------------------------------------------------------------------------------------------------------------------
  1325. // Legacy functions that only work on fixed length strings
  1326. //--------------------------------------------------------------------------------------------------------------------
  1327. //--------------------------------------------------------------------------------------------------------------------
  1328. //--------------------------------------------------------------------------------------------------------------------
  1329. STRINGLIB_API void STRINGLIB_CALL slStringExtract50(char *tgt, unsigned srcLen, const char * src, unsigned instance)
  1330. {
  1331. unsigned lenret;
  1332. char * resret;
  1333. slStringExtract(lenret,resret,srcLen,src,instance);
  1334. if (lenret >= 50)
  1335. memcpy(tgt,resret,50);
  1336. else
  1337. {
  1338. memcpy(tgt,resret,lenret);
  1339. memset(tgt+lenret,' ',50-lenret);
  1340. }
  1341. CTXFREE(parentCtx, resret);
  1342. }
  1343. STRINGLIB_API void STRINGLIB_CALL slGetBuildInfo100(char *tgt)
  1344. {
  1345. size32_t len = (size32_t) strlen(STRINGLIB_VERSION);
  1346. if (len >= 100)
  1347. len = 100;
  1348. memcpy(tgt, STRINGLIB_VERSION, len);
  1349. memset(tgt+len, ' ', 100-len);
  1350. }
  1351. // -----------------------------------------------------------------
  1352. STRINGLIB_API void STRINGLIB_CALL slStringToLowerCase80(char *tgt, unsigned srcLen, const char * src)
  1353. {
  1354. unsigned int i;
  1355. for (i=0;i<srcLen && i < 80;i++)
  1356. *tgt++ = tolower(src[i]);
  1357. while (i < 80)
  1358. {
  1359. *tgt++=' ';
  1360. i++;
  1361. }
  1362. }
  1363. // -----------------------------------------------------------------
  1364. STRINGLIB_API void STRINGLIB_CALL slStringToUpperCase80(char *tgt, unsigned srcLen, const char * src)
  1365. {
  1366. unsigned int i;
  1367. for (i=0;i<srcLen && i < 80;i++)
  1368. *tgt++ = toupper(src[i]);
  1369. while (i < 80)
  1370. {
  1371. *tgt++=' ';
  1372. i++;
  1373. }
  1374. }
  1375. // -----------------------------------------------------------------
  1376. STRINGLIB_API void STRINGLIB_CALL slStringFindReplace80(char * tgt, unsigned srcLen, const char * src, unsigned stokLen, const char * stok, unsigned rtokLen, const char * rtok)
  1377. {
  1378. if ( srcLen < stokLen )
  1379. {
  1380. if (srcLen > 80)
  1381. srcLen = 80;
  1382. memcpy(tgt, src, srcLen);
  1383. if (srcLen < 80)
  1384. memset(tgt+srcLen, ' ', 80 - srcLen);
  1385. }
  1386. else
  1387. {
  1388. unsigned steps = srcLen-stokLen+1;
  1389. unsigned i;
  1390. unsigned lim = 80;
  1391. for ( i = 0; i < steps && lim > 0; )
  1392. {
  1393. if ( !memcmp(src+i,stok,stokLen) )
  1394. {
  1395. if (rtokLen > lim)
  1396. rtokLen = lim;
  1397. memcpy(tgt, rtok, rtokLen);
  1398. tgt += rtokLen;
  1399. i += stokLen;
  1400. lim -= rtokLen;
  1401. }
  1402. else
  1403. {
  1404. *tgt++ = src[i++];
  1405. lim--;
  1406. }
  1407. }
  1408. while (i < srcLen && lim > 0)
  1409. {
  1410. *tgt++ = src[i++];
  1411. lim--;
  1412. }
  1413. if (lim)
  1414. memset(tgt, ' ', lim);
  1415. }
  1416. }
  1417. STRINGLIB_API void STRINGLIB_CALL slStringCleanSpaces25(char *__ret_str,unsigned _len_instr,const char * instr)
  1418. {
  1419. // remove double spaces
  1420. // Fixed width version for Hole
  1421. unsigned outlen = _len_instr;
  1422. if (outlen < 25)
  1423. outlen = 25;
  1424. char *out = (char *) alloca(outlen);
  1425. char *origout = out;
  1426. bool spacePending = false;
  1427. bool atStart = true;
  1428. for(unsigned idx = 0; idx < _len_instr; idx++)
  1429. {
  1430. char c = *instr++;
  1431. switch (c)
  1432. {
  1433. case ' ':
  1434. case '\t':
  1435. spacePending = true;
  1436. break;
  1437. default:
  1438. if (spacePending && !atStart)
  1439. *out++ = ' ';
  1440. spacePending = false;
  1441. atStart = false;
  1442. *out++ = c;
  1443. break;
  1444. }
  1445. }
  1446. unsigned len = (size32_t)(out-origout);
  1447. if (len < 25)
  1448. memset(out, ' ', 25 - len);
  1449. memcpy(__ret_str, origout, 25);
  1450. }
  1451. STRINGLIB_API void STRINGLIB_CALL slStringCleanSpaces80(char *__ret_str,unsigned _len_instr,const char * instr)
  1452. {
  1453. // remove double spaces
  1454. // Another fixed width version for Hole
  1455. unsigned outlen = _len_instr;
  1456. if (outlen < 80)
  1457. outlen = 80;
  1458. char *out = (char *) alloca(outlen);
  1459. char *origout = out;
  1460. bool spacePending = false;
  1461. bool atStart = true;
  1462. for(unsigned idx = 0; idx < _len_instr; idx++)
  1463. {
  1464. char c = *instr++;
  1465. switch (c)
  1466. {
  1467. case ' ':
  1468. case '\t':
  1469. spacePending = true;
  1470. break;
  1471. default:
  1472. if (spacePending && !atStart)
  1473. *out++ = ' ';
  1474. spacePending = false;
  1475. atStart = false;
  1476. *out++ = c;
  1477. break;
  1478. }
  1479. }
  1480. unsigned len = (unsigned)(out-origout);
  1481. if (len < 80)
  1482. memset(out, ' ', 80 - len);
  1483. memcpy(__ret_str, origout, 80);
  1484. }