genht.cpp 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "jlib.hpp"
  15. #include "jlzw.hpp"
  16. #include "jfile.hpp"
  17. #include "jlog.hpp"
  18. #include "jstring.hpp"
  19. #include "jbuff.hpp"
  /*
   * Writes the opening of the generated ECL module to stdout.
   *
   * Output, in order:
   *   - the "export Module_<eclmodname> := MODULE" line,
   *   - the start of the embedded-C++ Find(STRING key) definition,
   *   - the source text of a static inline hashc() helper built on a
   *     multiply-then-xor HASHONE macro (the macro is #undef'd again at the end).
   *
   * The emitted text deliberately stops inside the BEGINC++ section; process()
   * appends the data tables and outECLbody() closes the definition.
   *
   * NOTE(review): process() fills its hash table with a hashc() declared
   * elsewhere; the text emitted here presumably has to compute the same values
   * for lookups to succeed - confirm before altering any emitted line.
   *
   * eclmodname: module name substituted into the MODULE line.
   */
  static void outECLheader(const char *eclmodname)
  {
      /* Fixed text following the MODULE line; contains no format directives. */
      static const char *const fixedText[] = {
          "export UNSIGNED4 Find(STRING key) := BEGINC++\n",
          "#option pure\n",
          "#define HASHONE(hash, c) { hash *= 0x01000193; hash ^= c; } \n",
          "static inline unsigned hashc( const unsigned char *k, unsigned length, unsigned initval)\n",
          "{\n",
          " unsigned hash = initval;\n",
          " unsigned char c;\n",
          " while (length >= 8) {\n",
          " c = (*k++); HASHONE(hash, c);\n",
          " c = (*k++); HASHONE(hash, c);\n",
          " c = (*k++); HASHONE(hash, c);\n",
          " c = (*k++); HASHONE(hash, c);\n",
          " length-=4;\n",
          " }\n",
          " switch (length) {\n",
          " case 7: c = (*k++); HASHONE(hash, c);\n",
          " case 6: c = (*k++); HASHONE(hash, c);\n",
          " case 5: c = (*k++); HASHONE(hash, c);\n",
          " case 4: c = (*k++); HASHONE(hash, c);\n",
          " case 3: c = (*k++); HASHONE(hash, c);\n",
          " case 2: c = (*k++); HASHONE(hash, c);\n",
          " case 1: c = (*k++); HASHONE(hash, c);\n",
          " }\n",
          " return hash;\n",
          "}\n",
          "#undef HASHONE\n"
      };
      const unsigned numLines = (unsigned)(sizeof(fixedText) / sizeof(fixedText[0]));
      unsigned idx;

      printf("export Module_%s := MODULE\n", eclmodname);
      for (idx = 0; idx < numLines; idx++)
          fputs(fixedText[idx], stdout);
  }
  50. static void outECLbody(const char *eclmodname)
  51. {
  52. printf(";\n");
  53. printf("#body\n");
  54. printf(" if (lenKey) {\n");
  55. printf(" unsigned h = hashc((const byte *)key,lenKey,lenKey)%%HASHTABLESIZE;\n");
  56. printf(" const byte *e = (const byte *)key+lenKey;\n");
  57. printf(" while (1) {\n");
  58. printf(" unsigned i = HashTab[h];\n");
  59. printf(" if (i==0) \n");
  60. printf(" break;\n");
  61. printf(" byte *p = _%s_StrData+i;\n",eclmodname);
  62. printf(" if (*p==lenKey) {\n");
  63. printf(" p++;\n");
  64. printf(" const byte *n=(byte *)key;\n");
  65. printf(" while (*p==*n) {\n");
  66. printf(" p++;\n");
  67. printf(" n++;\n");
  68. printf(" if (n==e)\n");
  69. printf(" return *(unsigned short *)p;\n");
  70. printf(" }\n");
  71. printf(" }\n");
  72. printf(" h++;\n");
  73. printf(" if (h==HASHTABLESIZE)\n");
  74. printf(" h = 0;\n");
  75. printf(" }\n");
  76. printf(" }\n");
  77. printf(" return 0;\n");
  78. printf("#undef HASHTABLESIZE\n");
  79. printf("ENDC++;\n");
  80. printf("STRING _Match(UNSIGNED idx) := BEGINC++\n");
  81. printf("#option pure\n");
  82. printf("extern byte _%s_StrData[];\n",eclmodname);
  83. printf("extern unsigned short _%s_Matches[];\n",eclmodname);
  84. printf("extern unsigned _%s_StrIdx[];\n",eclmodname);
  85. printf(";\n");
  86. printf("#body\n");
  87. printf(" const byte *r = _%s_StrData+_%s_StrIdx[_%s_Matches[idx]];\n",eclmodname,eclmodname,eclmodname); // could do better
  88. printf(" __lenResult = *(r++);\n");
  89. printf(" __result = (char *)rtlMalloc(__lenResult);\n");
  90. printf(" memcpy(__result,r,__lenResult);\n");
  91. printf("ENDC++;\n");
  92. printf("export STRING Match(STRING key) := FUNCTION return _Match(Find(key)); END;\n");
  93. printf("UNSIGNED4 _Value(UNSIGNED idx) := BEGINC++\n");
  94. printf("#option pure\n");
  95. printf("extern byte _%s_StrData[];\n",eclmodname);
  96. printf("extern unsigned short _%s_Values[];\n",eclmodname);
  97. printf("extern unsigned _%s_StrIdx[];\n",eclmodname);
  98. printf(";\n");
  99. printf("#body\n");
  100. printf(" return _%s_Values[idx];\n",eclmodname);
  101. printf("ENDC++;\n");
  102. printf("export UNSIGNED4 Value(STRING key) := FUNCTION return _Value(Find(key)); END;\n");
  103. printf("END;\n");
  104. }
  105. void process(const char *fname)
  106. {
  107. MemoryBuffer mb;
  108. UnsignedArray matches;
  109. UnsignedArray values;
  110. FILE *inFile = fopen(fname, "r" TEXT_TRANS);
  111. if (!inFile) {
  112. printf("ERROR: Cannot open '%s'\n",fname);
  113. exit(1);
  114. }
  115. char eclmodname[256];
  116. bool valuesshort = true;
  117. strcpy(eclmodname,"UNKNOWN");
  118. char ln[1024];
  119. bool gotheader=false;
  120. unsigned count = 0;
  121. unsigned lastres = (unsigned)-1;
  122. unsigned lastpos = 0;
  123. while (fgets(ln,sizeof(ln),inFile)) {
  124. // format { NN,"SSS" }
  125. const char *s = ln;
  126. while (*s&&isspace(*s))
  127. s++;
  128. if (*s=='{') {
  129. s++;
  130. while (*s&&isspace(*s))
  131. s++;
  132. unsigned res = 0;
  133. while (*s&&isdigit(*s)) {
  134. res = 10*res+(*s-'0');
  135. s++;
  136. }
  137. values.append(res);
  138. if (res>=0x10000)
  139. valuesshort = false;
  140. if (res!=lastres) {
  141. lastpos = matches.ordinality()+1;
  142. matches.append(lastpos);
  143. lastres = res;
  144. }
  145. else
  146. matches.append(lastpos);
  147. while (*s&&isspace(*s))
  148. s++;
  149. if (*s&&*s==',')
  150. s++;
  151. while (*s&&isspace(*s))
  152. s++;
  153. if (*s&&*s=='"')
  154. s++;
  155. const char *e = s;
  156. while (*e&&(*e!='"'))
  157. e++;
  158. if (e!=s) {
  159. size32_t l = (byte)((e-s>254)?254:(e-s));
  160. mb.append((byte)l).append(l,s);
  161. count++;
  162. }
  163. }
  164. else {
  165. if (memcmp(ln,"TITLE:",6)==0) {
  166. if (ln[6]>' ') {
  167. strcpy(eclmodname,ln+6);
  168. while (eclmodname[strlen(eclmodname)-1]<=' ')
  169. eclmodname[strlen(eclmodname)-1] = 0;
  170. }
  171. }
  172. /* not yet supported
  173. else if (memcmp(ln,"MATCHONLY:",10)==0)
  174. matchonly = ln[10]=='Y';
  175. else if (memcmp(ln,"USETABLE:",9)==0) {
  176. usetable = ln[9]=='Y';
  177. matchonly = true;
  178. }
  179. else if (memcmp(ln,"MAPVALUE:",9)==0)
  180. mapvalue = ln[9]=='Y';
  181. */
  182. gotheader = true;
  183. }
  184. }
  185. fclose(inFile);
  186. assertex(count<0x10000);
  187. unsigned htsize = count*4/2+15;
  188. outECLheader(eclmodname);
  189. printf("#define HASHTABLESIZE %d\n",htsize);
  190. printf("byte _%s_StrData[%d] = {\n",eclmodname,mb.length()+4+count*2);
  191. printf(" 0, 0, 0,\n");
  192. const byte *base = (const byte *)mb.toByteArray();
  193. const byte *p = base;
  194. unsigned *htab=(unsigned *)calloc(htsize,sizeof(unsigned));
  195. unsigned i=0;
  196. unsigned o = 3;
  197. size32_t l = mb.length();
  198. UnsignedArray offsets;
  199. offsets.append(0);
  200. StringBuffer s;
  201. while ((size32_t)(p-base)<l) {
  202. i++;
  203. offsets.append(o);
  204. size32_t sl = *p;
  205. unsigned h = hashc(p+1,sl,sl)%htsize;
  206. for (;;) {
  207. if (htab[h]==0) {
  208. htab[h] = o;
  209. break;
  210. }
  211. h++;
  212. if (h==htsize)
  213. h = 0;
  214. }
  215. s.clear().append(' ');
  216. for (unsigned j=0; j<=sl; j++)
  217. s.appendf(" %3d,",(int)*(p++));
  218. s.appendf(" %3d, %3d,",(i&0xff),(i>>8));
  219. printf("%s\n",s.str());
  220. o+=sl+3;
  221. }
  222. printf(" 0\n};\n");
  223. unsigned n = offsets.ordinality();
  224. printf("unsigned _%s_StrIdx[%d] = {\n",eclmodname,n+1);
  225. assertex(n<0x10000);
  226. for (i=0;i<n;) {
  227. printf(" ");
  228. unsigned ln = (n-i<10)?(n-i):10;
  229. while (ln--)
  230. printf("%5d, ",offsets.item(i++));
  231. printf("\n");
  232. }
  233. printf(" 0\n};\n");
  234. printf("static unsigned HashTab[HASHTABLESIZE+1] = {\n");
  235. for (i=0;i<htsize;) {
  236. printf(" ");
  237. unsigned ln = (htsize-i<10)?(htsize-i):10;
  238. while (ln--)
  239. printf("%7d, ",htab[i++]);
  240. printf("\n");
  241. }
  242. printf(" 0\n};\n");
  243. n = values.ordinality();
  244. printf("unsigned%s _%s_Values[%d] = {\n",valuesshort?" short":"",eclmodname,n+2);
  245. printf(" 0,\n");
  246. for (i=0;i<n;) {
  247. assertex(i<values.ordinality());
  248. printf(" ");
  249. unsigned ln = (n-i<10)?(n-i):10;
  250. while (ln--)
  251. printf("%5d, ",values.item(i++));
  252. printf("\n");
  253. }
  254. printf(" 0\n};\n");
  255. n = matches.ordinality();
  256. printf("unsigned%s _%s_Matches[%d] = {\n",valuesshort?" short":"",eclmodname,n+2);
  257. printf(" 0,\n");
  258. for (i=0;i<n;) {
  259. assertex(i<matches.ordinality());
  260. printf(" ");
  261. unsigned ln = (n-i<10)?(n-i):10;
  262. while (ln--)
  263. printf("%5d, ",matches.item(i++));
  264. printf("\n");
  265. }
  266. printf(" 0\n};\n");
  267. outECLbody(eclmodname);
  268. }
  269. int main(int argc, char* argv[])
  270. {
  271. InitModuleObjects();
  272. EnableSEHtoExceptionMapping();
  273. if (argc<2)
  274. printf("Usage: genht <mst-file>\n");
  275. else
  276. process(argv[1]);
  277. releaseAtoms();
  278. return 0;
  279. }