jhash.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef JHASH_HPP
  14. #define JHASH_HPP
  15. #include "platform.h"
  16. #include <stdio.h>
  17. #include "jiface.hpp"
  18. #include "jobserve.hpp"
  19. #include "jiter.hpp"
  20. #include "jsuperhash.hpp"
  21. #ifndef IHASH_DEFINED // may be defined already elsewhere
  22. #define IHASH_DEFINED
  23. interface IHash
  24. {
  25. virtual unsigned hash(const void *data)=0;
  26. protected:
  27. virtual ~IHash() {}
  28. };
  29. #endif
  30. interface jlib_decl IMapping : extends IObservable
  31. {
  32. public:
  33. virtual const void * getKey() const = 0;
  34. virtual unsigned getHash() const = 0;
  35. virtual void setHash(unsigned) = 0;
  36. };
  37. interface jlib_decl IAtom : extends IMapping
  38. {
  39. public:
  40. virtual const char * queryStr() const = 0;
  41. };
  42. inline jlib_decl const char * str(const IAtom * atom) { return atom ? atom->queryStr() : NULL; }
  43. //This interface represents an atom which preserves its case, but also stores a lower case representation
  44. //for efficient case insensitive comparison.
  45. //It is deliberately NOT derived from IAtom to avoid accidentally using the wrong interface
  46. interface jlib_decl IIdAtom : extends IMapping
  47. {
  48. public:
  49. virtual const char * queryStr() const = 0;
  50. public:
  51. virtual IAtom * queryLower() const = 0;
  52. };
  53. inline jlib_decl const char * str(const IIdAtom * atom) { return atom ? atom->queryStr() : NULL; }
  54. inline jlib_decl IAtom * lower(const IIdAtom * atom) { return atom ? atom->queryLower() : NULL; }
  55. #ifdef _MSC_VER
  56. #pragma warning (push)
  57. #pragma warning( disable : 4275 )
  58. #endif
  59. class jlib_decl HashTable
  60. : public SuperHashTableOf<IMapping, const void>, implements IObserver
  61. {
  62. public:
  63. HashTable(int _keysize, bool _ignorecase)
  64. : SuperHashTableOf<IMapping, const void>(),
  65. keysize(_keysize), ignorecase(_ignorecase) {}
  66. HashTable(unsigned initsize, int _keysize, bool _ignorecase)
  67. : SuperHashTableOf<IMapping, const void>(initsize),
  68. keysize(_keysize), ignorecase(_ignorecase) {}
  69. ~HashTable() { _releaseAll(); }
  70. IMPLEMENT_IINTERFACE
  71. bool add(IMapping & donor)
  72. {
  73. donor.setHash(hash(donor.getKey(), keysize));
  74. return SuperHashTableOf<IMapping, const void>::add(donor);
  75. }
  76. bool replace(IMapping & donor)
  77. {
  78. donor.setHash(hash(donor.getKey(), keysize));
  79. return SuperHashTableOf<IMapping, const void>::replace(donor);
  80. }
  81. bool addOwn(IMapping & donor);
  82. bool replaceOwn(IMapping & donor);
  83. IMapping * findLink(const void *key) const;
  84. virtual bool onNotify(INotification & notify);
  85. IMapping * createLink(const void *key);
  86. protected:
  87. virtual IMapping * newMapping(const void *key);
  88. private:
  89. unsigned getHashFromElement(const void * et) const
  90. { return static_cast<const IMapping *>(et)->getHash(); }
  91. unsigned getHashFromFindParam(const void *fp) const
  92. { return hash(fp, keysize); }
  93. const void * getFindParam(const void * et) const
  94. { return const_cast<const void *>(static_cast<const IMapping *>(et)->getKey()); }
  95. bool matchesFindParam(const void * et, const void * key, unsigned fphash __attribute__((unused))) const
  96. { return keyeq(key, static_cast<const IMapping *>(et)->getKey(), keysize); }
  97. bool keyeq(const void *key1, const void *key2, int ksize) const;
  98. unsigned hash(const void *key, int ksize) const;
  99. private:
  100. int keysize;
  101. bool ignorecase;
  102. };
  103. class jlib_decl KeptHashTable : public HashTable
  104. {
  105. public:
  106. KeptHashTable(int _keysize, bool _ignorecase)
  107. : HashTable(_keysize, _ignorecase) {}
  108. KeptHashTable(unsigned _initsize, int _keysize, bool _ignorecase)
  109. : HashTable(_initsize, _keysize, _ignorecase) {}
  110. ~KeptHashTable() { _releaseAll(); }
  111. IMapping * create(const void *key);
  112. private:
  113. void onAdd(void *et);
  114. void onRemove(void *et);
  115. };
  116. class jlib_decl ObservedHashTable : public HashTable
  117. {
  118. public:
  119. ObservedHashTable(int _keysize, bool _ignorecase)
  120. : HashTable(_keysize, _ignorecase) {}
  121. ObservedHashTable(unsigned _initsize, int _keysize, bool _ignorecase)
  122. : HashTable(_initsize, _keysize, _ignorecase) {}
  123. ~ObservedHashTable() { _releaseAll(); }
  124. private:
  125. void onAdd(void *et);
  126. void onRemove(void *et);
  127. };
  128. class jlib_decl HashIterator : public SuperHashIteratorOf<IMapping>
  129. {
  130. public:
  131. HashIterator(const HashTable & _table) : SuperHashIteratorOf<IMapping>(_table) {}
  132. IMapping & get() { IMapping & et = query(); et.Link(); return et; }
  133. };
  134. #ifdef _MSC_VER
  135. #pragma warning (pop)
  136. #endif
  137. void IntersectHash(HashTable & h1, HashTable & h2);
  138. void UnionHash(HashTable & h1, HashTable & h2);
  139. void SubtractHash(HashTable & main, HashTable & sub);
  140. #include "jhash.ipp"
  141. typedef MappingStringTo<IInterfacePtr,IInterfacePtr> CopyMappingStringToIInterface;
  142. typedef MapStringTo<IInterfacePtr,IInterfacePtr,CopyMappingStringToIInterface> CopyMapStringToIInterface;
  143. template <class C> class CopyMapStringToMyClass : public CopyMapStringToIInterface
  144. {
  145. public:
  146. CopyMapStringToMyClass() : CopyMapStringToIInterface() {};
  147. CopyMapStringToMyClass(bool _ignorecase) : CopyMapStringToIInterface(_ignorecase) {};
  148. static inline C * mapToValue(IMapping * _map)
  149. {
  150. CopyMappingStringToIInterface * map = (CopyMappingStringToIInterface *)_map;
  151. IInterface ** x = &map->getValue();
  152. return x ? (C *) *x : NULL;
  153. }
  154. C *getValue(const char *k) const
  155. {
  156. IInterface **x = CopyMapStringToIInterface::getValue(k);
  157. return x ? (C *) *x : NULL;
  158. }
  159. };
  160. template <class C, class BASE> class CopyMapStringToMyClassViaBase : public CopyMapStringToIInterface
  161. {
  162. public:
  163. CopyMapStringToMyClassViaBase() : CopyMapStringToIInterface() {};
  164. CopyMapStringToMyClassViaBase(bool _ignorecase) : CopyMapStringToIInterface(_ignorecase) {};
  165. C *getValue(const char *k) const
  166. {
  167. IInterface **x = CopyMapStringToIInterface::getValue(k);
  168. return x ? (C *)(BASE *)*x : NULL;
  169. }
  170. bool setValue(const char *k, C * v)
  171. {
  172. return CopyMapStringToIInterface::setValue(k, (BASE *)v);
  173. }
  174. };
  175. //===========================================================================
  176. #ifdef _MSC_VER
  177. #pragma warning (push)
  178. #pragma warning( disable : 4275 ) // hope this warning not significant! (may get link errors I guess)
  179. #endif
  180. class jlib_decl MappingStringToIInterface : public MappingStringTo<IInterfacePtr,IInterfacePtr>
  181. {
  182. public:
  183. MappingStringToIInterface(const char * k, IInterfacePtr a) :
  184. MappingStringTo<IInterfacePtr,IInterfacePtr>(k,a) { ::Link(a); }
  185. ~MappingStringToIInterface() { ::Release(val); }
  186. };
  187. #ifdef _MSC_VER
  188. #pragma warning (pop)
  189. #endif
  190. typedef MapStringTo<IInterfacePtr,IInterfacePtr,MappingStringToIInterface> MapStringToIInterface;
  191. template <class C> class MapStringToMyClass : public MapStringToIInterface
  192. {
  193. public:
  194. MapStringToMyClass() : MapStringToIInterface() {};
  195. MapStringToMyClass(bool _ignorecase) : MapStringToIInterface(_ignorecase) {};
  196. static inline C * mapToValue(IMapping * _map)
  197. {
  198. MappingStringToIInterface * map = (MappingStringToIInterface *)_map;
  199. IInterface ** x = &map->getValue();
  200. return x ? (C *) *x : NULL;
  201. }
  202. C *getValue(const char *k) const
  203. {
  204. IInterface **x = MapStringToIInterface::getValue(k);
  205. return x ? (C *) *x : NULL;
  206. }
  207. };
  208. template <class C, class BASE> class MapStringToMyClassViaBase : public MapStringToIInterface
  209. {
  210. public:
  211. MapStringToMyClassViaBase() : MapStringToIInterface() {};
  212. MapStringToMyClassViaBase(bool _ignorecase) : MapStringToIInterface(_ignorecase) {};
  213. C *getValue(const char *k) const
  214. {
  215. IInterface **x = MapStringToIInterface::getValue(k);
  216. return x ? (C *)(BASE *)*x : NULL;
  217. }
  218. bool setValue(const char *k, C * v)
  219. {
  220. return MapStringToIInterface::setValue(k, (BASE *)v);
  221. }
  222. };
  223. //===========================================================================
  224. template <class KEY, class KEYINIT>
  225. class CopyMapXToIInterface : public MapBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr,MappingBetween<KEY, KEYINIT, IInterfacePtr, IInterfacePtr> >
  226. {
  227. };
  228. template <class KEY, class KEYINIT, class C>
  229. class CopyMapXToMyClass : public CopyMapXToIInterface<KEY, KEYINIT>
  230. {
  231. public:
  232. CopyMapXToMyClass() : CopyMapXToIInterface<KEY, KEYINIT>() {};
  233. static inline C * mapToValue(IMapping * _map)
  234. {
  235. MappingBetween<KEY, KEYINIT, IInterfacePtr, IInterfacePtr> * map = (MappingBetween<KEY, KEYINIT, IInterfacePtr, IInterfacePtr> *)_map;
  236. IInterface ** x = &map->getValue();
  237. return x ? (C *) *x : NULL;
  238. }
  239. C *getValue(KEYINIT k) const
  240. {
  241. IInterface **x = CopyMapXToIInterface<KEY, KEYINIT>::getValue(k);
  242. return x ? (C *) *x : NULL;
  243. }
  244. };
  245. template <class KEY, class KEYINIT>
  246. class MappingXToIInterface : public MappingBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr>
  247. {
  248. typedef MappingXToIInterface<KEY, KEYINIT> SELF;
  249. public:
  250. MappingXToIInterface(KEYINIT k, IInterfacePtr a) :
  251. MappingBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr>(k,a) { ::Link(a); }
  252. ~MappingXToIInterface() { ::Release(SELF::val); }
  253. };
  254. template <class KEY, class KEYINIT>
  255. class MapXToIInterface : public MapBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr,MappingXToIInterface<KEY, KEYINIT> >
  256. {
  257. public:
  258. using ELEMENT = MappingXToIInterface<KEY, KEYINIT>;
  259. };
  260. template <class KEY, class KEYINIT, class C>
  261. class MapXToMyClass : public MapXToIInterface<KEY, KEYINIT>
  262. {
  263. public:
  264. MapXToMyClass() : MapXToIInterface<KEY, KEYINIT>() {};
  265. static inline C * mapToValue(IMapping * _map)
  266. {
  267. MappingXToIInterface<KEY, KEYINIT> * map = (MappingXToIInterface<KEY, KEYINIT> *)_map;
  268. IInterface ** x = &map->getValue();
  269. return x ? (C *) *x : NULL;
  270. }
  271. C *getValue(KEYINIT k) const
  272. {
  273. IInterface **x = MapXToIInterface<KEY, KEYINIT>::getValue(k);
  274. return x ? (C *) *x : NULL;
  275. }
  276. };
  277. template <class KEY, class KEYINIT, class C, class BASE>
  278. class MapXToMyClassViaBase : public MapXToIInterface<KEY, KEYINIT>
  279. {
  280. public:
  281. MapXToMyClassViaBase () : MapXToIInterface<KEY, KEYINIT>() {};
  282. static inline C * mapToValue(IMapping * _map)
  283. {
  284. MappingXToIInterface<KEY, KEYINIT> * map = (MappingXToIInterface<KEY, KEYINIT> *)_map;
  285. IInterface ** x = &map->getValue();
  286. return x ? (C *)(BASE *)*x : NULL;
  287. }
  288. C *getValue(KEYINIT k) const
  289. {
  290. IInterface **x = MapXToIInterface<KEY, KEYINIT>::getValue(k);
  291. return x ? (C *)(BASE *) *x : NULL;
  292. }
  293. bool setValue(KEYINIT k, BASE * v)
  294. {
  295. return MapXToIInterface<KEY, KEYINIT>::setValue(k, v);
  296. }
  297. };
  298. //===========================================================================
  299. template <class KEY, class VALUE>
  300. class MappingOwnedToOwned : public MappingBetween<Linked<KEY>, KEY *, Linked<VALUE>,VALUE *>
  301. {
  302. public:
  303. MappingOwnedToOwned(KEY * k, VALUE * a) : MappingBetween<Linked<KEY>, KEY *, Linked<VALUE>,VALUE *>(k, a) {}
  304. };
  305. template <class KEY, class VALUE>
  306. class MapOwnedToOwned : public MapBetween<Linked<KEY>, KEY * , Linked<VALUE>,VALUE *,MappingOwnedToOwned<KEY, VALUE> >
  307. {
  308. public:
  309. inline MapOwnedToOwned() {}
  310. inline MapOwnedToOwned(unsigned initsize) : MapBetween<Linked<KEY>, KEY * , Linked<VALUE>,VALUE *,MappingOwnedToOwned<KEY, VALUE> >(initsize) {}
  311. };
  312. /*
  313. The hash tables can be used from the interfaces above. However there are
  314. some helper classes/macros in jhash.ipp to make them easier to use:
  315. Element classes:
  316. HashMapping - An implementation of IMapping that works in keep and
  317. non kept hash tables.
  318. Atom - An implementation of IAtom
  319. MappingKey - A class for storing a hash key - used by above.
  320. There are also some classes to make creating hash tables easier:
  321. (They are actually implemented as macros to stop the compiler choking on
  322. the template classes).
  323. a) To create a hash table of a given class use:
  324. typedef HashTableOf<MAPPING-TYPE, HASH-KEY-SIZE> MapHashTable;
  325. Using a HashTableOf also adds the following method to the hash table class
  326. MAPPING-TYPE * CreateLink(const void * key);
  327. which creates entries in the hash table.
  328. b) To create a hash table that maps a string to another type use:
  329. typedef MapStringTo<TO-TYPE, TO-INIT> MyMappingName;
  330. where
  331. TO-TYPE - what is stored in the mapping
  332. TO-INIT - what you pass to the value to initialise it.
  333. e.g.
  334. typedef MapStringTo<StringAttr, const char *> MapStrToStr;
  335. c) To create a hash table that maps a non-string to another type use:
  336. typedef MapBetween<KEY-TYPE, KEY-INIT, MAP-TYPE, MAP-INIT> myMap;
  337. where MAP-... as above and
  338. KEY-TYPE - what is stored as the key
  339. KEY-INIT - what you pass to the key to initialise it.
  340. e.g.,
  341. typedef MapBetween<int, int, StringAttr, const char *> MapIntToStr;
  342. *** HashTable Key sizes ***
  343. The HashTable key size can have one of the following forms:
  344. +n - The key is n bytes of data
  345. 0 - The key is a null terminated string
  346. -n - The key is n bytes of data followed by a null terminated string.
  347. */
  348. // Create an atom in the global atom table
  349. extern jlib_decl IAtom * createAtom(const char *value);
  350. extern jlib_decl IAtom * createAtom(const char *value, size32_t len);
  351. inline IAtom * createLowerCaseAtom(const char *value) { return createAtom(value); }
  352. inline IAtom * createLowerCaseAtom(const char *value, size32_t len) { return createAtom(value, len); }
  353. extern jlib_decl IIdAtom * createIdAtom(const char *value);
  354. extern jlib_decl IIdAtom * createIdAtom(const char *value, size32_t len);
  355. extern jlib_decl void releaseAtoms();
  356. extern jlib_decl unsigned hashc( const unsigned char *k, unsigned length, unsigned initval);
  357. extern jlib_decl unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval);
  358. extern jlib_decl unsigned hashvalue( unsigned value, unsigned initval);
  359. extern jlib_decl unsigned hashvalue( unsigned __int64 value, unsigned initval);
  360. extern jlib_decl unsigned hashvalue( const void * value, unsigned initval);
  361. //================================================
  362. // Minimal Hash table template - slightly less overhead that HashTable/SuperHashTable
  363. template <class C> class CMinHashTable
  364. {
  365. protected:
  366. unsigned htn;
  367. unsigned n;
  368. C **table;
  369. void expand(bool expand=true)
  370. {
  371. C **t = table+htn; // more interesting going backwards
  372. if (expand)
  373. htn = htn*2+1;
  374. else
  375. htn /= 2;
  376. C **newtable = (C **)calloc(sizeof(C *),htn);
  377. while (t--!=table) {
  378. C *c = *t;
  379. if (c) {
  380. unsigned h = c->hash%htn;
  381. while (newtable[h]) {
  382. h++;
  383. if (h==htn)
  384. h = 0;
  385. }
  386. newtable[h] = c;
  387. }
  388. }
  389. free(table);
  390. table = newtable;
  391. }
  392. public:
  393. CMinHashTable<C>(unsigned _initialSize = 7)
  394. {
  395. htn = _initialSize;
  396. n = 0;
  397. table = (C **)calloc(sizeof(C *),htn);
  398. }
  399. ~CMinHashTable<C>()
  400. {
  401. C **t = table+htn;
  402. while (t--!=table)
  403. if (*t)
  404. C::destroy(*t);
  405. free(table);
  406. }
  407. void add(C *c)
  408. {
  409. if (n*4>htn*3)
  410. expand();
  411. unsigned i = c->hash%htn;
  412. while (table[i])
  413. if (++i==htn)
  414. i = 0;
  415. table[i] = c;
  416. n++;
  417. }
  418. C *findh(const char *key,unsigned h)
  419. {
  420. unsigned i=h%htn;
  421. while (table[i]) {
  422. if ((table[i]->hash==h)&&table[i]->eq(key))
  423. return table[i];
  424. if (++i==htn)
  425. i = 0;
  426. }
  427. return NULL;
  428. }
  429. C *find(const char *key,bool add)
  430. {
  431. unsigned h = C::getHash(key);
  432. unsigned i=h%htn;
  433. while (table[i]) {
  434. if ((table[i]->hash==h)&&table[i]->eq(key))
  435. return table[i];
  436. if (++i==htn)
  437. i = 0;
  438. }
  439. if (!add)
  440. return NULL;
  441. C *c = C::create(key);
  442. table[i] = c;
  443. n++;
  444. if (n*4>htn*3)
  445. expand();
  446. return c;
  447. }
  448. unsigned findIndex(const char *key, unsigned h)
  449. {
  450. unsigned i=h%htn;
  451. while (table[i]) {
  452. if ((table[i]->hash==h)&&table[i]->eq(key))
  453. return i;
  454. if (++i==htn)
  455. i = 0;
  456. }
  457. return (unsigned) -1;
  458. }
  459. C *getIndex(unsigned v) const
  460. {
  461. assert(v != (unsigned)-1 && v < htn);
  462. return table[v];
  463. }
  464. void remove(C *c)
  465. {
  466. unsigned i = c->hash%htn;
  467. C::destroy(c);
  468. while (table[i]!=c) {
  469. if (table[i]==NULL) {
  470. return;
  471. }
  472. if (++i==htn)
  473. i = 0;
  474. }
  475. unsigned j = i+1;
  476. for (;;) {
  477. if (j==htn)
  478. j = 0;
  479. C *cn = table[j];
  480. if (cn==NULL)
  481. break;
  482. unsigned k = cn->hash%htn;
  483. if (j>i) {
  484. if ((k<=i)||(k>j)) {
  485. table[i] = cn;
  486. i = j;
  487. }
  488. }
  489. else if ((k>j)&&(k<=i)) {
  490. table[i] = cn;
  491. i = j;
  492. }
  493. j++;
  494. }
  495. table[i] = NULL;
  496. n--;
  497. if ((n>1024)&&(n<htn/4))
  498. expand(false);
  499. }
  500. C *first(unsigned &i)
  501. {
  502. i = 0;
  503. return next(i);
  504. }
  505. C *next(unsigned &i)
  506. {
  507. while (i!=htn) {
  508. C* r = table[i++];
  509. if (r)
  510. return r;
  511. }
  512. return NULL;
  513. }
  514. unsigned ordinality()
  515. {
  516. return n;
  517. }
  518. };
  519. #endif