jhash.hpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef JHASH_HPP
  14. #define JHASH_HPP
  15. #include "platform.h"
  16. #include <stdio.h>
  17. #include "jiface.hpp"
  18. #include "jobserve.hpp"
  19. #include "jiter.hpp"
  20. #include "jsuperhash.hpp"
  21. #ifndef IHASH_DEFINED // may be defined already elsewhere
  22. #define IHASH_DEFINED
  23. interface IHash
  24. {
  25. virtual unsigned hash(const void *data)=0;
  26. protected:
  27. virtual ~IHash() {}
  28. };
  29. #endif
  30. interface jlib_decl IMapping : extends IObservable
  31. {
  32. public:
  33. virtual const void * getKey() const = 0;
  34. virtual unsigned getHash() const = 0;
  35. virtual void setHash(unsigned) = 0;
  36. };
  37. interface jlib_decl IAtom : extends IMapping
  38. {
  39. public:
  40. virtual const char * getNamePtr() const = 0;
  41. const char * getAtomNamePtr() const; // ok if this=NULL
  42. inline const char * str() const { return getAtomNamePtr(); }
  43. inline operator const char *() const { return getAtomNamePtr(); }
  44. };
  45. //This interface represents an atom which preserves its case, but also stores a lower case representation
  46. //for efficient case insensitive comparison.
  47. //It is deliberately NOT derived from IAtom to avoid accidentally using the wrong interface
  48. interface jlib_decl IIdAtom : extends IMapping
  49. {
  50. public:
  51. virtual const char * getNamePtr() const = 0;
  52. const char * getAtomNamePtr() const { return this ? getNamePtr() : NULL; }
  53. inline const char * str() const { return getAtomNamePtr(); }
  54. inline operator const char *() const { return getAtomNamePtr(); }
  55. public:
  56. virtual IAtom * queryLower() const = 0;
  57. IAtom * lower() const; // safe if this==NULL
  58. };
  59. #ifdef _MSC_VER
  60. #pragma warning (push)
  61. #pragma warning( disable : 4275 )
  62. #endif
  63. class jlib_decl HashTable
  64. : public SuperHashTableOf<IMapping, const void>, implements IObserver
  65. {
  66. public:
  67. HashTable(int _keysize, bool _ignorecase)
  68. : SuperHashTableOf<IMapping, const void>(),
  69. keysize(_keysize), ignorecase(_ignorecase) {}
  70. HashTable(unsigned initsize, int _keysize, bool _ignorecase)
  71. : SuperHashTableOf<IMapping, const void>(initsize),
  72. keysize(_keysize), ignorecase(_ignorecase) {}
  73. ~HashTable() { releaseAll(); }
  74. IMPLEMENT_IINTERFACE
  75. bool add(IMapping & donor)
  76. {
  77. donor.setHash(hash(donor.getKey(), keysize));
  78. return SuperHashTableOf<IMapping, const void>::add(donor);
  79. }
  80. bool replace(IMapping & donor)
  81. {
  82. donor.setHash(hash(donor.getKey(), keysize));
  83. return SuperHashTableOf<IMapping, const void>::replace(donor);
  84. }
  85. bool addOwn(IMapping & donor);
  86. bool replaceOwn(IMapping & donor);
  87. IMapping * findLink(const void *key) const;
  88. virtual bool onNotify(INotification & notify);
  89. IMapping * createLink(const void *key);
  90. protected:
  91. virtual IMapping * newMapping(const void *key);
  92. private:
  93. unsigned getHashFromElement(const void * et) const
  94. { return static_cast<const IMapping *>(et)->getHash(); }
  95. unsigned getHashFromFindParam(const void *fp) const
  96. { return hash(fp, keysize); }
  97. const void * getFindParam(const void * et) const
  98. { return const_cast<const void *>(static_cast<const IMapping *>(et)->getKey()); }
  99. bool matchesFindParam(const void * et, const void * key, unsigned fphash) const
  100. { return keyeq(key, static_cast<const IMapping *>(et)->getKey(), keysize); }
  101. bool keyeq(const void *key1, const void *key2, int ksize) const;
  102. unsigned hash(const void *key, int ksize) const;
  103. private:
  104. int keysize;
  105. bool ignorecase;
  106. };
  107. class jlib_decl KeptHashTable : public HashTable
  108. {
  109. public:
  110. KeptHashTable(int _keysize, bool _ignorecase)
  111. : HashTable(_keysize, _ignorecase) {}
  112. KeptHashTable(unsigned _initsize, int _keysize, bool _ignorecase)
  113. : HashTable(_initsize, _keysize, _ignorecase) {}
  114. ~KeptHashTable() { releaseAll(); }
  115. IMapping * create(const void *key);
  116. private:
  117. void onAdd(void *et);
  118. void onRemove(void *et);
  119. };
  120. class jlib_decl ObservedHashTable : public HashTable
  121. {
  122. public:
  123. ObservedHashTable(int _keysize, bool _ignorecase)
  124. : HashTable(_keysize, _ignorecase) {}
  125. ObservedHashTable(unsigned _initsize, int _keysize, bool _ignorecase)
  126. : HashTable(_initsize, _keysize, _ignorecase) {}
  127. ~ObservedHashTable() { releaseAll(); }
  128. private:
  129. void onAdd(void *et);
  130. void onRemove(void *et);
  131. };
  132. class jlib_decl HashIterator : public SuperHashIteratorOf<IMapping>
  133. {
  134. public:
  135. HashIterator(const HashTable & _table) : SuperHashIteratorOf<IMapping>(_table) {}
  136. IMapping & get() { IMapping & et = query(); et.Link(); return et; }
  137. };
  138. #ifdef _MSC_VER
  139. #pragma warning (pop)
  140. #endif
  141. void IntersectHash(HashTable & h1, HashTable & h2);
  142. void UnionHash(HashTable & h1, HashTable & h2);
  143. void SubtractHash(HashTable & main, HashTable & sub);
  144. #include "jhash.ipp"
  145. typedef MappingStringTo<IInterfacePtr,IInterfacePtr> CopyMappingStringToIInterface;
  146. typedef MapStringTo<IInterfacePtr,IInterfacePtr,CopyMappingStringToIInterface> CopyMapStringToIInterface;
  147. template <class C> class CopyMapStringToMyClass : public CopyMapStringToIInterface
  148. {
  149. public:
  150. CopyMapStringToMyClass() : CopyMapStringToIInterface() {};
  151. CopyMapStringToMyClass(bool _ignorecase) : CopyMapStringToIInterface(_ignorecase) {};
  152. static inline C * mapToValue(IMapping * _map)
  153. {
  154. CopyMappingStringToIInterface * map = (CopyMappingStringToIInterface *)_map;
  155. IInterface ** x = &map->getValue();
  156. return x ? (C *) *x : NULL;
  157. }
  158. C *getValue(const char *k) const
  159. {
  160. IInterface **x = CopyMapStringToIInterface::getValue(k);
  161. return x ? (C *) *x : NULL;
  162. }
  163. };
  164. template <class C, class BASE> class CopyMapStringToMyClassViaBase : public CopyMapStringToIInterface
  165. {
  166. public:
  167. CopyMapStringToMyClassViaBase() : CopyMapStringToIInterface() {};
  168. CopyMapStringToMyClassViaBase(bool _ignorecase) : CopyMapStringToIInterface(_ignorecase) {};
  169. C *getValue(const char *k) const
  170. {
  171. IInterface **x = CopyMapStringToIInterface::getValue(k);
  172. return x ? (C *)(BASE *)*x : NULL;
  173. }
  174. bool setValue(const char *k, C * v)
  175. {
  176. return CopyMapStringToIInterface::setValue(k, (BASE *)v);
  177. }
  178. };
  179. //===========================================================================
  180. #ifdef _MSC_VER
  181. #pragma warning (push)
  182. #pragma warning( disable : 4275 ) // hope this warning not significant! (may get link errors I guess)
  183. #endif
  184. class jlib_decl MappingStringToIInterface : public MappingStringTo<IInterfacePtr,IInterfacePtr>
  185. {
  186. public:
  187. MappingStringToIInterface(const char * k, IInterfacePtr a) :
  188. MappingStringTo<IInterfacePtr,IInterfacePtr>(k,a) { ::Link(a); }
  189. ~MappingStringToIInterface() { ::Release(val); }
  190. };
  191. #ifdef _MSC_VER
  192. #pragma warning (pop)
  193. #endif
  194. typedef MapStringTo<IInterfacePtr,IInterfacePtr,MappingStringToIInterface> MapStringToIInterface;
  195. template <class C> class MapStringToMyClass : public MapStringToIInterface
  196. {
  197. public:
  198. MapStringToMyClass() : MapStringToIInterface() {};
  199. MapStringToMyClass(bool _ignorecase) : MapStringToIInterface(_ignorecase) {};
  200. static inline C * mapToValue(IMapping * _map)
  201. {
  202. MappingStringToIInterface * map = (MappingStringToIInterface *)_map;
  203. IInterface ** x = &map->getValue();
  204. return x ? (C *) *x : NULL;
  205. }
  206. C *getValue(const char *k) const
  207. {
  208. IInterface **x = MapStringToIInterface::getValue(k);
  209. return x ? (C *) *x : NULL;
  210. }
  211. };
  212. template <class C, class BASE> class MapStringToMyClassViaBase : public MapStringToIInterface
  213. {
  214. public:
  215. MapStringToMyClassViaBase() : MapStringToIInterface() {};
  216. MapStringToMyClassViaBase(bool _ignorecase) : MapStringToIInterface(_ignorecase) {};
  217. C *getValue(const char *k) const
  218. {
  219. IInterface **x = MapStringToIInterface::getValue(k);
  220. return x ? (C *)(BASE *)*x : NULL;
  221. }
  222. bool setValue(const char *k, C * v)
  223. {
  224. return MapStringToIInterface::setValue(k, (BASE *)v);
  225. }
  226. };
  227. //===========================================================================
  228. template <class KEY, class KEYINIT>
  229. class CopyMapXToIInterface : public MapBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr,MappingBetween<KEY, KEYINIT, IInterfacePtr, IInterfacePtr> >
  230. {
  231. };
  232. template <class KEY, class KEYINIT, class C>
  233. class CopyMapXToMyClass : public CopyMapXToIInterface<KEY, KEYINIT>
  234. {
  235. public:
  236. CopyMapXToMyClass() : CopyMapXToIInterface<KEY, KEYINIT>() {};
  237. static inline C * mapToValue(IMapping * _map)
  238. {
  239. MappingBetween<KEY, KEYINIT, IInterfacePtr, IInterfacePtr> * map = (MappingBetween<KEY, KEYINIT, IInterfacePtr, IInterfacePtr> *)_map;
  240. IInterface ** x = &map->getValue();
  241. return x ? (C *) *x : NULL;
  242. }
  243. C *getValue(KEYINIT k) const
  244. {
  245. IInterface **x = CopyMapXToIInterface<KEY, KEYINIT>::getValue(k);
  246. return x ? (C *) *x : NULL;
  247. }
  248. };
  249. template <class KEY, class KEYINIT>
  250. class MappingXToIInterface : public MappingBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr>
  251. {
  252. typedef MappingXToIInterface<KEY, KEYINIT> SELF;
  253. public:
  254. MappingXToIInterface(KEYINIT k, IInterfacePtr a) :
  255. MappingBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr>(k,a) { ::Link(a); }
  256. ~MappingXToIInterface() { ::Release(SELF::val); }
  257. };
  258. template <class KEY, class KEYINIT>
  259. class MapXToIInterface : public MapBetween<KEY, KEYINIT, IInterfacePtr,IInterfacePtr,MappingXToIInterface<KEY, KEYINIT> >
  260. {
  261. };
  262. template <class KEY, class KEYINIT, class C>
  263. class MapXToMyClass : public MapXToIInterface<KEY, KEYINIT>
  264. {
  265. public:
  266. MapXToMyClass() : MapXToIInterface<KEY, KEYINIT>() {};
  267. static inline C * mapToValue(IMapping * _map)
  268. {
  269. MappingXToIInterface<KEY, KEYINIT> * map = (MappingXToIInterface<KEY, KEYINIT> *)_map;
  270. IInterface ** x = &map->getValue();
  271. return x ? (C *) *x : NULL;
  272. }
  273. C *getValue(KEYINIT k) const
  274. {
  275. IInterface **x = MapXToIInterface<KEY, KEYINIT>::getValue(k);
  276. return x ? (C *) *x : NULL;
  277. }
  278. };
  279. template <class KEY, class KEYINIT, class C, class BASE>
  280. class MapXToMyClassViaBase : public MapXToIInterface<KEY, KEYINIT>
  281. {
  282. public:
  283. MapXToMyClassViaBase () : MapXToIInterface<KEY, KEYINIT>() {};
  284. static inline C * mapToValue(IMapping * _map)
  285. {
  286. MappingXToIInterface<KEY, KEYINIT> * map = (MappingXToIInterface<KEY, KEYINIT> *)_map;
  287. IInterface ** x = &map->getValue();
  288. return x ? (C *)(BASE *)*x : NULL;
  289. }
  290. C *getValue(KEYINIT k) const
  291. {
  292. IInterface **x = MapXToIInterface<KEY, KEYINIT>::getValue(k);
  293. return x ? (C *)(BASE *) *x : NULL;
  294. }
  295. bool setValue(KEYINIT k, BASE * v)
  296. {
  297. return MapXToIInterface<KEY, KEYINIT>::setValue(k, v);
  298. }
  299. };
  300. //===========================================================================
  301. template <class KEY, class VALUE>
  302. class MappingOwnedToOwned : public MappingBetween<Linked<KEY>, KEY *, Linked<VALUE>,VALUE *>
  303. {
  304. public:
  305. MappingOwnedToOwned(KEY * k, VALUE * a) : MappingBetween<Linked<KEY>, KEY *, Linked<VALUE>,VALUE *>(k, a) {}
  306. };
  307. template <class KEY, class VALUE>
  308. class MapOwnedToOwned : public MapBetween<Linked<KEY>, KEY * , Linked<VALUE>,VALUE *,MappingOwnedToOwned<KEY, VALUE> >
  309. {
  310. public:
  311. inline MapOwnedToOwned() {}
  312. inline MapOwnedToOwned(unsigned initsize) : MapBetween<Linked<KEY>, KEY * , Linked<VALUE>,VALUE *,MappingOwnedToOwned<KEY, VALUE> >(initsize) {}
  313. };
  314. /*
  315. The hash tables can be used from the interfaces above. However there are
  316. some helper classes/macros in jhash.ipp to make them easier to use:
  317. Element classes:
  318. HashMapping - An implementation of IMapping that works in keep and
  319. non kept hash tables.
  320. Atom - An implementation of IAtom
  321. MappingKey - A class for storing a hash key - used by above.
  322. There are also some classes to make creating hash tables easier:
  323. (They are actually implemented as macros to stop the compiler choking on
  324. the template classes).
  325. a) To create a hash table of a given class use:
  326. typedef HashTableOf<MAPPING-TYPE, HASH-KEY-SIZE> MapHashTable;
  327. Using a HashTableOf also adds the following method to the hash table class
  328. MAPPING-TYPE * CreateLink(const void * key);
  329. which creates entries in the hash table.
  330. b) To create a hash table that maps a string to another type use:
  331. typedef MapStringTo<TO-TYPE, TO-INIT> MyMappingName;
  332. where
  333. TO-TYPE - what is stored in the mapping
  334. TO-INIT - what you pass to the value to initialise it.
  335. e.g.
  336. typedef MapStringTo<StringAttr, const char *> MapStrToStr;
  337. c) To create a hash table that maps a non-string to another type use:
  338. typedef MapBetween<KEY-TYPE, KEY-INIT, MAP-TYPE, MAP-INIT> myMap;
  339. where MAP-... as above and
  340. KEY-TYPE - what is stored as the key
  341. KEY-INIT - what you pass to the key to initialise it.
  342. e.g.,
  343. typedef MapBetween<int, int, StringAttr, const char *> MapIntToStr;
  344. *** HashTable Key sizes ***
  345. The HashTable key size can have one of the following forms:
  346. +n - The key is n bytes of data
  347. 0 - The key is a null terminated string
  348. -n - The key is n bytes of data followed by a null terminated string.
  349. */
  350. // Create an atom in the global atom table
  351. extern jlib_decl IAtom * createAtom(const char *value);
  352. extern jlib_decl IAtom * createAtom(const char *value, size32_t len);
  353. inline IAtom * createLowerCaseAtom(const char *value) { return createAtom(value); }
  354. inline IAtom * createLowerCaseAtom(const char *value, size32_t len) { return createAtom(value, len); }
  355. extern jlib_decl IIdAtom * createIdAtom(const char *value);
  356. extern jlib_decl IIdAtom * createIdAtom(const char *value, size32_t len);
  357. extern jlib_decl void releaseAtoms();
  358. extern jlib_decl unsigned hashc( const unsigned char *k, unsigned length, unsigned initval);
  359. extern jlib_decl unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval);
  360. extern jlib_decl unsigned hashvalue( unsigned value, unsigned initval);
  361. extern jlib_decl unsigned hashvalue( unsigned __int64 value, unsigned initval);
  362. extern jlib_decl unsigned hashvalue( const void * value, unsigned initval);
  363. //================================================
  364. // Minimal Hash table template - slightly less overhead that HashTable/SuperHashTable
  365. template <class C> class CMinHashTable
  366. {
  367. protected:
  368. unsigned htn;
  369. unsigned n;
  370. C **table;
  371. void expand(bool expand=true)
  372. {
  373. C **t = table+htn; // more interesting going backwards
  374. if (expand)
  375. htn = htn*2+1;
  376. else
  377. htn /= 2;
  378. C **newtable = (C **)calloc(sizeof(C *),htn);
  379. while (t--!=table) {
  380. C *c = *t;
  381. if (c) {
  382. unsigned h = c->hash%htn;
  383. while (newtable[h]) {
  384. h++;
  385. if (h==htn)
  386. h = 0;
  387. }
  388. newtable[h] = c;
  389. }
  390. }
  391. free(table);
  392. table = newtable;
  393. }
  394. public:
  395. CMinHashTable<C>()
  396. {
  397. htn = 7;
  398. n = 0;
  399. table = (C **)calloc(sizeof(C *),htn);
  400. }
  401. ~CMinHashTable<C>()
  402. {
  403. C **t = table+htn;
  404. while (t--!=table)
  405. if (*t)
  406. C::destroy(*t);
  407. free(table);
  408. }
  409. void add(C *c)
  410. {
  411. if (n*4>htn*3)
  412. expand();
  413. unsigned i = c->hash%htn;
  414. while (table[i])
  415. if (++i==htn)
  416. i = 0;
  417. table[i] = c;
  418. n++;
  419. }
  420. C *findh(const char *key,unsigned h)
  421. {
  422. unsigned i=h%htn;
  423. while (table[i]) {
  424. if ((table[i]->hash==h)&&table[i]->eq(key))
  425. return table[i];
  426. if (++i==htn)
  427. i = 0;
  428. }
  429. return NULL;
  430. }
  431. C *find(const char *key,bool add)
  432. {
  433. unsigned h = C::getHash(key);
  434. unsigned i=h%htn;
  435. while (table[i]) {
  436. if ((table[i]->hash==h)&&table[i]->eq(key))
  437. return table[i];
  438. if (++i==htn)
  439. i = 0;
  440. }
  441. if (!add)
  442. return NULL;
  443. C *c = C::create(key);
  444. table[i] = c;
  445. n++;
  446. if (n*4>htn*3)
  447. expand();
  448. return c;
  449. }
  450. void remove(C *c)
  451. {
  452. unsigned i = c->hash%htn;
  453. C::destroy(c);
  454. while (table[i]!=c) {
  455. if (table[i]==NULL) {
  456. return;
  457. }
  458. if (++i==htn)
  459. i = 0;
  460. }
  461. unsigned j = i+1;
  462. loop {
  463. if (j==htn)
  464. j = 0;
  465. C *cn = table[j];
  466. if (cn==NULL)
  467. break;
  468. unsigned k = cn->hash%htn;
  469. if (j>i) {
  470. if ((k<=i)||(k>j)) {
  471. table[i] = cn;
  472. i = j;
  473. }
  474. }
  475. else if ((k>j)&&(k<=i)) {
  476. table[i] = cn;
  477. i = j;
  478. }
  479. j++;
  480. }
  481. table[i] = NULL;
  482. n--;
  483. if ((n>1024)&&(n<htn/4))
  484. expand(false);
  485. }
  486. C *first(unsigned &i)
  487. {
  488. i = 0;
  489. return next(i);
  490. }
  491. C *next(unsigned &i)
  492. {
  493. while (i!=htn) {
  494. C* r = table[i++];
  495. if (r)
  496. return r;
  497. }
  498. return NULL;
  499. }
  500. unsigned ordinality()
  501. {
  502. return n;
  503. }
  504. };
  505. #endif