/*############################################################################## HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ############################################################################## */ #ifndef JHASH_HPP #define JHASH_HPP #include #include #include #include "platform.h" #include #include "jdebug.hpp" // for get_cycles_now() #include "jiface.hpp" #include "jobserve.hpp" #include "jiter.hpp" #include "jsuperhash.hpp" #ifndef IHASH_DEFINED // may be defined already elsewhere #define IHASH_DEFINED interface IHash { virtual unsigned hash(const void *data)=0; protected: virtual ~IHash() {} }; #endif interface jlib_decl IMapping : extends IObservable { public: virtual const void * getKey() const = 0; virtual unsigned getHash() const = 0; virtual void setHash(unsigned) = 0; }; interface jlib_decl IAtom : extends IMapping { public: virtual const char * queryStr() const = 0; }; inline jlib_decl const char * str(const IAtom * atom) { return atom ? atom->queryStr() : NULL; } //This interface represents an atom which preserves its case, but also stores a lower case representation //for efficient case insensitive comparison. //It is deliberately NOT derived from IAtom to avoid accidentally using the wrong interface interface jlib_decl IIdAtom : extends IMapping { public: virtual const char * queryStr() const = 0; public: virtual IAtom * queryLower() const = 0; }; inline jlib_decl const char * str(const IIdAtom * atom) { return atom ? atom->queryStr() : NULL; } inline jlib_decl IAtom * lower(const IIdAtom * atom) { return atom ? atom->queryLower() : NULL; } #ifdef _MSC_VER #pragma warning (push) #pragma warning( disable : 4275 ) #endif class jlib_decl HashTable : public SuperHashTableOf, implements IObserver { public: HashTable(int _keysize, bool _ignorecase) : SuperHashTableOf(), keysize(_keysize), ignorecase(_ignorecase) {} HashTable(unsigned initsize, int _keysize, bool _ignorecase) : SuperHashTableOf(initsize), keysize(_keysize), ignorecase(_ignorecase) {} ~HashTable() { _releaseAll(); } IMPLEMENT_IINTERFACE bool add(IMapping & donor) { donor.setHash(hash(donor.getKey(), keysize)); return SuperHashTableOf::add(donor); } bool replace(IMapping & donor) { donor.setHash(hash(donor.getKey(), keysize)); return SuperHashTableOf::replace(donor); } bool addOwn(IMapping & donor); bool replaceOwn(IMapping & donor); IMapping * findLink(const void *key) const; virtual bool onNotify(INotification & notify); IMapping * createLink(const void *key); protected: virtual IMapping * newMapping(const void *key); private: unsigned getHashFromElement(const void * et) const { return static_cast(et)->getHash(); } unsigned getHashFromFindParam(const void *fp) const { return hash(fp, keysize); } const void * getFindParam(const void * et) const { return const_cast(static_cast(et)->getKey()); } bool matchesFindParam(const void * et, const void * key, unsigned fphash __attribute__((unused))) const { return keyeq(key, static_cast(et)->getKey(), keysize); } bool keyeq(const void *key1, const void *key2, int ksize) const; unsigned hash(const void *key, int ksize) const; private: int keysize; bool ignorecase; }; class jlib_decl KeptHashTable : public HashTable { public: KeptHashTable(int _keysize, bool _ignorecase) : HashTable(_keysize, _ignorecase) {} KeptHashTable(unsigned _initsize, int _keysize, bool _ignorecase) : HashTable(_initsize, _keysize, _ignorecase) {} ~KeptHashTable() { _releaseAll(); } IMapping * create(const void *key); private: void onAdd(void *et); void onRemove(void *et); }; class jlib_decl ObservedHashTable : public HashTable { public: ObservedHashTable(int _keysize, bool _ignorecase) : HashTable(_keysize, _ignorecase) {} ObservedHashTable(unsigned _initsize, int _keysize, bool _ignorecase) : HashTable(_initsize, _keysize, _ignorecase) {} ~ObservedHashTable() { _releaseAll(); } private: void onAdd(void *et); void onRemove(void *et); }; class jlib_decl HashIterator : public SuperHashIteratorOf { public: HashIterator(const HashTable & _table) : SuperHashIteratorOf(_table) {} IMapping & get() { IMapping & et = query(); et.Link(); return et; } }; #ifdef _MSC_VER #pragma warning (pop) #endif void IntersectHash(HashTable & h1, HashTable & h2); void UnionHash(HashTable & h1, HashTable & h2); void SubtractHash(HashTable & main, HashTable & sub); #include "jhash.ipp" typedef MappingStringTo CopyMappingStringToIInterface; typedef MapStringTo CopyMapStringToIInterface; template class CopyMapStringToMyClass : public CopyMapStringToIInterface { public: CopyMapStringToMyClass() : CopyMapStringToIInterface() {}; CopyMapStringToMyClass(bool _ignorecase) : CopyMapStringToIInterface(_ignorecase) {}; static inline C * mapToValue(IMapping * _map) { CopyMappingStringToIInterface * map = (CopyMappingStringToIInterface *)_map; IInterface ** x = &map->getValue(); return x ? (C *) *x : NULL; } C *getValue(const char *k) const { IInterface **x = CopyMapStringToIInterface::getValue(k); return x ? (C *) *x : NULL; } }; template class CopyMapStringToMyClassViaBase : public CopyMapStringToIInterface { public: CopyMapStringToMyClassViaBase() : CopyMapStringToIInterface() {}; CopyMapStringToMyClassViaBase(bool _ignorecase) : CopyMapStringToIInterface(_ignorecase) {}; C *getValue(const char *k) const { IInterface **x = CopyMapStringToIInterface::getValue(k); return x ? (C *)(BASE *)*x : NULL; } bool setValue(const char *k, C * v) { return CopyMapStringToIInterface::setValue(k, (BASE *)v); } }; //=========================================================================== #ifdef _MSC_VER #pragma warning (push) #pragma warning( disable : 4275 ) // hope this warning not significant! (may get link errors I guess) #endif class jlib_decl MappingStringToIInterface : public MappingStringTo { public: MappingStringToIInterface(const char * k, IInterfacePtr a) : MappingStringTo(k,a) { ::Link(a); } ~MappingStringToIInterface() { ::Release(val); } }; #ifdef _MSC_VER #pragma warning (pop) #endif typedef MapStringTo MapStringToIInterface; template class MapStringToMyClass : public MapStringToIInterface { public: MapStringToMyClass() : MapStringToIInterface() {}; MapStringToMyClass(bool _ignorecase) : MapStringToIInterface(_ignorecase) {}; static inline C * mapToValue(IMapping * _map) { MappingStringToIInterface * map = (MappingStringToIInterface *)_map; IInterface ** x = &map->getValue(); return x ? (C *) *x : NULL; } C *getValue(const char *k) const { IInterface **x = MapStringToIInterface::getValue(k); return x ? (C *) *x : NULL; } }; template class MapStringToMyClassViaBase : public MapStringToIInterface { public: MapStringToMyClassViaBase() : MapStringToIInterface() {}; MapStringToMyClassViaBase(bool _ignorecase) : MapStringToIInterface(_ignorecase) {}; C *getValue(const char *k) const { IInterface **x = MapStringToIInterface::getValue(k); return x ? (C *)(BASE *)*x : NULL; } bool setValue(const char *k, C * v) { return MapStringToIInterface::setValue(k, (BASE *)v); } }; //=========================================================================== template class CopyMapXToIInterface : public MapBetween > { }; template class CopyMapXToMyClass : public CopyMapXToIInterface { public: CopyMapXToMyClass() : CopyMapXToIInterface() {}; static inline C * mapToValue(IMapping * _map) { MappingBetween * map = (MappingBetween *)_map; IInterface ** x = &map->getValue(); return x ? (C *) *x : NULL; } C *getValue(KEYINIT k) const { IInterface **x = CopyMapXToIInterface::getValue(k); return x ? (C *) *x : NULL; } }; template class MappingXToIInterface : public MappingBetween { typedef MappingXToIInterface SELF; public: MappingXToIInterface(KEYINIT k, IInterfacePtr a) : MappingBetween(k,a) { ::Link(a); } ~MappingXToIInterface() { ::Release(SELF::val); } }; template class MapXToIInterface : public MapBetween > { public: using ELEMENT = MappingXToIInterface; }; template class MapXToMyClass : public MapXToIInterface { public: MapXToMyClass() : MapXToIInterface() {}; static inline C * mapToValue(IMapping * _map) { MappingXToIInterface * map = (MappingXToIInterface *)_map; IInterface ** x = &map->getValue(); return x ? (C *) *x : NULL; } C *getValue(KEYINIT k) const { IInterface **x = MapXToIInterface::getValue(k); return x ? (C *) *x : NULL; } }; template class MapXToMyClassViaBase : public MapXToIInterface { public: MapXToMyClassViaBase () : MapXToIInterface() {}; static inline C * mapToValue(IMapping * _map) { MappingXToIInterface * map = (MappingXToIInterface *)_map; IInterface ** x = &map->getValue(); return x ? (C *)(BASE *)*x : NULL; } C *getValue(KEYINIT k) const { IInterface **x = MapXToIInterface::getValue(k); return x ? (C *)(BASE *) *x : NULL; } bool setValue(KEYINIT k, BASE * v) { return MapXToIInterface::setValue(k, v); } }; //=========================================================================== template class MappingOwnedToOwned : public MappingBetween, KEY *, Linked,VALUE *> { public: MappingOwnedToOwned(KEY * k, VALUE * a) : MappingBetween, KEY *, Linked,VALUE *>(k, a) {} }; template class MapOwnedToOwned : public MapBetween, KEY * , Linked,VALUE *,MappingOwnedToOwned > { public: inline MapOwnedToOwned() {} inline MapOwnedToOwned(unsigned initsize) : MapBetween, KEY * , Linked,VALUE *,MappingOwnedToOwned >(initsize) {} }; /* The hash tables can be used from the interfaces above. However there are some helper classes/macros in jhash.ipp to make them easier to use: Element classes: HashMapping - An implementation of IMapping that works in keep and non kept hash tables. Atom - An implementation of IAtom MappingKey - A class for storing a hash key - used by above. There are also some classes to make creating hash tables easier: (They are actually implemented as macros to stop the compiler choking on the template classes). a) To create a hash table of a given class use: typedef HashTableOf MapHashTable; Using a HashTableOf also adds the following method to the hash table class MAPPING-TYPE * CreateLink(const void * key); which creates entries in the hash table. b) To create a hash table that maps a string to another type use: typedef MapStringTo MyMappingName; where TO-TYPE - what is stored in the mapping TO-INIT - what you pass to the value to initialise it. e.g. typedef MapStringTo MapStrToStr; c) To create a hash table that maps a non-string to another type use: typedef MapBetween myMap; where MAP-... as above and KEY-TYPE - what is stored as the key KEY-INIT - what you pass to the key to initialise it. e.g., typedef MapBetween MapIntToStr; *** HashTable Key sizes *** The HashTable key size can have one of the following forms: +n - The key is n bytes of data 0 - The key is a null terminated string -n - The key is n bytes of data followed by a null terminated string. */ // Create an atom in the global atom table extern jlib_decl IAtom * createAtom(const char *value); extern jlib_decl IAtom * createAtom(const char *value, size32_t len); inline IAtom * createLowerCaseAtom(const char *value) { return createAtom(value); } inline IAtom * createLowerCaseAtom(const char *value, size32_t len) { return createAtom(value, len); } extern jlib_decl IIdAtom * createIdAtom(const char *value); extern jlib_decl IIdAtom * createIdAtom(const char *value, size32_t len); extern jlib_decl void releaseAtoms(); extern jlib_decl unsigned hashc( const unsigned char *k, unsigned length, unsigned initval); extern jlib_decl unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval); extern jlib_decl unsigned hashvalue( unsigned value, unsigned initval); extern jlib_decl unsigned hashvalue( unsigned __int64 value, unsigned initval); extern jlib_decl unsigned hashvalue( const void * value, unsigned initval); //================================================ // Minimal Hash table template - slightly less overhead that HashTable/SuperHashTable template class CMinHashTable { protected: unsigned htn; unsigned n; C **table; void expand(bool expand=true) { C **t = table+htn; // more interesting going backwards if (expand) htn = htn*2+1; else htn /= 2; C **newtable = (C **)calloc(sizeof(C *),htn); while (t--!=table) { C *c = *t; if (c) { unsigned h = c->hash%htn; while (newtable[h]) { h++; if (h==htn) h = 0; } newtable[h] = c; } } free(table); table = newtable; } public: CMinHashTable(unsigned _initialSize = 7) { htn = _initialSize; n = 0; table = (C **)calloc(sizeof(C *),htn); } ~CMinHashTable() { C **t = table+htn; while (t--!=table) if (*t) C::destroy(*t); free(table); } void add(C *c) { if (n*4>htn*3) expand(); unsigned i = c->hash%htn; while (table[i]) if (++i==htn) i = 0; table[i] = c; n++; } C *findh(const char *key,unsigned h) { unsigned i=h%htn; while (table[i]) { if ((table[i]->hash==h)&&table[i]->eq(key)) return table[i]; if (++i==htn) i = 0; } return NULL; } C *find(const char *key,bool add) { unsigned h = C::getHash(key); unsigned i=h%htn; while (table[i]) { if ((table[i]->hash==h)&&table[i]->eq(key)) return table[i]; if (++i==htn) i = 0; } if (!add) return NULL; C *c = C::create(key); table[i] = c; n++; if (n*4>htn*3) expand(); return c; } unsigned findIndex(const char *key, unsigned h) { unsigned i=h%htn; while (table[i]) { if ((table[i]->hash==h)&&table[i]->eq(key)) return i; if (++i==htn) i = 0; } return (unsigned) -1; } C *getIndex(unsigned v) const { assert(v != (unsigned)-1 && v < htn); return table[v]; } void remove(C *c) { unsigned i = c->hash%htn; C::destroy(c); while (table[i]!=c) { if (table[i]==NULL) { return; } if (++i==htn) i = 0; } unsigned j = i+1; for (;;) { if (j==htn) j = 0; C *cn = table[j]; if (cn==NULL) break; unsigned k = cn->hash%htn; if (j>i) { if ((k<=i)||(k>j)) { table[i] = cn; i = j; } } else if ((k>j)&&(k<=i)) { table[i] = cn; i = j; } j++; } table[i] = NULL; n--; if ((n>1024)&&(n> myCache(10); * Owned match; * verifyex( !myCache.get("tree1", match) ); * myCache.add("tree1", createPTree("tree1")); * verifyex( nullptr != myCache.query("tree1") ); * MilliSleep(11000); // sleep until "tree1" has expired * verifyex( nullptr == myCache.query("tree1") ); * * myCache.ensure("tree2", [](std::string key) { return createPTree(key.c_str()); }); * * Keywords: cache,time,limit,hashtable,hash */ template class jlib_decl CTimeLimitedCache { public: CTimeLimitedCache(unsigned timeoutMs=defaultCacheTimeoutMs) { timeoutPeriodCycles = ((cycle_t)timeoutMs) * queryOneSecCycles() / 1000; } VALUETYPE *query(KEYTYPE key, bool touch=false) { CacheElement *match = getMatch(key, touch); if (!match) return nullptr; return &match->second; } bool get(KEYTYPE key, VALUETYPE &result, bool touch=false) { VALUETYPE *res = query(key, touch); if (!res) return false; result = *res; return true; } VALUETYPE &add(KEYTYPE key, VALUETYPE val) { auto &ref = ht[key]; ref = std::make_pair(get_cycles_now(), val); return ref.second; } VALUETYPE &ensure(KEYTYPE key, std::function func) { VALUETYPE *res = query(key); if (res) return *res; return add(key, func(key)); } void kill() { // NB: std::unordered_map clear() does not free the map memory (or call dtors) until it is out of scope std::unordered_map empty; empty.swap(ht); } private: static constexpr unsigned defaultCacheTimeoutMs = 30000; typedef std::pair CacheElement; cycle_t timeoutPeriodCycles = 0; std::unordered_map ht; CacheElement *getMatch(KEYTYPE key, bool touch) { auto it = ht.find(key); if (it == ht.end()) return nullptr; cycle_t nowCycles = get_cycles_now(); if ((nowCycles - it->second.first) > timeoutPeriodCycles) // NB: rollover is okay { ht.erase(it); return nullptr; } if (touch) it->second.first = nowCycles; return &it->second; } }; #endif