123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509 |
- /*##############################################################################
- HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ############################################################################## */
- #include "platform.h"
- #include <limits.h>
- #include <ctype.h>
- #include <stdio.h>
- #include "jhash.hpp"
- #include "jmutex.hpp"
- #define PSTRINGDATA INT_MIN
- #define mix(a,b,c) \
- { \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
- }
- #define GETBYTE0(n) ((unsigned)k[n])
- #define GETBYTE1(n) ((unsigned)k[n+1]<<8)
- #define GETBYTE2(n) ((unsigned)k[n+2]<<16)
- #define GETBYTE3(n) ((unsigned)k[n+3]<<24)
- #define GETWORD(k,n) (GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))
- // the above looks inefficient but the compiler optimizes well
- // this hash looks slow but is about twice as quick as using our CRC table
- // and gives gives better results
- // (see paper at http://burtleburtle.net/bob/hash/evahash.html for more info)
- // Global atom table
- MODULE_INIT(INIT_PRIORITY_JHASH)
- {
- return true;
- }
- MODULE_EXIT()
- {
- // do not delete globalAtomTable as it will slow things down. If you
- // do not want these in your leak checking call releaseAtoms()
- }
- #define HASHONE(hash, c) { hash *= 0x01000193; hash ^= c; } // Fowler/Noll/Vo Hash... seems to work pretty well, and fast
- unsigned hashc( const unsigned char *k, unsigned length, unsigned initval)
- {
- unsigned hash = initval; // ^_rotl(length,2)??
- unsigned char c;
- while (length >= 8)
- {
- c = (*k++); HASHONE(hash, c);
- c = (*k++); HASHONE(hash, c);
- c = (*k++); HASHONE(hash, c);
- c = (*k++); HASHONE(hash, c);
- length-=4;
- }
- switch (length)
- {
- case 7: c = (*k++); HASHONE(hash, c); // fallthrough
- case 6: c = (*k++); HASHONE(hash, c); // fallthrough
- case 5: c = (*k++); HASHONE(hash, c); // fallthrough
- case 4: c = (*k++); HASHONE(hash, c); // fallthrough
- case 3: c = (*k++); HASHONE(hash, c); // fallthrough
- case 2: c = (*k++); HASHONE(hash, c); // fallthrough
- case 1: c = (*k++); HASHONE(hash, c);
- }
- return hash;
- }
- template <typename T>
- inline unsigned doHashValue( T value, unsigned initval)
- {
- //The values returned from this function are only consistent with those from hashn() if running on little endian architecture
- unsigned hash = initval;
- unsigned char c;
- for (unsigned i=0; i < sizeof(T); i++)
- {
- c = (byte)value;
- value >>= 8;
- HASHONE(hash, c);
- }
- return hash;
- }
- unsigned hashvalue( unsigned value, unsigned initval)
- {
- return doHashValue(value, initval);
- }
- unsigned hashvalue( unsigned __int64 value, unsigned initval)
- {
- return doHashValue(value, initval);
- }
- unsigned hashvalue( const void * value, unsigned initval)
- {
- return doHashValue((memsize_t)value, initval);
- }
- #define GETWORDNC(k,n) ((GETBYTE0(n)+GETBYTE1(n)+GETBYTE2(n)+GETBYTE3(n))&0xdfdfdfdf)
- unsigned hashnc( const unsigned char *k, unsigned length, unsigned initval)
- {
- unsigned hash = initval;
- unsigned char c;
- while (length >= 8)
- {
- c = (*k++)&0xdf; HASHONE(hash, c);
- c = (*k++)&0xdf; HASHONE(hash, c);
- c = (*k++)&0xdf; HASHONE(hash, c);
- c = (*k++)&0xdf; HASHONE(hash, c);
- length-=4;
- }
- switch (length)
- {
- case 7: c = (*k++)&0xdf; HASHONE(hash, c); // fallthrough
- case 6: c = (*k++)&0xdf; HASHONE(hash, c); // fallthrough
- case 5: c = (*k++)&0xdf; HASHONE(hash, c); // fallthrough
- case 4: c = (*k++)&0xdf; HASHONE(hash, c); // fallthrough
- case 3: c = (*k++)&0xdf; HASHONE(hash, c); // fallthrough
- case 2: c = (*k++)&0xdf; HASHONE(hash, c); // fallthrough
- case 1: c = (*k++)&0xdf; HASHONE(hash, c);
- }
- return hash;
- }
- MappingKey::MappingKey(const void * inKey, int keysize)
- {
- int ksm = keysize;
- if (!ksm)
- ksm = (size32_t)strlen((char *) inKey) + 1;
- else if (ksm<0)
- {
- if (ksm==PSTRINGDATA) {
- ksm = (*((unsigned char *)inKey))+1;
- }
- else {
- ksm = -ksm;
- ksm += (size32_t)strlen((char *) inKey + ksm) + 1;
- }
- }
- void * temp = malloc(ksm);
- memcpy(temp, inKey, ksm);
- key = temp;
- }
- //-- Mapping ---------------------------------------------------
- unsigned MappingBase::getHash() const { return hash; }
- void MappingBase::setHash(unsigned hval){ hash = hval; }
- const void * Mapping::getKey() const { return key.key; }
- //-- HashMapping ---------------------------------------------------
- //const void * HashMapping::getKey() const { return key.key; }
- //-- Atom ---------------------------------------------------
- unsigned AtomBase::getHash() const { return hash; }
- void AtomBase::setHash(unsigned hval) { hash = hval; }
- const void * AtomBase::getKey() const { return key; }
- //-- Case Atom ---------------------------------------------------
- CaseAtom::CaseAtom(const void * k) : hash(0)
- {
- text = strdup((const char *)k);
- lower = createLowerCaseAtom(text);
- }
- //-- HashTable ---------------------------------------------------
- bool HashTable::addOwn(IMapping & donor)
- {
- if(add(donor))
- {
- donor.Release();
- return true;
- }
- return false;
- }
- bool HashTable::replaceOwn(IMapping & donor)
- {
- if(replace(donor))
- {
- donor.Release();
- return true;
- }
- return false;
- }
- IMapping * HashTable::findLink(const void * findParam) const
- {
- IMapping * found = SuperHashTableOf<IMapping, const void>::find(findParam);
- if(found) found->Link();
- return found;
- }
- bool HashTable::onNotify(INotification & notify)
- {
- bool ret = true;
- if (notify.getAction() == NotifyOnDispose)
- {
- IMapping * mapping = static_cast<IMapping *>(notify.querySource());
- ret = removeExact(mapping);
- assertex(ret);
- }
- return ret;
- }
- bool HashTable::keyeq(const void *key1, const void *key2, int ksize) const
- {
- if (ksize<=0)
- {
- if (ksize<0)
- {
- if (ksize==PSTRINGDATA)
- return (memcmp(key1,key2,(*(unsigned char*)key1)) == 0);
- unsigned ksm = -ksize;
- if (memcmp(key1,key2,ksm))
- return false;
- key1 = (char *)key1 + ksm;
- key2 = (char *)key2 + ksm;
- }
- if (ignorecase)
- {
- unsigned char *k1 = (unsigned char *) key1;
- unsigned char *k2 = (unsigned char *) key2;
- for (;;)
- {
- unsigned char c1 = toupper(*k1);
- if (c1 != toupper(*k2))
- return false;
- if (c1 == 0)
- return true;
- k1++;
- k2++;
- }
- }
- return strcmp((char *)key1, (char *)key2) == 0;
- }
- return memcmp(key1,key2,ksize)==0;
- }
- unsigned HashTable::hash(const void *key, int ksize) const
- {
- unsigned h = 0x811C9DC5;
- unsigned char *bp = (unsigned char *) key;
- if (ksize<=0)
- {
- if (ksize==PSTRINGDATA) {
- ksize = (*(unsigned char*)key)+1;
- goto BlockHash;
- }
- if (ksize<0) {
- h = hashc(bp,-ksize,h);
- bp -= ksize;
- }
- unsigned char* ks = bp;
- while (*bp) bp++;
- if (ignorecase)
- h = hashnc(ks,(size32_t)(bp-ks),h);
- else
- h = hashc(ks,(size32_t)(bp-ks),h);
- }
- else
- {
- BlockHash:
- if (ignorecase)
- h = hashnc(bp,ksize,h);
- else
- h = hashc(bp,ksize,h);
- }
- //Strings that only differ by a single character don't generate hashes very far apart without this
- h *= 0x01000193;
- //h = ((h << 7) ^ (h >> 25));
- return h;
- }
- IMapping * HashTable::createLink(const void *key)
- {
- IMapping * match = findLink(key);
- if (!match)
- {
- match = newMapping(key);
- if (match) add(*match); // link for hash table
- }
- return match;
- }
- IMapping *HashTable::newMapping(const void *)
- {
- assertex(!"Newmapping must be overridden to use HashTable::Create");
- return NULL;
- }
- // -- Helper functions...
- void IntersectHash(HashTable & h1, HashTable & h2)
- {
- HashIterator iter(h1);
- iter.first();
- while (iter.isValid())
- {
- IMapping & cur = iter.query();
- IMapping * matched = h2.find(cur.getKey());
- iter.next();
- if (!matched)
- h1.removeExact(&cur);
- }
- }
- void UnionHash(HashTable & h1, HashTable & h2)
- {
- HashIterator iter(h2);
- iter.first();
- while (iter.isValid())
- {
- IMapping & cur = iter.query();
- IMapping * matched = h1.find(cur.getKey());
- if (!matched)
- h1.add(cur);
- iter.next();
- }
- }
- void SubtractHash(HashTable & main, HashTable & sub)
- {
- HashIterator iter(sub);
- iter.first();
- while (iter.isValid())
- {
- IMapping & cur = iter.query();
- IMapping * matched = main.find(cur.getKey());
- iter.next();
- if (matched)
- main.removeExact(&cur);
- }
- }
- //===========================================================================
- void KeptHashTable::onAdd(void * et)
- {
- static_cast<IMapping *>(et)->Link();
- }
- void KeptHashTable::onRemove(void * et)
- {
- static_cast<IMapping *>(et)->Release();
- }
- IMapping * KeptHashTable::create(const void *key)
- {
- IMapping * match = find(key);
- if (!match)
- {
- match = newMapping(key);
- if (match) addOwn(*match); // already linked for hash table
- }
- return match;
- }
- void ObservedHashTable::onAdd(void * et)
- {
- static_cast<IMapping *>(et)->addObserver(*this);
- }
- void ObservedHashTable::onRemove(void * et)
- {
- static_cast<IMapping *>(et)->removeObserver(*this);
- }
- //===========================================================================
- static CriticalSection atomCrit;
- static KeptLowerCaseAtomTable *globalAtomTable = NULL;
- inline KeptLowerCaseAtomTable * queryGlobalAtomTable()
- {
- if (!globalAtomTable)
- {
- globalAtomTable = new KeptLowerCaseAtomTable;
- globalAtomTable->reinit(2000);
- }
- return globalAtomTable;
- }
- extern jlib_decl IAtom * createAtom(const char *value)
- {
- if (!value) return NULL;
- CriticalBlock crit(atomCrit);
- return queryGlobalAtomTable()->addAtom(value);
- }
- extern jlib_decl IAtom * createAtom(const char *value, size32_t len)
- {
- if (!value || !len)
- return NULL;
- char * nullTerminated = (char *)alloca(len+1);
- memcpy(nullTerminated, value, len);
- nullTerminated[len] = 0;
- CriticalBlock crit(atomCrit);
- return queryGlobalAtomTable()->addAtom(nullTerminated);
- }
- //===========================================================================
- static CriticalSection caseAtomCrit;
- static KeptCaseAtomTable *globalCaseAtomTable = NULL;
- inline KeptCaseAtomTable * queryGlobalCaseAtomTable()
- {
- if (!globalCaseAtomTable)
- {
- globalCaseAtomTable = new KeptCaseAtomTable;
- globalCaseAtomTable->reinit(2000);
- }
- return globalCaseAtomTable;
- }
- extern jlib_decl IIdAtom * createIdAtom(const char *value)
- {
- if (!value) return NULL;
- CriticalBlock crit(caseAtomCrit);
- return queryGlobalCaseAtomTable()->addAtom(value);
- }
- extern jlib_decl IIdAtom * createIdAtom(const char *value, size32_t len)
- {
- if (!value || !len)
- return NULL;
- char * nullTerminated = (char *)alloca(len+1);
- memcpy(nullTerminated, value, len);
- nullTerminated[len] = 0;
- CriticalBlock crit(caseAtomCrit);
- return queryGlobalCaseAtomTable()->addAtom(nullTerminated);
- }
- #ifdef THE_GLOBAL_HASH_TABLE_BECOMES_CASE_SENSITIVE
- extern jlib_decl IAtom * createLowerCaseAtom(const char *value)
- {
- if (!value) return NULL;
- unsigned len = strlen(value);
- const byte * src = (const byte *)value;
- char * lower = (char *)alloca(len+1);
- for (unsigned i=0; i < len; i++)
- lower[i] = tolower(src[i]);
- lower[len] = 0;
- CriticalBlock crit(atomCrit);
- return queryGlobalAtomTable()->addAtom(value);
- }
- extern jlib_decl IAtom * createLowerCaseAtom(const char *value, size32_t len)
- {
- if (!value || !len)
- return NULL;
- const byte * src = (const byte *)value;
- char * lower = (char *)alloca(len+1);
- for (unsigned i=0; i < len; i++)
- lower[i] = tolower(src[i]);
- lower[len] = 0;
- CriticalBlock crit(atomCrit);
- return queryGlobalAtomTable()->addAtom(lower);
- }
- #endif
- extern jlib_decl void releaseAtoms()
- {
- delete globalCaseAtomTable;
- globalCaseAtomTable = NULL;
- delete globalAtomTable;
- globalAtomTable = NULL;
- }
|