Pārlūkot izejas kodu

HPCC-9790 Use a hash table to quickly resolve expressions

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 12 gadi atpakaļ
vecāks
revīzija
0126fd9508
3 mainītis faili ar 99 papildinājumiem un 97 dzēšanām
  1. 45 56
      ecl/hqlcpp/hqlstmt.cpp
  2. 17 13
      ecl/hqlcpp/hqlstmt.hpp
  3. 37 28
      ecl/hqlcpp/hqlstmt.ipp

+ 45 - 56
ecl/hqlcpp/hqlstmt.cpp

@@ -588,29 +588,6 @@ bool BuildCtx::hasAssociation(HqlExprAssociation & search, bool unconditional)
 }
 
 
-void BuildCtx::walkAssociations(IAssociationVisitor & visitor)
-{
-    HqlStmts * searchStmts = curStmts;
-
-    // walk all associations in the tree before this one
-    loop
-    {
-        CIArrayOf<HqlExprAssociation> & defs = searchStmts->defs;
-        ForEachItemInRev(idx, defs)
-        {
-            HqlExprAssociation & cur = defs.item(idx);
-            if (visitor.visit(cur))
-                return;
-        }
-
-        HqlStmt * limitStmt = searchStmts->queryStmt();
-        if (!limitStmt)
-            break;
-        searchStmts = limitStmt->queryContainer();
-    }
-}
-
-
 HqlExprAssociation * BuildCtx::queryAssociation(IHqlExpression * search, AssocKind kind, HqlExprCopyArray * selectors)
 {
     HqlStmts * searchStmts = curStmts;
@@ -618,40 +595,28 @@ HqlExprAssociation * BuildCtx::queryAssociation(IHqlExpression * search, AssocKi
     if (!search)
         return NULL;
     search = search->queryBody();
-    unsigned searchMask = (1 << kind);
+    unsigned searchMask = kind;
     if (selectors)
-        searchMask |= (1 << AssocCursor);
+        searchMask |= AssocCursor;
 
     // search all statements in the tree before this one, to see
     // if an expression already exists...  If so return the target
     // of the assignment.
     loop
     {
-
-        if (searchStmts->associationMask & searchMask)
+        unsigned stmtMask = searchStmts->associationMask;
+        if (stmtMask & searchMask)
         {
-            const CIArrayOf<HqlExprAssociation> & defs = searchStmts->defs;
-    #ifdef CACHE_DEFINITION_HASHES
-            if (!selectors)
+            //Safe to use the hash iterator if no selectors, or this definition list contains no cursors
+            if ((kind == AssocExpr) && (!selectors || !(stmtMask & AssocCursor)))
             {
-                unsigned searchHash = getSearchHash(search);
-                const DefinitionHashArray & hashes = searchStmts->exprHashes;
-                ForEachItemInRev(idx, hashes)
-                {
-                    if (hashes.item(idx) == searchHash)
-                    {
-                        HqlExprAssociation & cur = defs.item(idx);
-                        if (cur.represents == search)
-                        {
-                            if (cur.getKind() == kind)
-                                return &cur;
-                        }
-                    }
-                }
+                HqlExprAssociation * match = searchStmts->exprAssociationCache.find(*search);
+                if (match)
+                    return match;
             }
             else
-    #endif
             {
+                const CIArrayOf<HqlExprAssociation> & defs = searchStmts->defs;
                 ForEachItemInRev(idx, defs)
                 {
                     HqlExprAssociation & cur = defs.item(idx);
@@ -700,7 +665,7 @@ void BuildCtx::removeAssociation(HqlExprAssociation * search)
 HqlExprAssociation * BuildCtx::queryFirstAssociation(AssocKind searchKind)
 {
     HqlStmts * searchStmts = curStmts;
-    unsigned searchMask = (1 << searchKind);
+    unsigned searchMask = searchKind;
 
     // search all statements in the tree before this one, to see
     // if an expression already exists...  If so return the target
@@ -731,7 +696,7 @@ HqlExprAssociation * BuildCtx::queryFirstAssociation(AssocKind searchKind)
 HqlExprAssociation * BuildCtx::queryFirstCommonAssociation(AssocKind searchKind)
 {
     HqlStmts * searchStmts = curStmts;
-    unsigned searchMask = (1 << searchKind) | (1 << AssocCursor);
+    unsigned searchMask = searchKind|AssocCursor;
 
     // search all statements in the tree before this one, to see
     // if an expression already exists...  If so return the target
@@ -961,7 +926,7 @@ IHqlStmt * BuildCtx::selectBestContext(IHqlExpression * expr)
 
 //---------------------------------------------------------------------------
 
-HqlStmts::HqlStmts(HqlStmt * _owner)    : owner(_owner) 
+HqlStmts::HqlStmts(HqlStmt * _owner) : owner(_owner)
 {
     associationMask = 0;
 }
@@ -969,22 +934,20 @@ HqlStmts::HqlStmts(HqlStmt * _owner)    : owner(_owner)
 void HqlStmts::appendOwn(HqlExprAssociation & next)
 {
     defs.append(next);
-#ifdef CACHE_DEFINITION_HASHES
-    exprHashes.append(getSearchHash(next.represents));
-#endif
-    associationMask |= (1 << next.getKind());
+    associationMask |= next.getKind();
+    if (next.getKind() == AssocExpr)
+        exprAssociationCache.replace(next);
 }
 
 void HqlStmts::inheritDefinitions(HqlStmts & other)
 {
+    associationMask |= other.associationMask;
     ForEachItemIn(i, other.defs)
     {
         HqlExprAssociation & cur = other.defs.item(i);
         defs.append(OLINK(cur));
-        associationMask |= (1 << cur.getKind());
-#ifdef CACHE_DEFINITION_HASHES
-        exprHashes.append(other.exprHashes.item(i));
-#endif
+        if (cur.getKind() == AssocExpr)
+            exprAssociationCache.replace(cur);
     }
 
 }
@@ -1031,6 +994,32 @@ void HqlStmts::appendStmt(HqlStmt & stmt)
     }
 }
 
+bool HqlStmts::zap(HqlExprAssociation & next)
+{
+    unsigned match = defs.find(next);
+    if (match == NotFound)
+        return false;
+
+    //MORE: Try and avoid this if we can - we should probably use a different kind for items that are removed
+    if (next.getKind() == AssocExpr)
+    {
+        exprAssociationCache.removeExact(&next);
+        IHqlExpression * search = next.represents;
+        for (unsigned i=match; i-- != 0; )
+        {
+            HqlExprAssociation & cur = defs.item(i);
+            if ((cur.getKind() == AssocExpr) && (cur.represents == search))
+            {
+                exprAssociationCache.add(cur);
+                break;
+            }
+        }
+    }
+
+    defs.remove(match);
+    return true;
+}
+
 
 //---------------------------------------------------------------------------
 

+ 17 - 13
ecl/hqlcpp/hqlstmt.hpp

@@ -39,16 +39,21 @@ interface IFunctionInfo;
 // used to represent an association of something already calculated in the context
 // e.g. a cursor, temporary value, or even dependency information.
 
-enum AssocKind { AssocExpr, AssocActivity, AssocCursor, AssocClass,
-                 AssocRow,
-                 AssocActivityInstance,
-                 AssocExtract,
-                 AssocExtractContext,
-                 AssocSubQuery,
-                 AssocGraphNode,
-                 AssocSubGraph,
-                 AssocStmt,
-                 AssocMax,
+//These are represented using unique bits, so that a mask can be stored to indicate which associations are held
+enum AssocKind
+{
+    AssocExpr              = 0x0000001,
+    AssocActivity          = 0x0000002,
+    AssocCursor            = 0x0000004,
+    AssocClass             = 0x0000008,
+    AssocRow               = 0x0000010,
+    AssocActivityInstance  = 0x0000020,
+    AssocExtract           = 0x0000040,
+    AssocExtractContext    = 0x0000080,
+    AssocSubQuery          = 0x0000100,
+    AssocGraphNode         = 0x0000200,
+    AssocSubGraph          = 0x0000400,
+    AssocStmt              = 0x0000800,
  };
 
 class CHqlBoundExpr;
@@ -142,7 +147,6 @@ public:
 
     void                        set(IAtom * section);
     void                        set(BuildCtx & _owner);
-    void                        walkAssociations(IAssociationVisitor & visitor);
 
 public:
     enum { ConPrio = 1, EarlyPrio =3000, NormalPrio = 5000, LatePrio = 7000, DesPrio = 9999, OutermostScopePrio };
@@ -238,7 +242,7 @@ public:
     FilteredAssociationIterator(BuildCtx & ctx, AssocKind _searchKind) : AssociationIterator(ctx)
     {
         searchKind = _searchKind;
-        searchMask = (1 << searchKind);
+        searchMask = searchKind;
     }
 
     virtual bool doNext();
@@ -255,7 +259,7 @@ class RowAssociationIterator : public AssociationIterator
 public:
     RowAssociationIterator(BuildCtx & ctx) : AssociationIterator(ctx)
     {
-        searchMask = (1 << AssocRow) | (1 << AssocCursor);
+        searchMask = AssocRow|AssocCursor;
     }
 
     virtual bool doNext();

+ 37 - 28
ecl/hqlcpp/hqlstmt.ipp

@@ -19,19 +19,6 @@
 
 #include "hqlstmt.hpp"
 
-//Sometimes the number of definitions can get very very large.  
-//To help with that situation the following option creates an array of "hashes" of the definitions
-//It uses more memory, but has much better cache locality than accessing a member of the definition
-
-#define CACHE_DEFINITION_HASHES
-inline unsigned getSearchHash(IHqlExpression * search)
-{
-    unsigned hash = search->getHash();
-    return (unsigned short)((hash >> 16) | hash);
-}
-//typedef UnsignedArray DefinitionHashArray;
-typedef UnsignedShortArray DefinitionHashArray;
-
 //---------------------------------------------------------------------------
 
 class HqlStmts;
@@ -82,6 +69,41 @@ protected:
 
 typedef IArrayOf<HqlStmt> HqlStmtArray;
 
+//The elements are kept in the hash table, but not linked - they are owned by an associated array.
+class HQLCPP_API AssociationCache : public SuperHashTableOf<HqlExprAssociation, IHqlExpression>
+{
+public:
+    AssociationCache() {}
+    ~AssociationCache() { releaseAll(); }
+
+    IMPLEMENT_SUPERHASHTABLEOF_REF_FIND(HqlExprAssociation, IHqlExpression);
+
+    virtual void onAdd(void *et) { }
+    virtual void onRemove(void *et) { }
+    virtual unsigned getHashFromElement(const void *et) const
+    {
+        const HqlExprAssociation * elem = reinterpret_cast<const HqlExprAssociation *>(et);
+        return elem->represents->getHash();
+    }
+    virtual unsigned getHashFromFindParam(const void *fp) const
+    {
+        const IHqlExpression * expr = (const IHqlExpression *)fp;
+        return expr->getHash();
+    }
+    virtual const void *getFindParam(const void *et) const
+    {
+        const HqlExprAssociation * elem = reinterpret_cast<const HqlExprAssociation *>(et);
+        return elem->represents;
+    }
+    virtual bool matchesFindParam(const void *et, const void *fp, unsigned) const
+    {
+        const IHqlExpression * expr = (const IHqlExpression *)fp;
+        const HqlExprAssociation * elem = reinterpret_cast<const HqlExprAssociation *>(et);
+        return expr == elem->represents;
+    }
+};
+
+
 class HQLCPP_API HqlStmts : public IArrayOf<HqlStmt>
 {
     friend class BuildCtx;
@@ -95,27 +117,14 @@ public:
     HqlStmt *                       queryStmt() { return owner; };
 
     void appendOwn(HqlExprAssociation & next);
-
-    inline bool zap(HqlExprAssociation & next)
-    {
-        unsigned match = defs.find(next);
-        if (match == NotFound)
-            return false;
-        defs.remove(match);
-#ifdef CACHE_DEFINITION_HASHES
-        exprHashes.remove(match);
-#endif
-        return true;
-    }
+    bool zap(HqlExprAssociation & next);
 
 protected:
     HqlStmt *                       owner;
     CIArrayOf<HqlExprAssociation>   defs;
     // A bit mask of which types of associations this contains.  Don't worry about false positives.
     unsigned                        associationMask;
-#ifdef CACHE_DEFINITION_HASHES
-    DefinitionHashArray             exprHashes;
-#endif
+    AssociationCache                exprAssociationCache;
 };