Przeglądaj źródła

HPCC-10653 Problems in recreateCloneSource if dali not available

If not connected to local dali, use cache info only.

Exceptions when looking up the remote file are no longer fatal, even in
legacy mode.

Add a longer expiry for group lookup cache when the lookup results in an
exception, together with a control command (control:resetremotedalicache)
to clear the cached information.

Make sure that remote file resolutions are cached for use in lockdali mode.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 11 lat temu
rodzic
commit
3dfeca030d
4 zmienionych plików z 73 dodań i 45 usunięć
  1. 17 2
      dali/base/dadfs.cpp
  2. 1 0
      dali/base/dadfs.hpp
  3. 51 43
      roxie/ccd/ccddali.cpp
  4. 4 0
      roxie/ccd/ccdstate.cpp

+ 17 - 2
dali/base/dadfs.cpp

@@ -6532,6 +6532,7 @@ public:
 // --------------------------------------------------------
 
 #define GROUP_CACHE_INTERVAL (1000*60)
+#define GROUP_EXCEPTION_CACHE_INTERVAL (1000*60*10)
 
 static GroupType translateGroupType(const char *groupType)
 {
@@ -6553,7 +6554,6 @@ public:
     Linked<IGroup> group;
     StringAttr name;
     StringAttr groupDir;
-    unsigned cachedtime;
     GroupType groupType;
     Linked<IException> exception;
 
@@ -6568,6 +6568,16 @@ public:
     {
         cachedtime = msTick();
     }
+
+    bool expired(unsigned timeNow)
+    {
+        if (exception)
+            return timeNow-cachedtime > GROUP_EXCEPTION_CACHE_INTERVAL;
+        else
+            return timeNow-cachedtime > GROUP_CACHE_INTERVAL;
+    }
+protected:
+    unsigned cachedtime;
 };
 
 class CNamedGroupStore: public CInterface, implements INamedGroupStore
@@ -6648,7 +6658,7 @@ public:
             ForEachItemInRev(idx, cache)
             {
                 CNamedGroupCacheEntry &entry = cache.item(idx);
-                if (timeNow-entry.cachedtime > GROUP_CACHE_INTERVAL)
+                if (entry.expired(timeNow))
                 {
                     cache.remove(idx);
                 }
@@ -6974,6 +6984,11 @@ public:
         return ret;
     }
 
+    void resetCache()
+    {
+        CriticalBlock block(cachesect);
+        cache.kill();
+    }
 private:
     bool getRemoteGroup(const INode *foreigndali, const char *gname, unsigned foreigndalitimeout,
                            StringAttr &groupdir, GroupType &type, SocketEndpointArray &epa)

+ 1 - 0
dali/base/dadfs.hpp

@@ -587,6 +587,7 @@ interface INamedGroupStore: implements IGroupResolver
     virtual IGroup *lookup(const char *logicalgroupname, StringBuffer &dir, GroupType &groupType) = 0;
     virtual unsigned setDefaultTimeout(unsigned timems) = 0;     // sets default timeout for SDS connections and locking
     virtual unsigned setRemoteTimeout(unsigned timems) = 0;      // sets default timeout for remote SDS connections and locking
+    virtual void resetCache() = 0;      // resets any cached lookups
 };
 
 extern da_decl INamedGroupStore  &queryNamedGroupStore();

+ 51 - 43
roxie/ccd/ccddali.cpp

@@ -419,43 +419,44 @@ public:
         // NOTE - we rely on the fact that  queryNamedGroupStore().lookup caches results,to avoid excessive load on remote dali
         if (_lfn && !strnicmp(_lfn, "foreign", 7)) //if need to support dali hopping should add each remote location
             return NULL;
-        if (!fdesc || !fdesc->queryProperties().hasProp("@cloneFrom"))
+        if (!fdesc)
             return NULL;
-        if (fdesc->queryProperties().hasProp("cloneFromGroup") && fdesc->queryProperties().hasProp("@cloneFromDir"))
+        const char *cloneFrom = fdesc->queryProperties().queryProp("@cloneFrom");
+        if (!cloneFrom)
+            return NULL;
+        StringBuffer foreignLfn("foreign::");
+        foreignLfn.append(cloneFrom);
+        if (!connected())
+            return resolveCachedLFN(foreignLfn);  // Note - cache only used when no dali connection available
+        try
         {
-            try
+            if (fdesc->queryProperties().hasProp("cloneFromGroup") && fdesc->queryProperties().hasProp("@cloneFromDir"))
             {
-                return recreateCloneSource(fdesc, _lfn);
+                Owned<IFileDescriptor> ret = recreateCloneSource(fdesc, _lfn);
+                if (cacheIt)
+                    cacheFileDescriptor(foreignLfn, ret);
+                return ret.getClear();
             }
-            catch (IException *E)
+            else // Legacy mode - recently cloned files should have the extra info
             {
-                E->Release();
-                return NULL;
+                if (traceLevel > 1)
+                    DBGLOG("checkClonedFromRemote: Resolving %s in legacy mode", _lfn);
+                Owned<IDistributedFile> cloneFile = resolveLFN(foreignLfn, cacheIt, false);
+                if (cloneFile)
+                {
+                    Owned<IFileDescriptor> cloneFDesc = cloneFile->getFileDescriptor();
+                    if (cloneFDesc->numParts()==fdesc->numParts())
+                        return cloneFDesc.getClear();
+
+                    DBGLOG(ROXIE_MISMATCH, "File %s cloneFrom(%s) mismatch", _lfn, cloneFrom);
+                }
             }
         }
-        else // Legacy mode - recently cloned files should have the extra info
+        catch (IException *E)
         {
-            if (traceLevel > 1)
-                DBGLOG("checkClonedFromRemote: Resolving %s in legacy mode", _lfn);
-            SocketEndpoint cloneFrom;
-            cloneFrom.set(fdesc->queryProperties().queryProp("@cloneFrom"));
-            if (cloneFrom.isNull())
-                return NULL;
-            CDfsLogicalFileName lfn;
-            lfn.set(_lfn);
-            lfn.setForeign(cloneFrom, false);
-            if (!connected())
-                return resolveCachedLFN(lfn.get());
-            Owned<IDistributedFile> cloneFile = resolveLFN(lfn.get(), cacheIt, false);
-            if (cloneFile)
-            {
-                Owned<IFileDescriptor> cloneFDesc = cloneFile->getFileDescriptor();
-                if (cloneFDesc->numParts()==fdesc->numParts())
-                    return cloneFDesc.getClear();
-
-                StringBuffer s;
-                DBGLOG(ROXIE_MISMATCH, "File %s cloneFrom(%s) mismatch", _lfn, cloneFrom.getIpText(s).str());
-            }
+            if (traceLevel > 3)
+                EXCLOG(E);
+            E->Release();  // Any failure means act as if no remote info
         }
         return NULL;
     }
@@ -475,20 +476,7 @@ public:
                     dfsFile.clear();
             }
             if (cacheIt)
-            {
-                Owned<IFileDescriptor> fd;
-                Owned<IPropertyTree> pt;
-                if (dfsFile)
-                {
-                    fd.setown(dfsFile->getFileDescriptor());
-                    if (fd)
-                        pt.setown(fd->getFileTree());
-                }
-                StringBuffer xpath("Files/");
-                StringBuffer lcname;
-                xpath.append(lcname.append(logicalName).toLowerCase());
-                writeCache(xpath.str(), xpath.str(), pt);
-            }
+                cacheDistributedFile(logicalName, dfsFile);
             if (traceLevel > 1)
                 DBGLOG("Dali lookup %s returned %s in %u ms", logicalName, dfsFile != NULL ? "match" : "NO match", msTick()-start);
             return dfsFile.getClear();
@@ -712,7 +700,27 @@ public:
             }
         }
     }
+protected:
+    void cacheDistributedFile(const char *logicalName, IDistributedFile *dfsFile)
+    {
+        assertex(isConnected);
+        Owned<IFileDescriptor> fd;
+        if (dfsFile)
+            fd.setown(dfsFile->getFileDescriptor());
+        cacheFileDescriptor(logicalName, fd);
+    }
 
+    void cacheFileDescriptor(const char *logicalName, IFileDescriptor *fd)
+    {
+        assertex(isConnected);
+        Owned<IPropertyTree> pt;
+        if (fd)
+            pt.setown(fd->getFileTree());
+        StringBuffer xpath("Files/");
+        StringBuffer lcname;
+        xpath.append(lcname.append(logicalName).toLowerCase());
+        writeCache(xpath.str(), xpath.str(), pt);
+    }
 };
 
 class CRoxieDllServer : public CInterface, implements IDllServer

+ 4 - 0
roxie/ccd/ccdstate.cpp

@@ -2143,6 +2143,10 @@ private:
                 else
                     allQueryPackages->resetStats(NULL, logctx);
             }
+            else if (stricmp(queryName, "control:resetremotedalicache")==0)
+            {
+                queryNamedGroupStore().resetCache();
+            }
             else if (stricmp(queryName, "control:restart")==0)
             {
                 FatalError("Roxie process restarted by operator request");