Преглед изворни кода

HPCC-18725 Optimize dafilesrv size() and getInfo() operations

size() and getInfo() are very expensive if the files they
operate on are inside directories with lots of files.
This is because both operations use the remote command getDir
to retrieve 1 entry and despite it only serializing 1 item back
it is very expensive on the server side at the OS level and in
dafilesrv to scan large directories.

Change to spot directory request involving trivial case (as used
by size() and getInfo()) where there is no recursion and no
wildcards. In these cases a simple direct single file request
can be made and the same information can be serialized back.

Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
Jake Smith пре 7 година
родитељ
комит
a7103b6b4a
1 измењених фајлова са 62 додато и 26 уклоњено
  1. 62 26
      common/remote/sockfile.cpp

+ 62 - 26
common/remote/sockfile.cpp

@@ -2003,7 +2003,7 @@ public:
             return createDirectoryIterator("",""); // NULL iterator
             return createDirectoryIterator("",""); // NULL iterator
 
 
         CRemoteDirectoryIterator *ret = new CRemoteDirectoryIterator(ep, filename);
         CRemoteDirectoryIterator *ret = new CRemoteDirectoryIterator(ep, filename);
-        byte stream=1;
+        byte stream = (sub || !mask || containsFileWildcard(mask)) ? 1 : 0; // no point in streaming if mask without wildcards or sub, as will only be <= 1 match.
 
 
         Owned<CEndpointCS> crit = dirCSTable->getCrit(ep); // NB dirCSTable doesn't own, last reference will remove from table
         Owned<CEndpointCS> crit = dirCSTable->getCrit(ep); // NB dirCSTable doesn't own, last reference will remove from table
         CriticalBlock block(*crit);
         CriticalBlock block(*crit);
@@ -2015,7 +2015,7 @@ public:
             sendRemoteCommand(sendBuffer, replyBuffer);
             sendRemoteCommand(sendBuffer, replyBuffer);
             if (ret->appendBuf(replyBuffer))
             if (ret->appendBuf(replyBuffer))
                 break;
                 break;
-            stream = 2;
+            stream = 2; // NB: will never get here if streaming was (if stream==0 above)
         }
         }
         return ret;
         return ret;
     }
     }
@@ -5282,36 +5282,72 @@ public:
         bool sub;
         bool sub;
         byte stream = 0;
         byte stream = 0;
         msg.read(name).read(mask).read(includedir).read(sub);
         msg.read(name).read(mask).read(includedir).read(sub);
-        if (msg.remaining()>=sizeof(byte)) {
+        if (msg.remaining()>=sizeof(byte))
+        {
             msg.read(stream);
             msg.read(stream);
             if (stream==1)
             if (stream==1)
                 client.opendir.clear();
                 client.opendir.clear();
         }
         }
-
         if (TF_TRACE)
         if (TF_TRACE)
-            PROGLOG("GetDir,  '%s', '%s'",name.get(),mask.get());
-        Owned<IFile> dir=createIFile(name);
-
-        Owned<IDirectoryIterator> iter;
-        if (stream>1)
-            iter.set(client.opendir);
-        else {
-            iter.setown(dir->directoryFiles(mask.length()?mask.get():NULL,sub,includedir));
-            if (stream != 0)
-                client.opendir.set(iter);
-        }
-        if (!iter) {
-            reply.append((unsigned)RFSERR_GetDirFailed);
-            return false;
-        }
-        reply.append((unsigned)RFEnoerror);
-        if (CRemoteDirectoryIterator::serialize(reply,iter,stream?0x100000:0,stream<2)) {
-            if (stream != 0)
-                client.opendir.clear();
+            PROGLOG("GetDir,  '%s', '%s', stream='%u'",name.get(),mask.get(),stream);
+        if (!stream && !containsFileWildcard(mask))
+        {
+            // if no streaming, and mask contains no wildcard, it is much more efficient to get the info without a directory iterator!
+            StringBuffer fullFilename(name);
+            addPathSepChar(fullFilename).append(mask);
+            Owned<IFile> iFile = createIFile(fullFilename);
+            if (!iFile->exists())
+            {
+                reply.append((unsigned)RFSERR_GetDirFailed);
+                return false;
+            }
+            else
+            {
+                reply.append((unsigned)RFEnoerror);
+                // NB: This must preserve same serialization format as CRemoteDirectoryIterator::serialize produces for 1 file.
+                byte b=1;
+                reply.append(b);
+                bool isDir = foundYes == iFile->isDirectory();
+                reply.append(isDir);
+                reply.append(isDir ? 0 : iFile->size());
+                CDateTime dt;
+                iFile->getTime(nullptr, &dt, nullptr);
+                dt.serialize(reply);
+                reply.append(iFile->queryFilename());
+                b = 0;
+                reply.append(b);
+                return true;
+            }
         }
         }
-        else {
-            bool cont=true;
-            reply.append(cont);
+        else
+        {
+            Owned<IFile> dir=createIFile(name);
+
+            Owned<IDirectoryIterator> iter;
+            if (stream>1)
+                iter.set(client.opendir);
+            else
+            {
+                iter.setown(dir->directoryFiles(mask.length()?mask.get():NULL,sub,includedir));
+                if (stream != 0)
+                    client.opendir.set(iter);
+            }
+            if (!iter)
+            {
+                reply.append((unsigned)RFSERR_GetDirFailed);
+                return false;
+            }
+            reply.append((unsigned)RFEnoerror);
+            if (CRemoteDirectoryIterator::serialize(reply,iter,stream?0x100000:0,stream<2))
+            {
+                if (stream != 0)
+                    client.opendir.clear();
+            }
+            else
+            {
+                bool cont=true;
+                reply.append(cont);
+            }
         }
         }
         return true;
         return true;
     }
     }