Explorar o código

HPCC-13168 Revert default compression for fixed length to RDiff

In all OSS versions, LZW has been the unintentional default for
compressed disk writes. In the legacy system, if the output was
fixed length, row diff compression was used.
This delivers significantly faster compression at the expense of
a lower compression ratio.

This commit restores the originally intended defaults: RDiff
for fixed-length output and LZW for variable-length output.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith hai 10 anos
pai
achega
71de70be17

+ 3 - 1
thorlcr/activities/thactivityutil.cpp

@@ -782,6 +782,8 @@ IFileIO *createMultipleWrite(CActivityBase *activity, IPartDescriptor &partDesc,
     Owned<IFileIO> fileio;
     if (compress)
     {
+        if (activity->getOptBool(THOROPT_COMP_FORCELZW, false))
+            recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces LZW
         fileio.setown(createCompressedFileWriter(file, recordSize, extend, true, ecomp));
         if (!fileio)
         {
@@ -795,7 +797,7 @@ IFileIO *createMultipleWrite(CActivityBase *activity, IPartDescriptor &partDesc,
         fileio.setown(file->open(extend&&file->exists()?IFOwrite:IFOcreate)); 
     if (!fileio)
         throw MakeActivityException(activity, TE_FileCreationFailed, "Failed to create file for write (%s) error = %d", outLocationName.str(), GetLastError());
-    ActPrintLog(activity, "Writing to file: %s", file->queryFilename());
+    ActPrintLog(activity, "Writing to file: %s, compress=%s, rdiff=%s", file->queryFilename(), compress ? "true" : "false", (compress && recordSize) ? "true" : "false");
     return new CWriteHandler(*activity, partDesc, file, fileio, iProgress, direct, renameToPrimary, aborted);
 }
 

+ 11 - 2
thorlcr/activities/thdiskbaseslave.cpp

@@ -314,7 +314,16 @@ void CDiskWriteSlaveActivityBase::open()
     bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
     if (extend)
         ActPrintLog("Extending file %s", fName.get());
-    size32_t exclsz = 0;
+
+    size32_t diskRowMinSz = 0;
+    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
+    if (diskRowMeta->isFixedSize())
+    {
+        diskRowMinSz = diskRowMeta->getMinRecordSize();
+        if (grouped)
+            diskRowMinSz += 1;
+    }
+
     calcFileCrc = true;
 
     bool external = dlfn.isExternal();
@@ -324,7 +333,7 @@ void CDiskWriteSlaveActivityBase::open()
 
     bool direct = query || (external && !firstNode());
     bool rename = !external || (!query && lastNode());
-    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, exclsz, compress, extend||(external&&!query), ecomp, this, direct, rename, &abortSoon, (external&&!query) ? &tempExternalName : NULL);
+    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, compress, extend||(external&&!query), ecomp, this, direct, rename, &abortSoon, (external&&!query) ? &tempExternalName : NULL);
 
     if (compress)
     {

+ 1 - 0
thorlcr/thorutil/thormisc.hpp

@@ -65,6 +65,7 @@
 #define THOROPT_JOINHELPER_THREADS    "joinHelperThreads"       // Number of threads to use in threaded variety of join helper
 #define THOROPT_LKJOIN_LOCALFAILOVER  "lkjoin_localfailover"    // Force SMART to failover to distributed local lookup join (for testing only)   (default = false)
 #define THOROPT_LKJOIN_HASHJOINFAILOVER "lkjoin_hashjoinfailover" // Force SMART to failover to hash join (for testing only)                     (default = false)
+#define THOROPT_COMP_FORCELZW         "forceLZW"                // Forces file compression to use LZW                                            (default = false)
 
 #define INITIAL_SELFJOIN_MATCH_WARNING_LEVEL 20000  // max of row matches before selfjoin emits warning