Explorar el Código

Fix gh-2836 - buildindex var row,col_prefix fix

Building an index with COMPRESS(FIRST) and variable rows, caused excessive
amounts of memory to be used when reading the index. Because, the when the
nodes were unpacked, the max length * numRows was allocated and retained per
expanded node.

This commit encodes the row lengths in the compressed format

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith hace 13 años
padre
commit
80b61faa93
Se han modificado 1 ficheros con 68 adiciones y 23 borrados
  1. 68 23
      system/jhtree/ctfile.cpp

+ 68 - 23
system/jhtree/ctfile.cpp

@@ -345,9 +345,18 @@ bool CWriteNode::add(offset_t pos, const void *indata, size32_t insize, unsigned
         }
 
         int bytes = sizeof(pos) + size;
+        if (isVariable)
+            bytes += sizeof(KEYRECSIZE_T);
         if (hdr.keyBytes + bytes >= maxBytes)    // probably could be '>' (loses byte)
             return false;
 
+        if (isVariable && isLeaf())
+        {
+            KEYRECSIZE_T _insize = insize;
+            _WINREV(_insize);
+            memcpy(keyPtr, &_insize, sizeof(_insize));
+            keyPtr += sizeof(_insize);
+        }
         _WINREV(pos);
         memcpy(keyPtr, &pos, sizeof(pos));
         keyPtr += sizeof(pos);
@@ -613,43 +622,74 @@ void CJHTreeNode::unpack(const void *node, bool needCopy)
         if (keyType & COL_PREFIX)
         {
             MTIME_SECTION(timer, "COL_PREFIX expand");
-            expandedSize = hdr.numKeys * keyRecLen;
             
-            keyBuf = (char *) allocMem(expandedSize);
-            unsigned workRecLen = keyRecLen - sizeof(offset_t);
-            const char *s = keys;
-            char *t = keyBuf;
-
-            if (expandedSize) {
+            if (hdr.numKeys) {
+                bool handleVariable = isVariable && isLeaf();
+                KEYRECSIZE_T workRecLen;
+                MemoryBuffer keyBufMb;
+                const char *s = keys;
+                char *t;
                 // do first row
-                *(offset_t *)t = *(const offset_t *)s;
-                t += sizeof( offset_t );
-                s += sizeof( offset_t );
-                const char *prev = t;           // this is where next row gets data from
-                // first time fill pack with 0
+                if (handleVariable) {
+                    memcpy(&workRecLen, s, sizeof(workRecLen));
+                    _WINREV(workRecLen);
+                    t = (char *)keyBufMb.reserve(sizeof(workRecLen)+sizeof(offset_t)+workRecLen);
+                    memcpy(t, s, sizeof(workRecLen));
+                    s += sizeof(workRecLen);
+                    t += sizeof(workRecLen);
+                }
+                else {
+                    t = (char *)keyBufMb.reserveTruncate(hdr.numKeys * keyRecLen);
+                    workRecLen = keyRecLen - sizeof(offset_t);
+                }
+                memcpy(t, s, sizeof(offset_t));
+                s += sizeof(offset_t);
+                t += sizeof(offset_t);
+
+                // this is where next row gets data from
+                const char *prev, *next;
+                unsigned prevOffset;
+                if (handleVariable)
+                    prevOffset = t-((char *)keyBufMb.bufferBase());
+                else
+                    next = t;
+
                 unsigned char pack1 = *s++;
 #ifdef _DEBUG
-                assertex(pack1<=workRecLen);            
+                assertex(0==pack1); // 1st time will be always be 0
 #endif
-                memset(t,0,pack1);
-                t += pack1;
-                size32_t left = workRecLen - pack1;
+                KEYRECSIZE_T left = workRecLen;
                 while (left--) {
                     *t = *s;
                     s++;
                     t++;
                 }
                 // do subsequent rows
-                for (i = 1; i < hdr.numKeys; i++)
-                {
-                    *(offset_t *)t = *(const offset_t *)s;
-                    t += sizeof( offset_t );
-                    s += sizeof( offset_t );
-                    const char * next = t;
+                for (i = 1; i < hdr.numKeys; i++) {
+                    if (handleVariable) {
+                        memcpy(&workRecLen, s, sizeof(workRecLen));
+                        _WINREV(workRecLen);
+                        t = (char *)keyBufMb.reserve(sizeof(workRecLen)+sizeof(offset_t)+workRecLen);
+                        memcpy(t, s, sizeof(workRecLen));
+                        t += sizeof(workRecLen);
+                        s += sizeof(workRecLen);
+                    }
+                    memcpy(t, s, sizeof(offset_t));
+                    s += sizeof(offset_t);
+                    t += sizeof(offset_t);
                     pack1 = *s++;
 #ifdef _DEBUG
                     assertex(pack1<=workRecLen);            
 #endif
+                    if (handleVariable) {
+                        prev = ((char *)keyBufMb.bufferBase())+prevOffset;
+                        // for next
+                        prevOffset = t-((char *)keyBufMb.bufferBase());
+                    }
+                    else {
+                        prev = next;
+                        next = t;
+                    }
                     left = workRecLen - pack1;
                     while (pack1--) {
                         *t = *prev;
@@ -661,8 +701,13 @@ void CJHTreeNode::unpack(const void *node, bool needCopy)
                         s++;
                         t++;
                     }
-                    prev = next;
                 }
+                expandedSize = keyBufMb.length();
+                keyBuf = (char *)keyBufMb.detach();
+            }
+            else {
+                keyBuf = NULL;
+                expandedSize = 0;
             }
         }
         else