소스 검색

Fix gh-2836 - buildindex var row,col_prefix fix

Building an index with COMPRESS(FIRST) and variable-length rows caused
excessive amounts of memory to be used when reading the index, because when
the nodes were unpacked, (max row length * numRows) bytes were allocated and
retained per expanded node.

This commit encodes each row's length in the compressed format for variable-length leaf nodes, so that unpacking reserves only the space each row needs.

Signed-off-by: Jake Smith <jake.smith@lexisnexis.com>
Jake Smith 13 년 전
부모
커밋
80b61faa93
1개의 변경된 파일68개의 추가작업 그리고 23개의 파일을 삭제
  1. 68 23
      system/jhtree/ctfile.cpp

+ 68 - 23
system/jhtree/ctfile.cpp

@@ -345,9 +345,18 @@ bool CWriteNode::add(offset_t pos, const void *indata, size32_t insize, unsigned
         }
 
         int bytes = sizeof(pos) + size;
+        if (isVariable)
+            bytes += sizeof(KEYRECSIZE_T);
         if (hdr.keyBytes + bytes >= maxBytes)    // probably could be '>' (loses byte)
             return false;
 
+        if (isVariable && isLeaf())
+        {
+            KEYRECSIZE_T _insize = insize;
+            _WINREV(_insize);
+            memcpy(keyPtr, &_insize, sizeof(_insize));
+            keyPtr += sizeof(_insize);
+        }
         _WINREV(pos);
         memcpy(keyPtr, &pos, sizeof(pos));
         keyPtr += sizeof(pos);
@@ -613,43 +622,74 @@ void CJHTreeNode::unpack(const void *node, bool needCopy)
         if (keyType & COL_PREFIX)
         {
             MTIME_SECTION(timer, "COL_PREFIX expand");
-            expandedSize = hdr.numKeys * keyRecLen;
             
-            keyBuf = (char *) allocMem(expandedSize);
-            unsigned workRecLen = keyRecLen - sizeof(offset_t);
-            const char *s = keys;
-            char *t = keyBuf;
-
-            if (expandedSize) {
+            if (hdr.numKeys) {
+                bool handleVariable = isVariable && isLeaf();
+                KEYRECSIZE_T workRecLen;
+                MemoryBuffer keyBufMb;
+                const char *s = keys;
+                char *t;
                 // do first row
-                *(offset_t *)t = *(const offset_t *)s;
-                t += sizeof( offset_t );
-                s += sizeof( offset_t );
-                const char *prev = t;           // this is where next row gets data from
-                // first time fill pack with 0
+                if (handleVariable) {
+                    memcpy(&workRecLen, s, sizeof(workRecLen));
+                    _WINREV(workRecLen);
+                    t = (char *)keyBufMb.reserve(sizeof(workRecLen)+sizeof(offset_t)+workRecLen);
+                    memcpy(t, s, sizeof(workRecLen));
+                    s += sizeof(workRecLen);
+                    t += sizeof(workRecLen);
+                }
+                else {
+                    t = (char *)keyBufMb.reserveTruncate(hdr.numKeys * keyRecLen);
+                    workRecLen = keyRecLen - sizeof(offset_t);
+                }
+                memcpy(t, s, sizeof(offset_t));
+                s += sizeof(offset_t);
+                t += sizeof(offset_t);
+
+                // this is where next row gets data from
+                const char *prev, *next;
+                unsigned prevOffset;
+                if (handleVariable)
+                    prevOffset = t-((char *)keyBufMb.bufferBase());
+                else
+                    next = t;
+
                 unsigned char pack1 = *s++;
 #ifdef _DEBUG
-                assertex(pack1<=workRecLen);            
+                assertex(0==pack1); // 1st time will be always be 0
 #endif
-                memset(t,0,pack1);
-                t += pack1;
-                size32_t left = workRecLen - pack1;
+                KEYRECSIZE_T left = workRecLen;
                 while (left--) {
                     *t = *s;
                     s++;
                     t++;
                 }
                 // do subsequent rows
-                for (i = 1; i < hdr.numKeys; i++)
-                {
-                    *(offset_t *)t = *(const offset_t *)s;
-                    t += sizeof( offset_t );
-                    s += sizeof( offset_t );
-                    const char * next = t;
+                for (i = 1; i < hdr.numKeys; i++) {
+                    if (handleVariable) {
+                        memcpy(&workRecLen, s, sizeof(workRecLen));
+                        _WINREV(workRecLen);
+                        t = (char *)keyBufMb.reserve(sizeof(workRecLen)+sizeof(offset_t)+workRecLen);
+                        memcpy(t, s, sizeof(workRecLen));
+                        t += sizeof(workRecLen);
+                        s += sizeof(workRecLen);
+                    }
+                    memcpy(t, s, sizeof(offset_t));
+                    s += sizeof(offset_t);
+                    t += sizeof(offset_t);
                     pack1 = *s++;
 #ifdef _DEBUG
                     assertex(pack1<=workRecLen);            
 #endif
+                    if (handleVariable) {
+                        prev = ((char *)keyBufMb.bufferBase())+prevOffset;
+                        // for next
+                        prevOffset = t-((char *)keyBufMb.bufferBase());
+                    }
+                    else {
+                        prev = next;
+                        next = t;
+                    }
                     left = workRecLen - pack1;
                     while (pack1--) {
                         *t = *prev;
@@ -661,8 +701,13 @@ void CJHTreeNode::unpack(const void *node, bool needCopy)
                         s++;
                         t++;
                     }
-                    prev = next;
                 }
+                expandedSize = keyBufMb.length();
+                keyBuf = (char *)keyBufMb.detach();
+            }
+            else {
+                keyBuf = NULL;
+                expandedSize = 0;
             }
         }
         else