Bläddra i källkod

HPCC-18671 Move the fileposition field into the index row

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 7 år sedan
förälder
incheckning
67431be158

+ 3 - 21
common/fileview2/fvidxsource.cpp

@@ -197,10 +197,7 @@ bool IndexDataSource::init()
         values.append(*set);
     }
 
-    fileposFieldType = diskMeta->queryType(diskMeta->numColumns()-1);
-    assertex(fileposFieldType && fileposFieldType->isInteger());
-
-    //Now gather all the 
+    diskMeta->patchIndexFileposition(); // Now returned as a bigendian field on the end of the row
 
     //Default cursor if no filter is applied
     applyFilter();
@@ -307,26 +304,11 @@ bool IndexDataSource::getNextRow(MemoryBuffer & out, bool extractRow)
                 }
                 else
                 {
-                    unsigned fileposSize = fileposFieldType->getSize();
-    //              unsigned thisSize = manager->queryRecordSize();             // Should be possible - needs a new function to call cursor->getSize()
                     offset_t filepos;
                     const byte * thisRow = manager->queryKeyBuffer(filepos);
-                    unsigned thisSize = diskMeta->getRecordSize(thisRow) - fileposSize;
+                    unsigned thisSize = diskMeta->getRecordSize(thisRow);
                     void * temp = out.reserve(thisSize);
                     memcpy(temp, thisRow, thisSize);
-
-                    //Append the fileposition, in the correct size/endianness
-                    assertex(sizeof(filepos) >= 8);
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-                    void * data = &filepos;
-#else
-                    void * data = (byte *)&filepos + sizeof(filepos) - fileposSize;
-#endif
-                    if (fileposFieldType->isSwappedEndian())
-                        out.appendSwap(fileposSize, data);
-                    else
-                        out.append(fileposSize, data);
                 }
             }
             return true;
@@ -446,7 +428,7 @@ bool IndexDataSource::addFilter(unsigned column, unsigned matchLen, unsigned siz
 
 void IndexDataSource::applyFilter()
 {
-    manager.setown(createLocalKeyManager(tlk, tlk->keySize(), NULL));
+    manager.setown(createLocalKeyManager(tlk, 0, NULL));
     ForEachItemIn(i, values)
     {
         IStringSet & cur = values.item(i);

+ 0 - 1
common/fileview2/fvidxsource.ipp

@@ -80,7 +80,6 @@ protected:
     HqlExprAttr diskRecord;
     Owned<IDistributedFile> df;
     Linked<FVDataSource> original;
-    ITypeInfo * fileposFieldType;
     unsigned __int64 totalRows;
     unsigned __int64 nextRowToRead;
     unsigned keyedSize;

+ 5 - 0
common/fileview2/fvsource.cpp

@@ -458,6 +458,11 @@ unsigned DataSourceMetaData::numColumns() const
     return fields.ordinality() - numFieldsToIgnore;
 }
 
+void DataSourceMetaData::patchIndexFileposition()
+{
+    fields.tos().type.setown(makeSwapIntType(8, false));
+}
+
 ITypeInfo * DataSourceMetaData::queryType(unsigned column) const
 {
     return fields.item(column).type;

+ 1 - 0
common/fileview2/fvsource.ipp

@@ -123,6 +123,7 @@ public:
     void addFileposition();
     void addGrouping();
     void addVirtualField(const char * name, const char * xpath, ITypeInfo * type);
+    void patchIndexFileposition();
 
     void extractKeyedInfo(UnsignedArray & offsets, TypeInfoArray & types);
     unsigned fixedSize() { return minRecordSize; }

+ 6 - 4
ecl/hthor/hthorkey.cpp

@@ -744,11 +744,12 @@ void CHThorIndexReadActivityBase::verifyIndex(IKeyIndex * idx)
     keySize = idx->keySize();
     if (eclKeySize.isFixedSize())
     {
+        unsigned fileposSize = idx->hasSpecialFileposition() ? sizeof(offset_t) : 0;
         if(layoutTrans)
             layoutTrans->checkSizes(df->queryLogicalName(), eclKeySize.getFixedSize(), keySize);
         else
-            if (keySize != eclKeySize.getFixedSize())
-                throw MakeStringException(0, "Key size mismatch reading index %s: index indicates size %u, ECL indicates size %u", df->queryLogicalName(), keySize, eclKeySize.getFixedSize());
+            if (keySize != eclKeySize.getFixedSize() + fileposSize)
+                throw MakeStringException(0, "Key size mismatch reading index %s: index indicates size %u, ECL indicates size %u", df->queryLogicalName(), keySize, eclKeySize.getFixedSize() + fileposSize);
     }
 }
 
@@ -4050,11 +4051,12 @@ protected:
     {
         if (eclKeySize.isFixedSize())
         {
+            unsigned fileposSize = idx->hasSpecialFileposition() ? sizeof(offset_t) : 0;
             if(trans)
                 trans->checkSizes(f->queryLogicalName(), eclKeySize.getFixedSize(), idx->keySize());
             else
-                if(idx->keySize() != eclKeySize.getFixedSize())
-                    throw MakeStringException(1002, "Key size mismatch on key %s: key file indicates record size should be %u, but ECL declaration was %u", f->queryLogicalName(), idx->keySize(), eclKeySize.getFixedSize());
+                if(idx->keySize() != eclKeySize.getFixedSize() + fileposSize)
+                    throw MakeStringException(1002, "Key size mismatch on key %s: key file indicates record size should be %u, but ECL declaration was %u", f->queryLogicalName(), idx->keySize(), eclKeySize.getFixedSize() + fileposSize);
         }
     }
 

+ 34 - 6
system/jhtree/ctfile.cpp

@@ -742,12 +742,34 @@ bool CJHTreeNode::getValueAt(unsigned int index, char *dst) const
     if (index >= hdr.numKeys) return false;
     if (dst)
     {
-        if (rowexp.get()) {
-            rowexp->expandRow(dst,index,sizeof(__int64),keyLen);
+        if (keyHdr->hasSpecialFileposition())
+        {
+            //It would make sense to have the fileposition at the start of the row from the perspective of the
+            //internal representation, but that would complicate everything else which assumes the keyed
+            //fields start at the beginning of the row.
+            if (rowexp.get())
+            {
+                rowexp->expandRow(dst,index,sizeof(offset_t),keyLen);
+                rowexp->expandRow(dst+keyLen,index,0,sizeof(offset_t));
+            }
+            else
+            {
+                const char * p = keyBuf + index*keyRecLen;
+                memcpy(dst, p + sizeof(offset_t), keyLen);
+                memcpy(dst+keyLen, p, sizeof(offset_t));
+            }
         }
-        else {
-            const char * p = keyBuf + index*keyRecLen + sizeof(__int64);
-            memcpy(dst, p, keyLen);
+        else
+        {
+            if (rowexp.get())
+            {
+                rowexp->expandRow(dst,index,0,keyLen);
+            }
+            else
+            {
+                const char * p = keyBuf + index*keyRecLen;
+                memcpy(dst, p, keyLen);
+            }
         }
     }
     return true;
@@ -913,7 +935,13 @@ bool CJHVarTreeNode::getValueAt(unsigned int num, char *dst) const
         const char * p = recArray[num];
         KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
         _WINREV(reclen);
-        memcpy(dst, p + sizeof(offset_t), reclen);
+        if (keyHdr->hasSpecialFileposition())
+        {
+            memcpy(dst, p + sizeof(offset_t), reclen);
+            memcpy(dst+reclen, p, sizeof(offset_t));
+        }
+        else
+            memcpy(dst, p, reclen);
     }
     return true;
 }

+ 1 - 0
system/jhtree/ctfile.hpp

@@ -150,6 +150,7 @@ public:
     inline KeyHdr *getHdrStruct() { return &hdr; }
     inline static size32_t getSize() { return sizeof(KeyHdr); }
     inline unsigned getNodeSize() { return hdr.nodeSize; }
+    inline bool hasSpecialFileposition() const { return true; }
 };
 
 class jhtree_decl CNodeBase : public CInterface

+ 13 - 10
system/jhtree/jhtree.cpp

@@ -639,14 +639,6 @@ public:
                 assertex(keyedSize==ki->keyedSize());
                 assertex(keySize==ki->keySize());
             }
-            if (eclKeySize && (0 == (ki->getFlags() & HTREE_VARSIZE)) && (eclKeySize != keySize))
-            {
-                StringBuffer err;
-                err.appendf("Key size mismatch - key file (%s) indicates record size should be %d, but ECL declaration was %d", keyName.get(), keySize, eclKeySize);
-                IException *e = MakeStringExceptionDirect(1000, err.str());
-                EXCLOG(e, err.str());
-                throw e;
-            }
         }
     }
 
@@ -1613,9 +1605,15 @@ void CKeyIndex::dumpNode(FILE *out, offset_t pos, unsigned count, bool isRaw)
     ::dumpNode(out, node, keySize(), count, isRaw);
 }
 
+bool CKeyIndex::hasSpecialFileposition() const
+{
+    return keyHdr->hasSpecialFileposition();
+}
+
 size32_t CKeyIndex::keySize()
 {
-    return keyHdr->getMaxKeyLength();
+    size32_t fileposSize = keyHdr->hasSpecialFileposition() ? 8 : 0;
+    return keyHdr->getMaxKeyLength() + fileposSize;
 }
 
 size32_t CKeyIndex::keyedSize()
@@ -2091,6 +2089,7 @@ public:
     virtual IPropertyTree * getMetadata() { return checkOpen().getMetadata(); }
     virtual unsigned getNodeSize() { return checkOpen().getNodeSize(); }
     virtual const IFileIO *queryFileIO() const override { return iFileIO; } // NB: if not yet opened, will be null
+    virtual bool hasSpecialFileposition() const { return realKey ? realKey->hasSpecialFileposition() : false; }
 };
 
 extern jhtree_decl IKeyIndex *createKeyIndex(const char *keyfile, unsigned crc, IFileIO &iFileIO, bool isTLK, bool preloadAllowed)
@@ -2369,6 +2368,7 @@ class CKeyMerger : public CKeyLevelManager
     unsigned *mergeheap;
     unsigned numkeys;
     unsigned activekeys;
+    unsigned compareSize = 0;
     IArrayOf<IKeyCursor> cursorArray;
     PointerArray bufferArray;
     PointerArray fixedArray;
@@ -2392,7 +2392,9 @@ class CKeyMerger : public CKeyLevelManager
     {
         const char *c1 = buffers[mergeheap[a]];
         const char *c2 = buffers[mergeheap[b]];
-        int ret = memcmp(c1+sortFieldOffset, c2+sortFieldOffset, keySize-sortFieldOffset); // NOTE - compare whole key not just keyed part.
+        //Backwards compatibility - do not compare the fileposition field, even if it would be significant.
+        //int ret = memcmp(c1+sortFieldOffset, c2+sortFieldOffset, keySize-sortFieldOffset); // NOTE - compare whole key not just keyed part.
+        int ret = memcmp(c1+sortFieldOffset, c2+sortFieldOffset, compareSize-sortFieldOffset); // NOTE - compare whole key not just keyed part.
         if (!ret && sortFieldOffset)
             ret = memcmp(c1, c2, sortFieldOffset);
         return ret;
@@ -2646,6 +2648,7 @@ public:
                 throw MakeStringException(0, "Invalid key size 0 in key %s", ki->queryFileName());
             keyedSize = ki->keyedSize();
             numkeys = _keyset->numParts();
+            compareSize = keySize - (ki->hasSpecialFileposition() ? sizeof(offset_t) : 0);
         }
         else
             numkeys = 0;

+ 1 - 0
system/jhtree/jhtree.hpp

@@ -81,6 +81,7 @@ interface jhtree_decl IKeyIndex : public IKeyIndexBase
     virtual IPropertyTree * getMetadata() = 0;
     virtual unsigned getNodeSize() = 0;
     virtual const IFileIO *queryFileIO() const = 0;
+    virtual bool hasSpecialFileposition() const = 0;
 };
 
 interface IKeyArray : extends IInterface

+ 1 - 0
system/jhtree/jhtree.ipp

@@ -123,6 +123,7 @@ public:
     virtual offset_t queryMetadataHead();
     virtual IPropertyTree * getMetadata();
     virtual unsigned getNodeSize() { return keyHdr->getNodeSize(); }
+    virtual bool hasSpecialFileposition() const;
  
  // INodeLoader impl.
     virtual CJHTreeNode *loadNode(offset_t offset) = 0;