Преглед изворни кода

HPCC-10456 Codegen support for streamed datasets from embedded

Provide hash function information to dictionary type info structures.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman пре 11 година
родитељ
комит
d23748ede0

+ 1 - 0
ecl/hqlcpp/hqlcpp.ipp

@@ -48,6 +48,7 @@ enum GraphLocalisation {
 
 
 enum { 
 enum { 
     EclTextPrio = 1000,         // has no dependencies on anything else
     EclTextPrio = 1000,         // has no dependencies on anything else
+    HashFunctionPrio = 1100,
     TypeInfoPrio = 1200,
     TypeInfoPrio = 1200,
     RowMetaPrio = 1500,         
     RowMetaPrio = 1500,         
     XmlTransformerPrio = 1700,
     XmlTransformerPrio = 1700,

+ 5 - 2
ecl/hqlcpp/hqlhtcpp.cpp

@@ -3899,6 +3899,9 @@ unsigned HqlCppTranslator::buildRtlType(StringBuffer & instanceName, ITypeInfo *
             childType = buildRtlType(arguments, ::queryRecordType(type));
             childType = buildRtlType(arguments, ::queryRecordType(type));
             if (hasLinkCountedModifier(type))
             if (hasLinkCountedModifier(type))
                 fieldType |= RFTMlinkcounted;
                 fieldType |= RFTMlinkcounted;
+            StringBuffer lookupHelperName;
+            buildDictionaryHashClass(::queryRecord(type), lookupHelperName);
+            arguments.append(",&").append(lookupHelperName.str());
             break;
             break;
         }
         }
     case type_set:
     case type_set:
@@ -5349,9 +5352,9 @@ void HqlCppTranslator::buildDictionaryHashClass(IHqlExpression *record, StringBu
         appendUniqueId(lookupHelperName.append("lu"), getConsistentUID(record));
         appendUniqueId(lookupHelperName.append("lu"), getConsistentUID(record));
 
 
         BuildCtx classctx(declarectx);
         BuildCtx classctx(declarectx);
-        //I suspect all the priorities should be killed.  This is here because you can have meta functions accessing the
+        //I suspect all the priorities should be killed.  This is here because you can have type info constructors accessing the
         //dictionary hash functions.
         //dictionary hash functions.
-        classctx.setNextPriority(RowMetaPrio);
+        classctx.setNextPriority(HashFunctionPrio);
 
 
         beginNestedClass(classctx, lookupHelperName, "IHThorHashLookupInfo");
         beginNestedClass(classctx, lookupHelperName, "IHThorHashLookupInfo");
         OwnedHqlExpr searchRecord = getDictionarySearchRecord(record);
         OwnedHqlExpr searchRecord = getDictionarySearchRecord(record);

+ 5 - 4
plugins/pyembed/pyembed.cpp

@@ -781,9 +781,10 @@ public:
     virtual IRowStream *getDatasetResult(IEngineRowAllocator * _resultAllocator)
     virtual IRowStream *getDatasetResult(IEngineRowAllocator * _resultAllocator)
     {
     {
         assertex(result && result != Py_None);
         assertex(result && result != Py_None);
-        if (!PyList_Check(result))
-            typeError("list", NULL);
-        resultIterator = PyObject_GetIter(result);
+        if (!PyList_Check(result) && !PyGen_Check(result))  // MORE - should I remove this check, and just say if it is iterable, it's good?
+            typeError("list or generator", NULL);
+        resultIterator.setown(PyObject_GetIter(result));
+        checkPythonError();
         resultAllocator.set(_resultAllocator);
         resultAllocator.set(_resultAllocator);
         return LINK(this);
         return LINK(this);
     }
     }
@@ -951,7 +952,7 @@ protected:
     OwnedPyObject script;
     OwnedPyObject script;
 
 
     Linked<IEngineRowAllocator> resultAllocator;
     Linked<IEngineRowAllocator> resultAllocator;
-    PyObject *resultIterator;
+    OwnedPyObject resultIterator;
 };
 };
 
 
 class Python27EmbedScriptContext : public Python27EmbedContextBase
 class Python27EmbedScriptContext : public Python27EmbedContextBase

+ 12 - 1
plugins/v8embed/v8embed.cpp

@@ -437,9 +437,20 @@ public:
 
 
     virtual IRowStream *getDatasetResult(IEngineRowAllocator * _resultAllocator)
     virtual IRowStream *getDatasetResult(IEngineRowAllocator * _resultAllocator)
     {
     {
+        assertex (!result.IsEmpty());
+        if (!result->IsArray())
+            rtlFail(0, "v8embed: type mismatch - return value was not an array");
+        UNIMPLEMENTED;
//        resultIterator.setown(new ArrayIterator(result));
+//        resultAllocator.set(_resultAllocator);
+//        return LINK(this);
+    }
+    virtual const void *nextRow()
+    {
+//        assertex(resultAllocator);
+//        assertex(resultIterator);
         UNIMPLEMENTED;
         UNIMPLEMENTED;
     }
     }
-
     virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
     virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
     {
     {
         v8::HandleScope handle_scope;
         v8::HandleScope handle_scope;

+ 21 - 4
rtl/eclrtl/rtlfield.cpp

@@ -448,11 +448,10 @@ size32_t RtlVarStringTypeInfo::build(ARowBuilder &builder, size32_t offset, cons
     char *value;
     char *value;
     source.getStringResult(field, size, value);
     source.getStringResult(field, size, value);
     byte *dest = builder.getSelf()+offset;
     byte *dest = builder.getSelf()+offset;
-    if (isEbcdic())
-        UNIMPLEMENTED;
     if (!isFixedSize())
     if (!isFixedSize())
     {
     {
         builder.ensureCapacity(offset+size+1, field->name->str());
         builder.ensureCapacity(offset+size+1, field->name->str());
+        // See notes re EBCDIC conversion in RtlStringTypeInfo code
         memcpy(dest, value, size);
         memcpy(dest, value, size);
         dest[size] = '\0';
         dest[size] = '\0';
         offset += size+1;
         offset += size+1;
@@ -1218,12 +1217,30 @@ size32_t RtlDictionaryTypeInfo::size(const byte * self, const byte * selfrow) co
 
 
 size32_t RtlDictionaryTypeInfo::build(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, IFieldSource &source) const
 size32_t RtlDictionaryTypeInfo::build(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, IFieldSource &source) const
 {
 {
+    source.processBeginDataset(field);
     if (isLinkCounted())
     if (isLinkCounted())
     {
     {
-        UNIMPLEMENTED;
+        // a 32-bit record count, and a pointer to a hash table of record pointers
+        size32_t sizeInBytes = sizeof(size32_t) + sizeof(void *);
+        builder.ensureCapacity(offset+sizeInBytes, field->name->str());
+        IEngineRowAllocator *childAllocator = builder.queryAllocator()->createChildRowAllocator(child);
+        RtlLinkedDictionaryBuilder dictBuilder(childAllocator, hashInfo);
+        RtlFieldStrInfo dummyField("<nested row>", NULL, child);
+        while (source.processNextRow(field))
+        {
+            RtlDynamicRowBuilder childBuilder(childAllocator);
+            size32_t childLen = child->build(childBuilder, 0, &dummyField, source);
+            dictBuilder.appendOwn((void *) childBuilder.finalizeRowClear(childLen));
+        }
+        // Go back in and patch the count and the pointer to the linked rows
+        rtlWriteInt4(builder.getSelf()+offset, dictBuilder.getcount());
+        * ( const void * * ) (builder.getSelf()+offset+sizeof(size32_t)) = dictBuilder.linkrows();
+        offset += sizeInBytes;
     }
     }
     else
     else
-        UNIMPLEMENTED;
+        UNIMPLEMENTED;  // And may never be...
+    source.processEndDataset(field);
+    return offset;
 }
 }
 
 
 size32_t RtlDictionaryTypeInfo::process(const byte * self, const byte * selfrow, const RtlFieldInfo * field, IFieldProcessor & target) const
 size32_t RtlDictionaryTypeInfo::process(const byte * self, const byte * selfrow, const RtlFieldInfo * field, IFieldProcessor & target) const

+ 3 - 1
rtl/eclrtl/rtlfield_imp.hpp

@@ -250,7 +250,9 @@ struct ECLRTL_API RtlDatasetTypeInfo : public RtlCompoundTypeInfo
 
 
 struct ECLRTL_API RtlDictionaryTypeInfo : public RtlCompoundTypeInfo
 struct ECLRTL_API RtlDictionaryTypeInfo : public RtlCompoundTypeInfo
 {
 {
-    inline RtlDictionaryTypeInfo(unsigned _fieldType, unsigned _length, const RtlTypeInfo * _child) : RtlCompoundTypeInfo(_fieldType, _length, _child) {}
+    inline RtlDictionaryTypeInfo(unsigned _fieldType, unsigned _length, const RtlTypeInfo * _child, IHThorHashLookupInfo *_hashInfo)
+    : RtlCompoundTypeInfo(_fieldType, _length, _child), hashInfo(_hashInfo) {}
+    IHThorHashLookupInfo * hashInfo;
 
 
     virtual size32_t size(const byte * self, const byte * selfrow) const;
     virtual size32_t size(const byte * self, const byte * selfrow) const;
     virtual size32_t build(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, IFieldSource &source) const;
     virtual size32_t build(ARowBuilder &builder, size32_t offset, const RtlFieldInfo *field, IFieldSource &source) const;

+ 15 - 4
testing/ecl/streame.ecl

@@ -25,7 +25,7 @@ namesRecord := RECORD
     STRING name1;
     STRING name1;
     STRING10 name2;
     STRING10 name2;
     LINKCOUNTED DATASET(childrec) childnames;
     LINKCOUNTED DATASET(childrec) childnames;
-//    DICTIONARY(childrec) childdict;
+    LINKCOUNTED DICTIONARY(childrec) childdict{linkcounted};
     childrec r;
     childrec r;
     unsigned1 val1;
     unsigned1 val1;
     integer1   val2;
     integer1   val2;
@@ -48,8 +48,19 @@ ENDEMBED;
 
 
 dataset(namesRecord) streamedNames(data d, utf8 u) := EMBED(Python)
 dataset(namesRecord) streamedNames(data d, utf8 u) := EMBED(Python)
   return [  \
   return [  \
-     ("Gavin", "Halliday", [("a", 1)], ("b", 2), 250, -1,  U'là',  U'là',  U'là', 0x01000000, d, False, {"1","2"}), \
-     ("John", "Smith", [], ("c", 3), 250, -1,  U'là',  U'là',  u, 0x02000000, d, True, [])]
+     ("Gavin", "Halliday", [("a", 1)], [("aa", 11)], ("b", 2), 250, -1,  U'là',  U'là',  U'là', 0x01000000, d, False, {"1","2"}), \
+     ("John", "Smith", [], [], ("c", 3), 250, -1,  U'là',  U'là',  u, 0x02000000, d, True, []) \
+     ]
 ENDEMBED;
 ENDEMBED;
 
 
-output(streamedNames(d'AA', u'là'));
+// Test use of Python generator object for lazy evaluation...
+
+dataset(childrec) testGenerator(unsigned lim) := EMBED(Python)
+  num = 0
+  while num < lim:
+    yield ("Generate:", num)
+    num += 1
+ENDEMBED;
+
+output(streamedNames(d'AA', u'là'));
+output (testGenerator(10));