Browse Source

HPCC-25275 WsSQL nested dataset support

- Adds support for files containing nested DS
- Reports nested columns' data as XML blob
- Reports error if any logic is applied to nested column

Signed-off-by: Rodrigo Pastrana <rodrigo.pastrana@lexisnexisrisk.com>
Rodrigo Pastrana, 4 years ago
parent
commit
fc8017ae9a

+ 1 - 0
esp/scm/ws_sql.ecm

@@ -25,6 +25,7 @@ ESPStruct HPCCColumn
 {
    string Name;
    string Type;
+   ESParray<ESPstruct HPCCColumn, Column> Columns;
 };
 
 ESPStruct HPCCTable

+ 8 - 3
esp/services/ws_sql/SQL2ECL/ECLEngine.cpp

@@ -191,6 +191,8 @@ void ECLEngine::generateSelectECL(HPCCSQLTreeWalker * selectsqlobj, StringBuffer
     out.clear();
     out.append("import std;\n"); /* ALL Generated ECL will import std, even if std lib not used */
 
+    out.append("NormalizeToXMLString(cds) := FUNCTIONMACRO\n\tP := PROJECT(cds, TRANSFORM({UTF8 x}, SELF.x := TOXML(LEFT)));\n\tR := ROLLUP(P,TRUE,TRANSFORM({UTF8 x}, SELF.x := LEFT.x + RIGHT.x));\n\tRETURN R[1].x;\nENDMACRO;\n");
+
     //Prepared statement parameters are handled by ECL STORED service workflow statements
     if (selectsqlobj->hasWhereClause())
         selectsqlobj->getWhereClause()->eclDeclarePlaceHolders(out, 0,0);
@@ -613,7 +615,10 @@ void ECLEngine::generateSelectStruct(HPCCSQLTreeWalker * selectsqlobj, IProperti
         else
         {
             eclEntities->setProp("NONSCALAREXPECTED", "TRUE");
-            selectStructSB.appendf("%s %s := %s.%s;", col->getECLType(), col->getNameOrAlias(), datasource, col->getName());
+            if (strncmp(col->getECLType(), "DATASET", 7)==0)
+                selectStructSB.appendf("UTF8 %s := NormalizeToXMLString(%s.%s);", col->getNameOrAlias(), datasource, col->getName());
+            else
+                selectStructSB.appendf("%s %s := %s.%s;", col->getECLType(), col->getNameOrAlias(), datasource, col->getName());
         }
 
         selectStructSB.append("\n");
@@ -685,7 +690,7 @@ bool ECLEngine::processIndex(HPCCFile * indexfiletouse, StringBuffer & keyedandw
         bool currfilterfieldexistsinindexfile = false;
         for (int indexfilecolumnindex = 0; indexfilecolumnindex < indexfilecolumns->length(); indexfilecolumnindex++)
         {
-            HPCCColumnMetaData currcol = indexfilecolumns->item(indexfilecolumnindex);
+            HPCCColumnMetaData & currcol = indexfilecolumns->item(indexfilecolumnindex);
             const char * currindexfilecolname = currcol.getColumnName();
             if(stricmp( filterclauseuniquenames.item(uniquenamesidx), currindexfilecolname)==0)
             {
@@ -848,7 +853,7 @@ void ECLEngine::findAppropriateIndex(StringArray * relindexes, HPCCSQLTreeWalker
                 IArrayOf<HPCCColumnMetaData> * columns = indexfile->getColumns();
                 ForEachItemIn(colidx, *columns)
                 {
-                    HPCCColumnMetaData currcol = columns->item(colidx);
+                    HPCCColumnMetaData & currcol = columns->item(colidx);
                     if (currcol.isKeyedField())
                     {
                         ForEachItemIn(uniqueidx, uniquenames)

+ 45 - 13
esp/services/ws_sql/SQL2ECL/HPCCFile.cpp

@@ -127,6 +127,37 @@ bool HPCCFile::getFileRecDefwithIndexpos(HPCCColumnMetaData * fieldMetaData, Str
     return false;
 }
 
+// Attaches one child HPCCColumnMetaData to 'parent' for every "Field" element found
+// directly under 'fieldtree'. Fields whose @ecltype starts with "table of" are descended
+// into recursively so that arbitrarily nested layouts are captured.
+// Returns false (after logging) when either argument is null, true otherwise.
+bool setChildColumns(HPCCColumnMetaData * parent, IPropertyTree * fieldtree)
+{
+    if (!parent || !fieldtree)
+    {
+        ESPLOG(LogMin, "Could not set HPCC file childcolumns!");
+        return false;
+    }
+
+    StringBuffer fieldName;
+    StringBuffer fieldEclType;
+
+    Owned<IPropertyTreeIterator> fieldIter = fieldtree->getElements("Field");
+    ForEach(*fieldIter)
+    {
+        IPropertyTree & fieldNode = fieldIter->query();
+        fieldNode.getProp("@ecltype", fieldEclType.clear());
+        fieldNode.getProp("@name", fieldName.clear());
+
+        Owned<HPCCColumnMetaData> child = HPCCColumnMetaData::createHPCCColumnMetaData(fieldName.str());
+        child->setIndex(fieldNode.getPropInt("@position", -1));
+
+        // Grand-children are attached before the type is set: setColumnType() renders a
+        // nested type from the child columns already present on the column.
+        const bool isNested = (strncmp(fieldEclType, "table of", 8) == 0);
+        if (isNested)
+            setChildColumns(child.get(), &fieldNode);
+
+        child->setColumnType(fieldEclType.str());
+        parent->setChildCol(child);
+    }
+    return true;
+}
+
 bool HPCCFile::setFileColumns(const char * eclString)
 {
     StringBuffer text(eclString);
@@ -151,26 +182,27 @@ bool HPCCFile::setFileColumns(const char * eclString)
 
     StringBuffer ecltype;
     StringBuffer colname;
-    int colsize;
-    int colindex;
 
     Owned<IPropertyTreeIterator> fields = rectree->getElements("Field");
     ForEach(*fields)
     {
-      fields->query().getProp("@ecltype", ecltype.clear());
-      if (strncmp(ecltype, "table of", 8)==0)
-          setHasNestedColumns(true);
+        IPropertyTree & curField = fields->query();
 
-      fields->query().getProp("@name", colname.clear());
-      colsize = fields->query().getPropInt("@size", -1);
-      colindex = fields->query().getPropInt("@position", -1);
+        curField.getProp("@ecltype", ecltype.clear());
+        curField.getProp("@name", colname.clear());
 
-      Owned<HPCCColumnMetaData> col = HPCCColumnMetaData::createHPCCColumnMetaData(colname.str());
-      col->setColumnType(ecltype.str());
-      col->setIndex(colindex);
-      col->setTableName(this->fullname.str());
+        Owned<HPCCColumnMetaData> col = HPCCColumnMetaData::createHPCCColumnMetaData(colname.str());
+        col->setIndex(curField.getPropInt("@position", -1));
+        col->setTableName(this->fullname.str());
 
-      columns.append(*LINK(col));
+        if (strncmp(ecltype, "table of", 8)==0)
+        {
+            setHasNestedColumns(true);
+            if(!setChildColumns(col.get(), &curField))
+                return false;
+        }
+        col->setColumnType(ecltype.str());
+        columns.append(*LINK(col));
     }
 
     return true;

+ 30 - 3
esp/services/ws_sql/SQL2ECL/HPCCFileCache.cpp

@@ -23,6 +23,26 @@ HPCCFileCache * HPCCFileCache::createFileCache(const char * username, const char
     ESPLOG(LogMax, "WsSQL: Creating new HPCC FILE CACHE");
     return new HPCCFileCache(username,passwd);
 }
+// Converts each HPCCColumnMetaData in 'cols' into an IEspHPCCColumn appended to 'pColumns'.
+// A column that reports child columns gets them converted recursively and attached through
+// setColumns() before its own name and type are filled in.
+void populateColums(IArrayOf<HPCCColumnMetaData> * cols, IArrayOf<IEspHPCCColumn> & pColumns)
+{
+    for (int colIdx = 0; colIdx < cols->length(); colIdx++)
+    {
+        Owned<IEspHPCCColumn> espColumn = createHPCCColumn();
+        HPCCColumnMetaData & source = cols->item(colIdx);
+
+        IArrayOf<HPCCColumnMetaData> * nested = source.getChildColumns();
+        const bool hasNested = (nested != nullptr) && (nested->length() != 0);
+        if (hasNested)
+        {
+            IArrayOf<IEspHPCCColumn> espChildren;
+            populateColums(nested, espChildren);
+            espColumn->setColumns(espChildren);
+        }
+
+        espColumn->setName(source.getColumnName());
+        espColumn->setType(source.getColumnType());
+        pColumns.append(*espColumn.getLink());
+    }
+}
 
 bool HPCCFileCache::populateTablesResponse(IEspGetDBMetaDataResponse & tablesrespstruct, const char * filterby)
 {
@@ -58,7 +78,15 @@ bool HPCCFileCache::populateTablesResponse(IEspGetDBMetaDataResponse & tablesres
                for (int i = 0; i < cols->length(); i++)
                {
                    Owned<IEspHPCCColumn> pCol = createHPCCColumn();
-                   HPCCColumnMetaData currcol = cols->item(i);
+                   HPCCColumnMetaData& currcol = cols->item(i);
+
+                   IArrayOf<HPCCColumnMetaData> * ccols = currcol.getChildColumns();
+                   if (ccols->length() != 0)
+                   {
+                       IArrayOf<IEspHPCCColumn> pCColumns;
+                       populateColums(ccols, pCColumns);
+                       pCol->setColumns(pCColumns);
+                   }
                    pCol->setName(currcol.getColumnName());
                    pCol->setType(currcol.getColumnType());
                    pColumns.append(*pCol.getLink());
@@ -114,7 +142,6 @@ bool HPCCFileCache::cacheAllHpccFiles(const char * filterby)
        StringBuffer name(attr.queryProp("@name"));
 
        if (name.length()>0 && HPCCFile::validateFileName(name.str()))
-       //if (name.length()>0)
        {
            const char * cachedKey = cacheHpccFileByName(name.str(), true);
            success &= (cachedKey && *cachedKey);
@@ -284,7 +311,7 @@ HPCCFile * HPCCFileCache::fetchHpccFileByName(const char * filename, const char
             file->setFormat(format);
         }
 
-        if (file && ( file->containsNestedColumns() || strncmp(file->getFormat(), "XML", 3)==0))
+        if (file && (strncmp(file->getFormat(), "XML", 3)==0))
             throw MakeStringException(-1,"Nested data files not supported: %s.",filename);
     }
 

+ 9 - 2
esp/services/ws_sql/SQL2ECL/HPCCSQLTreeWalker.cpp

@@ -236,7 +236,7 @@ ISQLExpression * HPCCSQLTreeWalker::expressionTreeWalker(pANTLR3_BASE_TREE exprA
                                 IArrayOf<HPCCColumnMetaData> * cols = file->getColumns();
                                 ForEachItemIn(colidx, *cols)
                                 {
-                                    HPCCColumnMetaData col = cols->item(colidx);
+                                    HPCCColumnMetaData & col = cols->item(colidx);
                                     Owned<ISQLExpression> fve = new SQLFieldValueExpression(file->getFullname(),col.getColumnName());
                                     tmpexp2->addParams(fve.getLink());
                                 }
@@ -304,7 +304,12 @@ ISQLExpression * HPCCSQLTreeWalker::expressionTreeWalker(pANTLR3_BASE_TREE exprA
             case LIKE_SYM:
             case NOT_LIKE:
                 leftexp.set(expressionTreeWalker((pANTLR3_BASE_TREE)(exprAST->getChild(exprAST, 0)),exprAST));
+                if (leftexp->getExpType() != Value_ExpressionType && strnicmp(leftexp->getECLType(),"DATASET",7)==0)
+                    throw MakeStringException(-1, "Cannot apply arithmetic logic to normalized nested column: '%s'!\n", leftexp->getName());
+
                 rightexp.set(expressionTreeWalker((pANTLR3_BASE_TREE)(exprAST->getChild(exprAST, 1)),exprAST));
+                if (rightexp->getExpType() != Value_ExpressionType && strnicmp(rightexp->getECLType(),"DATASET",7)==0)
+                    throw MakeStringException(-1, "Cannot apply arithmetic logic to normalized nested column: '%s'!\n", rightexp->getName());
 
                 tmpexp.setown( new SQLBinaryExpression(exptype,leftexp, rightexp));
                 if (parameterizeStaticValues)
@@ -336,6 +341,8 @@ ISQLExpression * HPCCSQLTreeWalker::expressionTreeWalker(pANTLR3_BASE_TREE exprA
             case NOT_SYM:
             {
                 tmpexp.setown(new SQLUnaryExpression(expressionTreeWalker((pANTLR3_BASE_TREE)(exprAST->getChild(exprAST, 0)),exprAST), exptype ));
+                // Reject logic applied to a nested (DATASET) column. The name reported must come
+                // from tmpexp: rightexp is never assigned in this unary case, so dereferencing it
+                // while building the error message would fault instead of reporting the error.
+                if (tmpexp->getExpType() != Value_ExpressionType && strnicmp(tmpexp->getECLType(),"DATASET",7)==0)
+                    throw MakeStringException(-1, "Cannot apply arithmetic logic to normalized nested column: '%s'!\n", tmpexp->getName());
                 break;
             }
             //case PARENEXP: ANTLR idiosyncrasy prevented using imaginary token as root node
@@ -1190,7 +1197,7 @@ void HPCCSQLTreeWalker::expandWildCardColumn()
                         IArrayOf<HPCCColumnMetaData> * cols = file->getColumns();
                         ForEachItemIn(colidx, *cols)
                         {
-                            HPCCColumnMetaData col = cols->item(colidx);
+                            HPCCColumnMetaData & col = cols->item(colidx);
                             Owned<ISQLExpression> fve = new SQLFieldValueExpression(file->getFullname(),col.getColumnName());
                             if (tableidx == 0 && colidx == 0)
                             {

+ 29 - 2
esp/services/ws_sql/SQL2ECL/SQLColumn.hpp

@@ -124,6 +124,7 @@ private:
     int decimalDigits;
     StringBuffer columnType;
     bool keyedField;
+    IArrayOf<HPCCColumnMetaData> childColumns;
 
 public:
     IMPLEMENT_IINTERFACE;
@@ -146,8 +147,9 @@ public:
     virtual ~HPCCColumnMetaData()
     {
 #ifdef _DEBUG
-        fprintf(stderr, "leaving columnmetadata.");
+        fprintf(stderr, "leaving %s columnmetadata.\n", columnName.str()); // debug-only trace naming the column being destroyed
 #endif
+        childColumns.kill(false); // NOTE(review): presumably drops the links taken in setChildCol() - confirm IArrayOf::kill(false) semantics
     }
     StringBuffer &toEclRecString(StringBuffer &result)
@@ -166,7 +168,21 @@ public:
 
     void setColumnType(const char* columnType)
     {
-        this->columnType.set(columnType);
+        // Stores the column's type. A type beginning with "table of" (a nested dataset) is
+        // rewritten as "DATASET({<child1>, <child2>, ...})", rendered from the child columns
+        // attached so far - so children must be added via setChildCol() BEFORE this call.
+        // Any other value is stored as given.
+        // The null check keeps a null columnType away from strncmp; it falls through to set(),
+        // exactly as the pre-change code handed it over.
+        if (columnType != nullptr && strncmp(columnType, "table of", 8)==0)
+        {
+            StringBuffer result;
+            result.append("DATASET({");
+            ForEachItemIn(childIndex, childColumns)
+            {
+                // Separator goes before every entry but the first; avoids length()-1 arithmetic.
+                if (childIndex > 0)
+                    result.append(", ");
+                this->childColumns.item(childIndex).toEclRecString(result);
+            }
+            result.append("})");
+            this->columnType.set(result);
+        }
+        else
+            this->columnType.set(columnType);
     }
 
     int getDecimalDigits() const
@@ -213,6 +229,17 @@ public:
     {
         return columnName.str();
     }
+
+    // Appends 'child' to this column's list of nested columns. The entry is added through
+    // LINK(), so the caller keeps whatever reference it already holds.
+    // A null 'child' is ignored rather than dereferenced.
+    void setChildCol(HPCCColumnMetaData * child)
+    {
+        if (child == nullptr)
+            return;
+        childColumns.append(*LINK(child));
+    }
+
+    IArrayOf<HPCCColumnMetaData> * getChildColumns() // nested columns of this column; empty when none were added
+    {
+        return &childColumns; // address of the member array, so never null and owned by this object
+    }
+
 };
 
 #endif /* SQLCOLUMN_HPP_ */

+ 1 - 22
esp/services/ws_sql/SQL2ECL/SQLExpression.cpp

@@ -52,28 +52,7 @@ SQLFieldsExpression::~SQLFieldsExpression()
 
  SQLLogicType SQLFieldValueExpression::getLogicType()
  {
-     const char * type = field.getColumnType();
-
-     if (strnicmp(type,"STRING",6)==0)
-         return String_LogicType;
-     else if (strnicmp(type,"QSTRING",7)==0)
-         return QSstring_LogicType;
-     else if (strnicmp(type,"UNICODE",7)==0)
-         return Unicode_LogicType;
-     else if (strnicmp(type,"VARUNICODE",10)==0)
-         return Unicode_LogicType;
-     else if (strnicmp(type,"VARSTRING",9)==0)
-         return String_LogicType;
-     else if (strnicmp(type,"BOOLEAN",7)==0)
-         return Bool_LogicType;
-     else if (strnicmp(type,"UNSIGNED",8)==0)
-         return Integer_LogicType;
-     else if (strnicmp(type,"REAL",4)==0)
-         return Decimal_LogicType;
-     else if (strnicmp(type,"DECIMAL",7)==0)
-         return Decimal_LogicType;
-     else
-         return Unknown_LogicType;
+     // Classification by type-name prefix is delegated to ISQLExpression::getLogicTypeFromName.
+     return ISQLExpression::getLogicTypeFromName(field.getColumnType());
  }
 
  void SQLFieldValueExpression::toECLStringTranslateSource(

+ 33 - 23
esp/services/ws_sql/SQL2ECL/SQLExpression.hpp

@@ -55,7 +55,8 @@ typedef enum _SQLLogicType
     Unicode_LogicType,
     Numeric_LogicType,
     Integer_LogicType,
-    Decimal_LogicType
+    Decimal_LogicType,
+    NestedDS_LogicType
 } SQLLogicType;
 
 interface ISQLExpression : public CInterface, public IInterface
@@ -178,6 +179,36 @@ public:
     * Get this placeholder's generated name
     */
     virtual const char * getPlaceHolderName(){UNIMPLEMENTED; return nullptr;}
+
+    /*
+    * Maps an ECL type name onto a SQLLogicType by case-insensitive prefix match.
+    * A null or empty name, or a name matching none of the known prefixes, yields
+    * Unknown_LogicType.
+    */
+    static SQLLogicType getLogicTypeFromName(const char * type)
+    {
+        if (!type || !*type)
+            return Unknown_LogicType;
+
+        // Probed in order; the first prefix that matches decides the result.
+        static const struct { const char * prefix; SQLLogicType logicType; } prefixMap[] =
+        {
+            { "STRING",     String_LogicType   },
+            { "QSTRING",    QSstring_LogicType },
+            { "UNICODE",    Unicode_LogicType  },
+            { "VARUNICODE", Unicode_LogicType  },
+            { "VARSTRING",  String_LogicType   },
+            { "BOOLEAN",    Bool_LogicType     },
+            { "UNSIGNED",   Integer_LogicType  },
+            { "REAL",       Decimal_LogicType  },
+            { "DECIMAL",    Decimal_LogicType  },
+            { "DATASET",    NestedDS_LogicType },
+        };
+
+        for (const auto & entry : prefixMap)
+        {
+            if (strnicmp(type, entry.prefix, strlen(entry.prefix)) == 0)
+                return entry.logicType;
+        }
+        return Unknown_LogicType;
+    }
+
 };
 
 /*************************************************************************************************/
@@ -471,28 +502,7 @@ public:
 
     virtual SQLLogicType getLogicType()
     {
-        const char * type = field.getColumnType();
-
-        if (strnicmp(type,"STRING",6)==0)
-            return String_LogicType;
-        else if (strnicmp(type,"QSTRING",7)==0)
-            return QSstring_LogicType;
-        else if (strnicmp(type,"UNICODE",7)==0)
-            return Unicode_LogicType;
-        else if (strnicmp(type,"VARUNICODE",10)==0)
-            return Unicode_LogicType;
-        else if (strnicmp(type,"VARSTRING",9)==0)
-            return String_LogicType;
-        else if (strnicmp(type,"BOOLEAN",7)==0)
-            return Bool_LogicType;
-        else if (strnicmp(type,"UNSIGNED",8)==0)
-            return Integer_LogicType;
-        else if (strnicmp(type,"REAL",4)==0)
-            return Decimal_LogicType;
-        else if (strnicmp(type,"DECIMAL",7)==0)
-            return Decimal_LogicType;
-        else
-            return Unknown_LogicType;
+        return ISQLExpression::getLogicTypeFromName(field.getColumnType()); // logic type derived from the field's column type name by the shared helper
     }
 
     virtual int setParameterizedNames(int currentindex);