Browse Source

HPCC-16077 Add option to download WU result in CSV format

The option is selected by setting WsWorkunits.WUResultBin.format = 'csv'.

Signed-off-by: wangkx <kevin.wang@lexisnexis.com>
wangkx 8 years ago
parent
commit
807f0a9e1b

+ 2 - 0
common/fileview2/fvresultset.ipp

@@ -108,6 +108,8 @@ public:
 
     void getXmlSchema(ISchemaBuilder & builder, bool useXPath) const;
     unsigned queryColumnIndex(unsigned firstField, const char * fieldName) const;
+    // Returns the 'meta' pointer member exactly as stored.
+    IFvDataSourceMetaData* getMeta() const
+    {
+        return meta;
+    }
+    // Returns a reference to the entry of 'columns' at index 'column'.
+    CResultSetColumnInfo& getColumn(int column) const
+    {
+        return columns.item(column);
+    }
 
 protected:
     IFvDataSourceMetaData * meta;

+ 680 - 1
common/thorhelper/thorxmlwrite.cpp

@@ -685,6 +685,686 @@ StringBuffer &buildJsonFooter(StringBuffer  &footer, const char *suppliedFooter,
     return footer.append((rowTag && *rowTag) ? "]}" : "]");
 }
 
+//Lookup table: index 0-15 (one nibble) -> upper-case hexadecimal digit. Read-only.
+static const char thorHelperhexchar[] = "0123456789ABCDEF";
+//=====================================================================================
+
+//Character that encloses CSV string fields (see CommonCSVWriter::addStringField). Read-only.
+static const char csvQuote = '\"';
+
+//Builds a CSV writer.
+//  _flags   - stored in 'flags'.
+//  _options - delimiter, terminator and includeHeader are copied into this writer.
+//  _flusher - optional sink used by flush(); stored as a plain pointer.
+//The writer starts in header-reading mode with every counter at zero.
+CommonCSVWriter::CommonCSVWriter(unsigned _flags, CSVOptions& _options, IXmlStreamFlusher* _flusher)
+{
+    flags = _flags;
+    flusher = _flusher;
+
+    //Take private copies of the caller's settings.
+    options.includeHeader = _options.includeHeader;  //output CSV headers
+    options.delimiter.set(_options.delimiter.get());
+    options.terminator.set(_options.terminator.get());
+
+    //Headers are collected first; content is accepted after finishCSVHeaders().
+    readingCSVHeader = true;
+    addingSimpleNestedContent = false; //Set by CommonCSVWriter::checkHeaderName()
+    nestedHeaderLayerID = 0;
+    headerColumnID = 0;
+    recordCount = 0;
+}
+
+//Hands whatever is still in 'out' to the flusher (if one was supplied), marked as the closing flush.
+CommonCSVWriter::~CommonCSVWriter()
+{
+    const bool isClose = true;
+    flush(isClose);
+}
+
+//Adds a string value as a quoted CSV field.
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputString(unsigned len, const char* field, const char* fieldName)
+{
+    if (checkHeaderName(fieldName))
+        addStringField(len, field, fieldName);
+}
+
+//Adds a boolean value as the unquoted text "true" or "false".
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputBool(bool field, const char* fieldName)
+{
+    if (!checkHeaderName(fieldName))
+        return;
+
+    const char* text = field ? "true" : "false";
+    addContentField(text, fieldName);
+}
+
+//Adds a binary value as an unquoted string of upper-case hex digits (two per byte).
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputData(unsigned len, const void* field, const char* fieldName)
+{
+    if (!checkHeaderName(fieldName))
+        return;
+
+    const unsigned char* bytes = (const unsigned char*) field;
+    StringBuffer hex;
+    for (unsigned idx = 0; idx < len; idx++)
+    {
+        const unsigned char b = bytes[idx];
+        hex.append(thorHelperhexchar[b >> 4]);   //high nibble
+        hex.append(thorHelperhexchar[b & 0x0f]); //low nibble
+    }
+    addContentField(hex.str(), fieldName);
+}
+
+//Adds a signed integer as unquoted decimal text. 'size' is not used by the CSV writer.
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputInt(__int64 field, unsigned size, const char* fieldName)
+{
+    if (checkHeaderName(fieldName))
+    {
+        StringBuffer text;
+        text.append(field);
+        addContentField(text.str(), fieldName);
+    }
+}
+
+//Adds an unsigned integer as unquoted decimal text. 'size' is not used by the CSV writer.
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputUInt(unsigned __int64 field, unsigned size, const char* fieldName)
+{
+    if (checkHeaderName(fieldName))
+    {
+        StringBuffer text;
+        text.append(field);
+        addContentField(text.str(), fieldName);
+    }
+}
+
+//Adds a floating point value as unquoted text, formatted by StringBuffer::append(double).
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputReal(double field, const char *fieldName)
+{
+    if (checkHeaderName(fieldName))
+    {
+        StringBuffer text;
+        text.append(field);
+        addContentField(text.str(), fieldName);
+    }
+}
+
+//Adds a signed packed-decimal value as unquoted text.
+//  field     - the packed decimal data.
+//  size      - passed to DecValid() as size*2-1 digits and to DecPushDecimal() unchanged.
+//  precision - passed through to DecPushDecimal().
+//If DecValid() rejects the data an empty field is written. The value is dropped entirely
+//when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputDecimal(const void* field, unsigned size, unsigned precision, const char* fieldName)
+{
+    if (!checkHeaderName(fieldName))
+        return;
+
+    StringBuffer v;
+    char dec[50];
+    BcdCriticalBlock bcdBlock; //NOTE(review): presumably serialises use of the decimal stack - confirm
+    if (DecValid(true, size*2-1, field))
+    {
+        DecPushDecimal(field, size, precision);
+        DecPopCString(sizeof(dec), dec);
+        //Skip leading white space. The cast keeps the isspace() argument within the
+        //unsigned char range the function requires, whatever the signedness of char.
+        const char *finger = dec;
+        while (isspace((unsigned char)*finger))
+            finger++;
+        v.append(finger);
+    }
+    addContentField(v.str(), fieldName);
+}
+
+//Adds an unsigned packed-decimal value as unquoted text.
+//  field     - the packed decimal data.
+//  size      - passed to DecValid() as size*2 digits and to DecPushUDecimal() unchanged.
+//  precision - passed through to DecPushUDecimal().
+//If DecValid() rejects the data an empty field is written. The value is dropped entirely
+//when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputUDecimal(const void* field, unsigned size, unsigned precision, const char* fieldName)
+{
+    if (!checkHeaderName(fieldName))
+        return;
+
+    StringBuffer v;
+    char dec[50];
+    BcdCriticalBlock bcdBlock; //NOTE(review): presumably serialises use of the decimal stack - confirm
+    if (DecValid(false, size*2, field))
+    {
+        DecPushUDecimal(field, size, precision);
+        DecPopCString(sizeof(dec), dec);
+        //Skip leading white space. The cast keeps the isspace() argument within the
+        //unsigned char range the function requires, whatever the signedness of char.
+        const char *finger = dec;
+        while (isspace((unsigned char)*finger))
+            finger++;
+        v.append(finger);
+    }
+    addContentField(v.str(), fieldName);
+}
+
+//Adds a unicode value as a quoted CSV field after converting it to UTF-8.
+//  len   - length of 'field' as expected by rtlUnicodeToCodepageX().
+//  field - the UChar data to convert.
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputUnicode(unsigned len, const UChar* field, const char* fieldName)
+{
+    if (!checkHeaderName(fieldName))
+        return;
+
+    //rtlUnicodeToCodepageX() allocates 'buff'; it is released with rtlFree() below.
+    char * buff = 0;
+    unsigned bufflen = 0;
+    rtlUnicodeToCodepageX(bufflen, buff, len, field, "utf-8");
+    addStringField(bufflen, buff, fieldName);
+    rtlFree(buff);
+}
+
+//Adds a QString value as a quoted CSV field after expanding it with rtlQStrToStr().
+//  len   - length in characters; also the size of the expanded buffer.
+//  field - the QString data.
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputQString(unsigned len, const char* field, const char* fieldName)
+{
+    if (!checkHeaderName(fieldName))
+        return;
+
+    //Small values are expanded on the stack; larger ones use a buffer owned by tempBuffer.
+    MemoryAttr tempBuffer;
+    char * temp;
+    if (len <= 100)
+        temp = (char *)alloca(len);
+    else
+        temp = (char *)tempBuffer.allocate(len);
+    rtlQStrToStr(len, temp, len, field);
+    addStringField(len, temp, fieldName);
+}
+
+//Adds a UTF-8 value as a quoted CSV field. 'len' is converted with rtlUtf8Size()
+//before the data is copied. The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputUtf8(unsigned len, const char* field, const char* fieldName)
+{
+    if (checkHeaderName(fieldName))
+    {
+        const unsigned byteLen = rtlUtf8Size(len, field);
+        addStringField(byteLen, field, fieldName);
+    }
+}
+
+//Adds a NUL-terminated numeric string as a quoted CSV field (it goes through addStringField()).
+//The value is dropped when checkHeaderName() rejects fieldName.
+void CommonCSVWriter::outputNumericString(const char* field, const char* fieldName)
+{
+    if (checkHeaderName(fieldName))
+    {
+        const size32_t len = (size32_t)strlen(field);
+        addStringField(len, field, fieldName);
+    }
+}
+
+//Pushes a new (fieldName, isArray) entry onto the dataXPath stack.
+void CommonCSVWriter::appendDataXPathItem(const char* fieldName, bool isArray)
+{
+    Owned<CXPathItem> entry = new CXPathItem(fieldName, isArray);
+    CXPathItem* released = entry.getClear(); //the array takes over the reference
+    dataXPath.append(*released);
+}
+
+//Returns true when the top of the dataXPath stack is an array entry whose path equals
+//fieldName (case-insensitive); false otherwise, including when the stack is empty.
+bool CommonCSVWriter::isDataRow(const char* fieldName)
+{
+    if (dataXPath.empty())
+        return false;
+
+    const unsigned last = dataXPath.length() - 1;
+    CXPathItem& top = dataXPath.item(last);
+    if (!top.getIsArray())
+        return false;
+    return strieq(fieldName, top.getPath());
+}
+
+//Header phase only: registers a nested header item and descends into it.
+//  simpleNested - the nested item is a single-column set; it takes a column of its own.
+//  outputHeader - when true the header layer (row) counter is advanced for the children.
+//Ignored when fieldName is empty or when headers are no longer being read.
+void CommonCSVWriter::outputBeginNested(const char* fieldName, bool simpleNested, bool outputHeader)
+{
+    const bool usable = fieldName && *fieldName && readingCSVHeader;
+    if (!usable)
+        return;
+
+    //Registered with the current column/layer IDs, before either counter is advanced.
+    addCSVHeader(fieldName, NULL, true, simpleNested, outputHeader);
+
+    //ECL SET has only one column (parent name should be used as column name).
+    if (simpleNested)
+        headerColumnID++;
+
+    //nestedHeaderLayerID is used as row ID when output CSV headers.
+    if (outputHeader)
+        nestedHeaderLayerID++;
+
+    addFieldToParentXPath(fieldName);
+}
+
+//Header phase only: leaves the nested header item entered by the matching
+//outputBeginNested(fieldName, simpleNested, outputHeader) call.
+//Ignored when fieldName is empty or when headers are no longer being read.
+void CommonCSVWriter::outputEndNested(const char* fieldName, bool outputHeader)
+{
+    const bool usable = fieldName && *fieldName && readingCSVHeader;
+    if (!usable)
+        return;
+
+    removeFieldFromCurrentParentXPath(fieldName);
+    if (outputHeader)
+        nestedHeaderLayerID--; //back to the parent's header layer
+}
+
+//Content phase only: called when a nested item, or a new row inside a nested item, begins.
+//Ignored when fieldName is empty or while headers are still being read.
+void CommonCSVWriter::outputBeginNested(const char* fieldName, bool simpleNested)
+{
+    if (!fieldName || !*fieldName || readingCSVHeader)
+        return;
+
+    //isDataRow() must be evaluated before appendDataXPathItem()
+    //because it inspects the previous top of the data xpath.
+    const bool newRowInsideNested = isDataRow(fieldName);
+    appendDataXPathItem(fieldName, false);
+
+    if (!newRowInsideNested)
+    {//A nested item begins.
+        addFieldToParentXPath(fieldName);
+        return;
+    }
+
+    //A new row begins inside a nested item.
+    if (currentParentXPath.isEmpty())
+        return;
+
+    //Add row xpath if it is not the 1st xpath.
+    addFieldToParentXPath(fieldName);
+
+    CCSVItem* parent = getParentCSVItem();
+    if (!parent)
+        return;
+
+    //Starting from the second result row, every child of the parent item restarts from
+    //the largest NextRowID reached by any child in the previous row.
+    if (parent->getRowCount() > 0)
+    {
+        StringBuffer parentPath = currentParentXPath;
+        parentPath.setLength(parentPath.length() - 1); //drop the trailing '/'
+        const unsigned startRowID = getChildrenMaxNextRowID(parentPath.str());
+        setChildrenNextRowID(parentPath.str(), startRowID);
+    }
+
+    parent->setCurrentRowEmpty(true);
+}
+
+//Content phase only: called when a nested item, or a row inside a nested item, ends.
+//Ignored when fieldName is empty or while headers are still being read.
+void CommonCSVWriter::outputEndNested(const char* fieldName)
+{
+    if (!fieldName || !*fieldName || readingCSVHeader)
+        return;
+
+    dataXPath.pop();
+    const bool rowEnded = isDataRow(fieldName); //otherwise a nested item ended
+
+    if (rowEnded)
+    {
+        //Count the row for the parent item unless nothing was written into it.
+        //getParentCSVItem() returns NULL when there is no parent xpath.
+        CCSVItem* parent = getParentCSVItem();
+        if (parent && !parent->getCurrentRowEmpty())
+        {
+            parent->incrementRowCount();
+            parent->setCurrentRowEmpty(true);
+        }
+    }
+
+    removeFieldFromCurrentParentXPath(fieldName);
+
+    //if dataXPath.length() back to 1, this should be the end of a content result row.
+    if (rowEnded && (dataXPath.length() == 1))
+        finishContentResultRow();
+}
+
+//Records the start of an array by pushing an array entry onto the data xpath stack.
+void CommonCSVWriter::outputBeginArray(const char* fieldName)
+{
+    const bool isArray = true;
+    appendDataXPathItem(fieldName, isArray);
+}
+
+//Records the end of an array by popping the top data xpath entry. fieldName is not used.
+void CommonCSVWriter::outputEndArray(const char* fieldName)
+{
+    dataXPath.pop();
+}
+
+//Intentionally a no-op: this hook adds a <Dataset> tag around a wu result in xml,
+//which has no CSV equivalent.
+void CommonCSVWriter::outputBeginDataset(const char* dsname, bool nestChildren)
+{
+}
+
+//Intentionally a no-op, matching outputBeginDataset().
+void CommonCSVWriter::outputEndDataset(const char* dsname)
+{
+}
+
+//Resets the writer to its initial header-reading state and empties the output buffers.
+//Returns *this so calls can be chained.
+//NOTE(review): dataXPath is not reset here - confirm that is intended.
+IXmlWriterExt& CommonCSVWriter::clear()
+{
+    //Mode flags
+    readingCSVHeader = true;
+    addingSimpleNestedContent = false;
+
+    //Counters
+    recordCount = 0;
+    headerColumnID = 0;
+    nestedHeaderLayerID = 0;
+
+    //Header bookkeeping
+    currentParentXPath.clear();
+    headerXPathList.kill();
+    topHeaderNameMap.kill();
+    csvItems.kill();
+
+    //Buffers
+    contentRowsBuffer.clear();
+    out.clear();
+    auditOut.clear();
+    return *this;
+}
+
+//Registers a simple (non-nested) header column and advances the column counter.
+//Empty names are ignored. 'type' is only passed through to addCSVHeader().
+void CommonCSVWriter::outputCSVHeader(const char* name, const char* type)
+{
+    const bool hasName = name && *name;
+    if (hasName)
+    {
+        addCSVHeader(name, type, false, false, true);
+        headerColumnID++;
+    }
+}
+
+//Ends the header phase: writes the header rows to 'out' when options.includeHeader is set,
+//then switches the writer to content mode with an empty parent xpath.
+//Debug builds also record the header layout in the audit buffer.
+void CommonCSVWriter::finishCSVHeaders()
+{
+    const bool wantHeaders = options.includeHeader;
+    if (wantHeaders)
+        outputHeadersToBuffer();
+
+    currentParentXPath.clear();
+    readingCSVHeader = false;
+
+#ifdef _DEBUG
+    auditHeaderInfo();
+#endif
+}
+
+//Lays the collected headers out as rows (one row per nested layer) and writes them to 'out'.
+//A nested header is placed midway between its own column and the largest column ID
+//found among its children.
+void CommonCSVWriter::outputHeadersToBuffer()
+{
+    CIArrayOf<CCSVRow> headerRows;
+    ForEachItemIn(idx, headerXPathList)
+    {
+        CCSVItem* header = csvItems.getValue(headerXPathList.item(idx));
+        if (header && header->checkOutputHeader())
+        {
+            unsigned column = header->getColumnID();
+            if (header->checkIsNestedItem())
+            {
+                unsigned lastChildColumn = column;
+                getChildrenMaxColumnID(header, lastChildColumn);
+                column += (lastChildColumn - column)/2;
+            }
+            addColumnToRow(headerRows, header->getNestedLayer(), column, header->getName(), NULL);
+        }
+    }
+
+    outputCSVRows(headerRows, true);
+}
+
+//Walks all descendants of 'item' and raises maxColumnID to the largest column ID found
+//on any non-nested descendant. The result is both written to maxColumnID and returned.
+unsigned CommonCSVWriter::getChildrenMaxColumnID(CCSVItem* item, unsigned& maxColumnID)
+{
+    StringBuffer itemPath = item->getParentXPath();
+    itemPath.append(item->getName());
+
+    StringArray& childNames = item->getChildrenNames();
+    ForEachItemIn(idx, childNames)
+    {
+        StringBuffer childPath = itemPath;
+        childPath.append("/").append(childNames.item(idx));
+
+        CCSVItem* child = csvItems.getValue(childPath.str());
+        if (!child)
+            continue;
+
+        if (child->checkIsNestedItem())
+        {
+            //Descend; the recursion carries the running maximum.
+            maxColumnID = getChildrenMaxColumnID(child, maxColumnID);
+            continue;
+        }
+
+        const unsigned childColumn = child->getColumnID();
+        if (childColumn > maxColumnID)
+            maxColumnID = childColumn;
+    }
+    return maxColumnID;
+}
+
+//Appends 'len' characters of 'in' to 'out', doubling every double-quote.
+//RFC-4180: "If double-quotes are used to enclose fields, then a double-quote
+//appearing inside a field must be escaped by preceding it with another double quote."
+void CommonCSVWriter::escapeQuoted(unsigned len, char const* in, StringBuffer& out)
+{
+    for (unsigned pos = 0; pos < len; pos++)
+    {
+        const char ch = in[pos];
+        if (ch == '"')
+            out.append('"');
+        out.append(ch);
+    }
+}
+
+//Returns the CSV item for the current parent xpath, or NULL when there is no parent
+//xpath or no item is registered for it.
+CCSVItem* CommonCSVWriter::getParentCSVItem()
+{
+    if (currentParentXPath.isEmpty())
+        return NULL;
+
+    //The last character of the parent xpath is dropped to obtain the lookup key
+    //(addFieldToParentXPath() ends each segment with '/').
+    StringBuffer key = currentParentXPath;
+    key.setLength(key.length() - 1);
+    return csvItems.getValue(key.str());
+}
+
+//Looks up the CSV item registered under <currentParentXPath><name>.
+//Returns whatever csvItems.getValue() yields for that key.
+CCSVItem* CommonCSVWriter::getCSVItemByFieldName(const char* name)
+{
+    StringBuffer key;
+    if (!currentParentXPath.isEmpty())
+        key.append(currentParentXPath.str());
+    key.append(name);
+    return csvItems.getValue(key.str());
+}
+
+//Tells whether 'name' is a known header at the current nesting level.
+//  - Empty names are never known.
+//  - At the top level the name must be present (and true) in topHeaderNameMap.
+//  - Inside a nested item the parent item must exist; if the parent is a simple nested
+//    item (ECL: SET OF string, int, etc) every name is accepted, otherwise the name
+//    must be one of the parent's children.
+//Side effect: addingSimpleNestedContent is updated whenever a parent item is found.
+bool CommonCSVWriter::checkHeaderName(const char* name)
+{
+    if (!name || !*name)
+        return false;
+
+    if (currentParentXPath.isEmpty())
+    {
+        bool* known = topHeaderNameMap.getValue(name);
+        return (known && *known);
+    }
+
+    CCSVItem* parent = getParentCSVItem();
+    if (!parent)
+        return false;
+
+    addingSimpleNestedContent = parent->checkSimpleNested();
+    return addingSimpleNestedContent ? true : parent->hasChildName(name);
+}
+
+//Stores columnValue at (rowID, colID) in 'rows' and updates the row bookkeeping of the
+//CSV items involved.
+//  rows        - row buffer to write into (header rows or content rows).
+//  rowID/colID - zero-based cell position.
+//  columnValue - text for the cell; NULL is stored as "".
+//  columnName  - field name of the item whose NextRowID must advance; may be NULL or empty.
+void CommonCSVWriter::addColumnToRow(CIArrayOf<CCSVRow>& rows, unsigned rowID, unsigned colID, const char* columnValue, const char* columnName)
+{
+    if (!columnValue)
+        columnValue = "";
+
+    //Pad with empty rows so that the array index always equals the row ID, even if
+    //rowID skips ahead of the rows created so far.
+    for (unsigned id = rows.length(); id <= rowID; id++)
+    {
+        Owned<CCSVRow> newRow = new CCSVRow(id);
+        rows.append(*newRow.getClear());
+    }
+    rows.item(rowID).setColumn(colID, NULL, columnValue);
+
+    //Top-level fields need no row bookkeeping.
+    if (currentParentXPath.isEmpty())
+        return;
+
+    if (!addingSimpleNestedContent && columnName && *columnName)
+    {
+        CCSVItem* item = getCSVItemByFieldName(columnName);
+        if (item)
+            item->incrementNextRowID();
+    }
+
+    CCSVItem* parentItem = getParentCSVItem();
+    if (parentItem)
+    {
+        if (addingSimpleNestedContent) //ECL: SET OF string, int, etc. NextRowID should be stored in Parent item.
+            parentItem->incrementNextRowID();
+        setParentItemRowEmpty(parentItem, false);
+    }
+}
+
+//Sets the 'current row empty' flag on 'item' and on each of its ancestors.
+//  item  - starting item; NULL is tolerated and does nothing.
+//  empty - value stored in every visited item.
+//The walk follows each item's parent xpath and stops at a top-level item (empty parent
+//xpath) or as soon as an ancestor is not registered in csvItems.
+void CommonCSVWriter::setParentItemRowEmpty(CCSVItem* item, bool empty)
+{
+    while (item)
+    {
+        item->setCurrentRowEmpty(empty);
+
+        StringBuffer parentXPath = item->getParentXPath();
+        if (parentXPath.isEmpty())
+            return;
+
+        //If this item is not empty, its parent is not empty.
+        parentXPath.setLength(parentXPath.length() - 1); //drop the trailing '/'
+        item = csvItems.getValue(parentXPath.str());
+    }
+}
+
+//Registers a header item under the current parent xpath.
+//  name         - header name; the item is keyed by <currentParentXPath><name>.
+//  type         - not used by this method.
+//  isNested     - the item has child headers.
+//  simpleNested - the item is a single-column set.
+//  outputHeader - whether the item is written by outputHeadersToBuffer().
+//Nothing is added when checkHeaderName(name) already accepts the name.
+void CommonCSVWriter::addCSVHeader(const char* name, const char* type, bool isNested, bool simpleNested, bool outputHeader)
+{
+    if (checkHeaderName(name))
+        return;//Duplicated header. Should never happen.
+
+    StringBuffer itemXPath = currentParentXPath;
+    itemXPath.append(name);
+
+    Owned<CCSVItem> header = new CCSVItem();
+    header->setName(name);
+    header->setParentXPath(currentParentXPath.str());
+    header->setColumnID(headerColumnID);
+    header->setNestedLayer(nestedHeaderLayerID);
+    header->setIsNestedItem(isNested);
+    header->setSimpleNested(simpleNested);
+    header->setOutputHeader(outputHeader);
+
+    csvItems.setValue(itemXPath.str(), header);
+    headerXPathList.append(itemXPath.str());
+
+    addChildNameToParentCSVItem(name);
+    if (currentParentXPath.isEmpty())
+        topHeaderNameMap.setValue(name, true);
+}
+
+//Writes an already formatted value into the content row buffer.
+//Row and column come from the field's own item, or from the parent item while simple
+//nested content (ECL: SET OF string, int, etc) is being added. Both default to 0 when
+//no item is found.
+void CommonCSVWriter::addContentField(const char* field, const char* fieldName)
+{
+    CCSVItem* target = addingSimpleNestedContent ? getParentCSVItem() : getCSVItemByFieldName(fieldName);
+
+    unsigned rowID = 0;
+    unsigned colID = 0;
+    if (target)
+    {
+        rowID = target->getNextRowID();
+        colID = target->getColumnID();
+    }
+    addColumnToRow(contentRowsBuffer, rowID, colID, field, fieldName);
+}
+
+//Writes a string value as a content field: the first 'len' characters of 'field' are
+//escaped with escapeQuoted() and enclosed in csvQuote characters.
+void CommonCSVWriter::addStringField(unsigned len, const char* field, const char* fieldName)
+{
+    StringBuffer quoted;
+    quoted.append(csvQuote);          //opening quote
+    escapeQuoted(len, field, quoted);
+    quoted.append(csvQuote);          //closing quote
+    addContentField(quoted.str(), fieldName);
+}
+
+//Returns the largest NextRowID found on the item at 'path' and, when it is a nested
+//item, on any of its descendants. Returns 0 when no item is registered at 'path'.
+unsigned CommonCSVWriter::getChildrenMaxNextRowID(const char* path)
+{
+    CCSVItem* item = csvItems.getValue(path);
+    if (!item)
+        return 0; //Should never happen
+
+    unsigned highest = item->getNextRowID();
+    if (item->checkIsNestedItem())
+    {
+        StringBuffer prefix = path;
+        prefix.append("/");
+
+        StringArray& childNames = item->getChildrenNames();
+        ForEachItemIn(idx, childNames)
+        {
+            StringBuffer childPath = prefix;
+            childPath.append(childNames.item(idx));
+
+            const unsigned childMax = getChildrenMaxNextRowID(childPath.str());
+            if (childMax > highest)
+                highest = childMax;
+        }
+    }
+    return highest;
+}
+
+//Sets NextRowID to rowID on the item at 'path' if it is a plain item, otherwise on all
+//of its descendants. Nested descendants also get their row count reset to 0.
+//Nothing happens when no item is registered at 'path'.
+void CommonCSVWriter::setChildrenNextRowID(const char* path, unsigned rowID)
+{
+    CCSVItem* item = csvItems.getValue(path);
+    if (!item)
+        return;
+
+    if (!item->checkIsNestedItem())
+    {
+        item->setNextRowID(rowID);
+        return;
+    }
+
+    StringArray& childNames = item->getChildrenNames();
+    ForEachItemIn(idx, childNames)
+    {
+        StringBuffer childPath = path;
+        childPath.append("/").append(childNames.item(idx));
+
+        CCSVItem* child = csvItems.getValue(childPath.str());
+        if (child)
+        {
+            child->setNextRowID(rowID);//for possible new row
+            if (child->checkIsNestedItem())
+            {
+                child->setRowCount(0);
+                setChildrenNextRowID(childPath.str(), rowID);
+            }
+        }
+    }
+}
+
+//Adds 'name' to the child list of the current parent item.
+//Does nothing for an empty name, at the top level, or when the parent item is unknown.
+void CommonCSVWriter::addChildNameToParentCSVItem(const char* name)
+{
+    if (!name || !*name)
+        return;
+
+    //getParentCSVItem() returns NULL when there is no parent xpath.
+    CCSVItem* parent = getParentCSVItem();
+    if (parent)
+        parent->addChildName(name);
+}
+
+//Descends one level: appends "<fieldName>/" to the current parent xpath.
+void CommonCSVWriter::addFieldToParentXPath(const char* fieldName)
+{
+    currentParentXPath.append(fieldName);
+    currentParentXPath.append("/");
+}
+
+//Ascends one level: cuts strlen(fieldName)+1 characters (the name and its '/') off the
+//end of the current parent xpath, or empties it when it is not longer than that.
+void CommonCSVWriter::removeFieldFromCurrentParentXPath(const char* fieldName)
+{
+    unsigned nameLen = strlen(fieldName);
+    const unsigned current = currentParentXPath.length();
+    const unsigned remaining = (current > nameLen+1) ? (current - nameLen - 1) : 0;
+    currentParentXPath.setLength(remaining);
+}
+
+//Appends 'rows' to 'out'. Every column value is preceded by the delimiter and every
+//row is followed by the terminator. For content rows (isHeader false) the first row
+//additionally starts with the current record count.
+void CommonCSVWriter::outputCSVRows(CIArrayOf<CCSVRow>& rows, bool isHeader)
+{
+    ForEachItemIn(rowIdx, rows)
+    {
+        if (!isHeader && (rowIdx == 0))
+            out.append(recordCount);
+
+        CCSVRow& row = rows.item(rowIdx);
+        const unsigned numColumns = row.getColumnCount();
+        for (unsigned col = 0; col < numColumns; col++)
+        {
+            out.append(options.delimiter.get());
+            out.append(row.getColumnValue(col));
+        }
+        out.append(options.terminator.get());
+    }
+}
+
+//Completes one result record: bumps the record count, writes the buffered content rows
+//to 'out', then resets the per-record state ready for a possible next record.
+void CommonCSVWriter::finishContentResultRow()
+{
+    recordCount++;
+    outputCSVRows(contentRowsBuffer, false);
+
+    //Prepare for possible next record
+    contentRowsBuffer.kill();
+    currentParentXPath.setLength(0);
+    ForEachItemIn(idx, headerXPathList)
+    {
+        CCSVItem* header = csvItems.getValue(headerXPathList.item(idx));
+        if (header)
+            header->clearContentVariables();
+    }
+}
+
+//Sets the value (and, when columnName is non-empty, the name) of column 'columnID'.
+//Columns between the current end of the row and columnID are created as empty items.
+void CCSVRow::setColumn(unsigned columnID, const char* columnName, const char* columnValue)
+{
+    //Pad with empty columns until the requested index exists.
+    while (columns.length() <= columnID)
+    {
+        Owned<CCSVItem> blank = new CCSVItem();
+        columns.append(*blank.getClear());
+    }
+
+    CCSVItem& column = columns.item(columnID);
+    if (columnName && *columnName)
+        column.setName(columnName);
+    column.setValue(columnValue);
+}
+
+//Returns the value stored in column 'columnID', or "" when the row has no such column.
+const char* CCSVRow::getColumnValue(unsigned columnID) const
+{
+    if (columnID < columns.length())
+        return columns.item(columnID).getValue();
+    return ""; //This should never happen.
+}
+
 //=====================================================================================
 
 inline void outputEncodedXmlString(unsigned len, const char *field, const char *fieldname, StringBuffer &out)
@@ -705,7 +1385,6 @@ inline void outputEncodedXmlBool(bool field, const char *fieldname, StringBuffer
         out.append(text);
 }
 
-static char thorHelperhexchar[] = "0123456789ABCDEF";
 inline void outputEncodedXmlData(unsigned len, const void *_field, const char *fieldname, StringBuffer &out)
 {
     const unsigned char *field = (const unsigned char *) _field;

+ 220 - 0
common/thorhelper/thorxmlwrite.hpp

@@ -356,4 +356,224 @@ extern thorhelper_decl void printKeyedValues(StringBuffer &out, IIndexReadContex
 extern thorhelper_decl void convertRowToXML(size32_t & lenResult, char * & result, IOutputMetaData & info, const void * row, unsigned flags = (unsigned)-1);
 extern thorhelper_decl void convertRowToJSON(size32_t & lenResult, char * & result, IOutputMetaData & info, const void * row, unsigned flags = (unsigned)-1);
 
+//Settings for CommonCSVWriter.
+struct CSVOptions
+{
+    StringAttr delimiter;   //written before every column value
+    StringAttr terminator;  //written after every row
+    bool includeHeader;     //when true, header rows are written by finishCSVHeaders()
+};
+
+//One CSV item. CommonCSVWriter uses it for header entries (name, column, nesting and row
+//bookkeeping) and CCSVRow uses it for individual cells (name and value).
+class CCSVItem : public CInterface, implements IInterface
+{
+    unsigned columnID, nextRowID, rowCount, nestedLayer;
+    StringAttr name, type, value, parentXPath;
+    StringArray childNames;
+    MapStringTo<bool> childNameMap;
+    bool isNestedItem, simpleNested, currentRowEmpty, outputHeader;
+public:
+    //Initialisers are listed in declaration order. outputHeader now has a defined
+    //default (true); CommonCSVWriter::addCSVHeader() always sets it explicitly.
+    CCSVItem() : columnID(0), nextRowID(0), rowCount(0), nestedLayer(0), isNestedItem(false),
+        simpleNested(false), currentRowEmpty(true), outputHeader(true) { }
+
+    IMPLEMENT_IINTERFACE;
+    inline const char* getName() const { return name.get(); }
+    inline void setName(const char* _name) { name.set(_name); }
+    inline const char* getValue() const { return value.get(); }
+    inline void setValue(const char* _value) { value.set(_value); }
+    inline unsigned getColumnID() const { return columnID; }
+    inline void setColumnID(unsigned _columnID) { columnID = _columnID; }
+
+    //Row bookkeeping used while content is written.
+    inline unsigned getNextRowID() const { return nextRowID; }
+    inline void setNextRowID(unsigned _rowID) { nextRowID = _rowID; }
+    inline void incrementNextRowID() { nextRowID++; }
+    inline unsigned getRowCount() const { return rowCount; }
+    inline void setRowCount(unsigned _rowCount) { rowCount = _rowCount; }
+    inline void incrementRowCount() { rowCount++; }
+    inline bool getCurrentRowEmpty() const { return currentRowEmpty; }
+    inline void setCurrentRowEmpty(bool _currentRowEmpty) { currentRowEmpty = _currentRowEmpty; }
+
+    //Header layout information.
+    inline unsigned getNestedLayer() const { return nestedLayer; }
+    inline void setNestedLayer(unsigned _nestedLayer) { nestedLayer = _nestedLayer; }
+    inline bool checkIsNestedItem() const { return isNestedItem; }
+    inline void setIsNestedItem(bool _isNestedItem) { isNestedItem = _isNestedItem; }
+    inline bool checkSimpleNested() const { return simpleNested; }
+    inline void setSimpleNested(bool _simpleNested) { simpleNested = _simpleNested; }
+    inline bool checkOutputHeader() const { return outputHeader; }
+    inline void setOutputHeader(bool _outputHeader) { outputHeader = _outputHeader; }
+    inline const char* getParentXPath() const { return parentXPath.str(); }
+    inline void setParentXPath(const char* _parentXPath) { parentXPath.set(_parentXPath); }
+
+    //Child names are kept in insertion order in childNames; childNameMap gives fast lookup.
+    inline StringArray& getChildrenNames() { return childNames; }
+    inline void addChildName(const char* name)
+    {
+        if (hasChildName(name))
+            return;
+        childNameMap.setValue(name, true);
+        childNames.append(name);
+    }
+    inline bool hasChildName(const char* name)
+    {
+        bool* found = childNameMap.getValue(name);
+        return (found && *found);
+    }
+    //Resets the per-record row bookkeeping.
+    inline void clearContentVariables()
+    {
+        nextRowID = rowCount = 0;
+        currentRowEmpty = true;
+    }
+};
+
+//One output row: a row ID plus an array of column items.
+class CCSVRow : public CInterface, implements IInterface
+{
+    unsigned rowID;
+    CIArrayOf<CCSVItem> columns;
+
+public:
+    CCSVRow(unsigned _rowID) : rowID(_rowID)
+    {
+    }
+    IMPLEMENT_IINTERFACE;
+
+    inline unsigned getRowID() const
+    {
+        return rowID;
+    }
+    inline void setRowID(unsigned _rowID)
+    {
+        rowID = _rowID;
+    }
+    //Number of columns currently held by the row.
+    inline unsigned getColumnCount() const
+    {
+        return columns.length();
+    }
+
+    const char* getColumnValue(unsigned columnID) const;
+    void setColumn(unsigned columnID, const char* columnName, const char* columnValue);
+};
+
+//CommonCSVWriter is used to output a WU result in CSV format.
+//Usage:
+//  1. Describe the CSV headers (outputCSVHeader() and the three/two-argument
+//     outputBeginNested()/outputEndNested() overloads), then call finishCSVHeaders();
+//     if requested, the headers are written into the 'out' buffer at that point.
+//  2. Feed each row (a record) of the WU result through the IXmlWriter methods; the
+//     rows are written into the 'out' buffer.
+//  3. Read the 'out' buffer through the str() method, or supply a flusher.
+class thorhelper_decl CommonCSVWriter: public CInterface, implements IXmlWriterExt
+{
+    //One entry of the data xpath stack: a path name and whether it denotes an array.
+    class CXPathItem : public CInterface, implements IInterface
+    {
+        bool isArray;
+        StringAttr path;
+    public:
+        //Initialisers are listed in declaration order.
+        CXPathItem(const char* _path, bool _isArray) : isArray(_isArray), path(_path) { }
+
+        IMPLEMENT_IINTERFACE;
+        inline const char* getPath() const { return path.get(); }
+        inline bool getIsArray() const { return isArray; }
+    };
+    CSVOptions options;
+    bool readingCSVHeader, addingSimpleNestedContent;
+    unsigned recordCount, headerColumnID, nestedHeaderLayerID;
+    StringBuffer currentParentXPath, auditOut;
+    StringArray headerXPathList;
+    MapStringTo<bool> topHeaderNameMap;
+    MapStringToMyClass<CCSVItem> csvItems;
+    CIArrayOf<CCSVRow> contentRowsBuffer;
+    CIArrayOf<CXPathItem> dataXPath;//xpath in caller
+
+    void escapeQuoted(unsigned len, char const* in, StringBuffer& out);
+    bool checkHeaderName(const char* name);
+    CCSVItem* getParentCSVItem();
+    CCSVItem* getCSVItemByFieldName(const char* name);
+    void addColumnToRow(CIArrayOf<CCSVRow>& rows, unsigned rowID, unsigned colID, const char* columnValue, const char* columnName);
+    void addCSVHeader(const char* name, const char* type, bool isNested, bool simpleNested, bool outputHeader);
+    void addContentField(const char* field, const char* fieldName);
+    void addStringField(unsigned len, const char* field, const char* fieldName);
+    void setChildrenNextRowID(const char* path, unsigned rowID);
+    unsigned getChildrenMaxNextRowID(const char* path);
+    unsigned getChildrenMaxColumnID(CCSVItem* item, unsigned& maxColumnID);
+    void addChildNameToParentCSVItem(const char* name);
+    void setParentItemRowEmpty(CCSVItem* item, bool empty);
+    void addFieldToParentXPath(const char* fieldName);
+    void removeFieldFromCurrentParentXPath(const char* fieldName);
+    void appendDataXPathItem(const char* fieldName, bool isArray);
+    bool isDataRow(const char* fieldName);
+    void outputCSVRows(CIArrayOf<CCSVRow>& rows, bool isHeader);
+    void outputHeadersToBuffer();
+    void finishContentResultRow();
+
+    //Appends one line per registered header item to the audit buffer (see auditStr()).
+    //Nested and plain items are reported in the same format.
+    void auditHeaderInfo()
+    {
+        ForEachItemIn(i, headerXPathList)
+        {
+            const char* path = headerXPathList.item(i);
+            CCSVItem* item = csvItems.getValue(path);
+            if (!item)
+                continue;
+            //Row and column IDs are unsigned, hence %u.
+            auditOut.appendf("dumpHeaderInfo path<%s> next row<%u> col<%u>: name<%s> - value<%s>\n", path, item->getNextRowID(),
+                item->getColumnID(), item->getName() ? item->getName() : "", item->getValue() ? item->getValue() : "");
+        }
+    }
+
+public:
+    CommonCSVWriter(unsigned _flags, CSVOptions& _options, IXmlStreamFlusher* _flusher = NULL);
+    ~CommonCSVWriter();
+
+    IMPLEMENT_IINTERFACE;
+
+    //Passes the 'out' buffer to the flusher, if one was supplied.
+    inline void flush(bool isClose)
+    {
+        if (flusher)
+            flusher->flushXML(out, isClose);
+    }
+
+    virtual unsigned length() const { return out.length(); }
+    virtual const char* str() const { return out.str(); }
+    virtual void rewindTo(IInterface* location) { }
+    //Locations cannot be saved: throws when a flusher is in use, otherwise returns NULL.
+    virtual IInterface* saveLocation() const
+    {
+        if (flusher)
+            throwUnexpected();
+        return NULL;
+    }
+
+    //IXmlWriter
+    virtual void outputString(unsigned len, const char* field, const char* fieldName);
+    virtual void outputBool(bool field, const char* fieldName);
+    virtual void outputData(unsigned len, const void* field, const char* fieldName);
+    virtual void outputInt(__int64 field, unsigned size, const char* fieldName);
+    virtual void outputUInt(unsigned __int64 field, unsigned size, const char* fieldName);
+    virtual void outputReal(double field, const char *fieldName);
+    virtual void outputDecimal(const void* field, unsigned size, unsigned precision, const char* fieldName);
+    virtual void outputUDecimal(const void* field, unsigned size, unsigned precision, const char* fieldName);
+    virtual void outputUnicode(unsigned len, const UChar* field, const char* fieldName);
+    virtual void outputQString(unsigned len, const char* field, const char* fieldName);
+    virtual void outputUtf8(unsigned len, const char* field, const char* fieldName);
+    virtual void outputBeginNested(const char* fieldName, bool simpleNested);
+    virtual void outputEndNested(const char* fieldName);
+    virtual void outputBeginDataset(const char* dsname, bool nestChildren);
+    virtual void outputEndDataset(const char* dsname);
+    virtual void outputBeginArray(const char* fieldName);
+    virtual void outputEndArray(const char* fieldName);
+    virtual void outputSetAll() { }
+    virtual void outputXmlns(const char* name, const char* uri) { }
+    virtual void outputQuoted(const char* text)
+    {
+        //No fieldName. Is it valid for CSV?
+    }
+    virtual void outputInlineXml(const char* text)//for appending raw xml content
+    {
+        //Dynamically add a new header 'xml' and insert the header.
+        //But, not sure we want to do that for a big WU result.
+        //if (text && *text)
+          //outputUtf8(strlen(text), text, "xml");
+    }
+
+    //IXmlWriterExt
+    virtual void outputNumericString(const char* field, const char* fieldName);
+    virtual IXmlWriterExt& clear();
+
+    //CSV header description (header phase only)
+    void outputBeginNested(const char* fieldName, bool simpleNested, bool outputHeader);
+    void outputEndNested(const char* fieldName, bool outputHeader);
+    void outputCSVHeader(const char* name, const char* type);
+    void finishCSVHeaders();
+    const char* auditStr() const { return auditOut.str(); }
+
+protected:
+    IXmlStreamFlusher* flusher;
+    StringBuffer out;
+    unsigned flags;
+};
+
 #endif // THORXMLWRITE_HPP

+ 2 - 1
esp/scm/esp.ecm

@@ -51,7 +51,8 @@ typedef enum ESPSerializationFormat_
 {
     ESPSerializationANY,
     ESPSerializationXML,
-    ESPSerializationJSON
+    ESPSerializationJSON,
+    ESPSerializationCSV
 } ESPSerializationFormat;
 
 #define ESPCTX_NO_NAMESPACES    0x00000001

+ 2 - 0
esp/services/ws_workunits/CMakeLists.txt

@@ -63,6 +63,7 @@ include_directories (
          ./../../../system/security/securesocket
          ./../../../system/security/shared
          ./../../../system/include
+         ./../../../common/thorhelper
          ./../../../common/workunit
          ./../../../common/wuwebview
          ./../../../ecl/schedulectrl
@@ -106,6 +107,7 @@ target_link_libraries ( ws_workunits
          hql
          jhtree
          fileview2
+         thorhelper
     )
 
 IF (USE_OPENSSL)

+ 158 - 2
esp/services/ws_workunits/ws_workunitsService.cpp

@@ -40,6 +40,11 @@
 #include "thorplugin.hpp"
 #include "roxiecontrol.hpp"
 
+#include "deftype.hpp"
+#include "thorxmlwrite.hpp"
+#include "fvdatasource.hpp"
+#include "fvresultset.ipp"
+
 #include "package.h"
 
 #ifdef _USE_ZLIB
@@ -2470,6 +2475,146 @@ bool CWsWorkunitsEx::onWULightWeightQuery(IEspContext &context, IEspWULightWeigh
     return true;
 }
 
+// Returns the type of the only column of 'meta' when that column has an xpath which is
+// present but empty; returns NULL when there is not exactly one column, or when the
+// column's xpath is missing or non-empty.
+// NOTE(review): 'meta' is downcast to CResultSetMetaData without a check, so callers
+// must pass an object of that class and must not pass NULL.
+ITypeInfo * containsSingleSimpleFieldBlankXPath(IResultSetMetaData * meta)
+{
+    if (meta->getColumnCount() == 1)
+    {
+        CResultSetMetaData * resultMeta = static_cast<CResultSetMetaData *>(meta);
+        const char * firstXPath = resultMeta->queryXPath(0);
+        const bool blankXPath = (firstXPath != NULL) && (firstXPath[0] == 0);
+        if (blankXPath)
+            return resultMeta->queryType(0);
+    }
+    return NULL;
+}
+
+// Splits an xpath of the form "name" or "name/child".
+//   xpath     - the path to split; when NULL nothing is modified.
+//   s         - scratch buffer that owns the text 'name' points at in the "name/child" case.
+//   name      - receives the xpath itself (no '/'), or the part before the '/'.
+//   childname - optional; set to NULL when the xpath has no '/', otherwise to the text
+//               following the '/'.
+// When the xpath contains a '/' but no childname was requested, or when it contains more
+// than one '/', both outputs are left exactly as the caller initialised them.
+void csvSplitXPath(const char *xpath, StringBuffer &s, const char *&name, const char **childname=NULL)
+{
+    if (xpath == NULL)
+        return;
+
+    const char * separator = strchr(xpath, '/');
+    if (separator == NULL)
+    {
+        //Simple path: the whole xpath is the name and there is no child
+        name = xpath;
+        if (childname != NULL)
+            *childname = NULL;
+        return;
+    }
+
+    //output ignores xpaths that are too deep
+    const bool tooDeep = (strchr(separator+1, '/') != NULL);
+    if ((childname == NULL) || tooDeep)
+        return;
+
+    s.clear().append(separator-xpath, xpath);
+    name = s.str();
+    *childname = separator+1;
+}
+
+// Walks the columns described by 'metaIn' and registers the corresponding CSV headers
+// with 'writer'.
+//   metaIn - result set meta data; it is downcast to CResultSetMetaData to reach the
+//            underlying IFvDataSourceMetaData and the per-column information.
+//   writer - CSV writer that receives the header and nested begin/end calls.
+//   layer  - incremented for the duration of each recursive call made for a child
+//            dataset and decremented again afterwards.
+// Column handling, by column flag:
+//   FVFFbeginif / FVFFendif         - ignored.
+//   FVFFbeginrecord / FVFFendrecord - begin/end a nested header named after the column.
+//   FVFFdataset                     - unless the child meta is a single field with a blank
+//                                     xpath, wraps the child dataset's headers (added by
+//                                     recursion) in nested name/childname headers.
+//   FVFFblob                        - a plain header using the ECL type name of a string.
+//   anything else                   - a set column becomes an empty simple nested header;
+//                                     any other column becomes a plain header with the
+//                                     ECL type name of the column.
+void getCSVHeaders(const IResultSetMetaData& metaIn, CommonCSVWriter* writer, unsigned& layer)
+{
+    StringBuffer xname;
+    const CResultSetMetaData& cMeta = static_cast<const CResultSetMetaData &>(metaIn);
+    IFvDataSourceMetaData* meta = cMeta.getMeta();
+
+    //Hold the count as unsigned so it is compared with the unsigned index without a
+    //signed/unsigned mix; the value compared is the same as with the implicit conversion.
+    const unsigned columnCount = static_cast<unsigned>(metaIn.getColumnCount());
+    for (unsigned idx = 0; idx < columnCount; idx++)
+    {
+        const CResultSetColumnInfo& column = cMeta.getColumn(idx);
+        unsigned flag = column.flag;
+        //Default to the column name; csvSplitXPath() overrides it when the xpath is usable.
+        const char * name = meta->queryName(idx);
+        const char * childname = NULL;
+        switch (flag)
+        {
+        case FVFFbeginif:
+        case FVFFendif:
+            break;
+        case FVFFbeginrecord:
+            csvSplitXPath(meta->queryXPath(idx), xname, name);
+            writer->outputBeginNested(name, false, true);
+            break;
+        case FVFFendrecord:
+            csvSplitXPath(meta->queryXPath(idx), xname, name);
+            writer->outputEndNested(name, true);
+            break;
+        case FVFFdataset:
+            {
+                //"Row" is the child name used when the xpath does not supply one.
+                childname = "Row";
+                csvSplitXPath(meta->queryXPath(idx), xname, name, &childname);
+                ITypeInfo* singleFieldType = (name && *name && childname && *childname)
+                    ? containsSingleSimpleFieldBlankXPath(column.childMeta.get()) : NULL;
+                if (!singleFieldType)
+                {
+                    bool nameValid = (name && *name);
+                    if (nameValid || (childname && *childname))
+                    {
+                        //The child level is opened with the header flag set only when
+                        //there is no valid outer name.
+                        if (nameValid)
+                            writer->outputBeginNested(name, false, true);
+                        if (childname && *childname)
+                            writer->outputBeginNested(childname, false, !nameValid);
+
+                        const CResultSetMetaData *childMeta = static_cast<const CResultSetMetaData *>(column.childMeta.get());
+                        getCSVHeaders(*childMeta, writer, ++layer);
+                        layer--;
+
+                        //Close in the reverse order of opening.
+                        if (childname && *childname)
+                            writer->outputEndNested(childname, !nameValid);
+                        if (nameValid)
+                            writer->outputEndNested(name, true);
+                    }
+                }
+                break;
+            }
+        case FVFFblob: //for now FileViewer will output the string "[blob]"
+            {
+                Owned<ITypeInfo> stringType = makeStringType(UNKNOWN_LENGTH, NULL, NULL);
+                csvSplitXPath(meta->queryXPath(idx), xname, name);
+
+                StringBuffer eclTypeName;
+                stringType->getECLType(eclTypeName);
+                writer->outputCSVHeader(name, eclTypeName.str());
+            }
+            break;
+        default:
+            {
+                ITypeInfo & type = *column.type;
+                if (type.getTypeCode() == type_set)
+                {
+                    //Passing &childname lets csvSplitXPath() accept a "name/child" xpath;
+                    //only the name part is used for the header.
+                    childname = "Item";
+                    csvSplitXPath(meta->queryXPath(idx), xname, name, &childname);
+                    writer->outputBeginNested(name, true, true);
+                    writer->outputEndNested(name, true);
+                }
+                else
+                {
+                    csvSplitXPath(meta->queryXPath(idx), xname, name);
+
+                    StringBuffer eclTypeName;
+                    type.getECLType(eclTypeName);
+                    writer->outputCSVHeader(name, eclTypeName.str());
+                }
+                break;
+            }
+        }
+    }
+}
+
+// Writes rows of 'result' as CSV text into 'ret'.
+//   ret    - receives the complete text produced by the CSV writer.
+//   result - result set to read; a cursor is created on it.
+//   name   - passed through to writeResultCursorXml().
+//   start  - passed through to writeResultCursorXml() as the starting position.
+//   count  - in: passed to writeResultCursorXml(); out: overwritten with its return value.
+// Returns the updated 'count'. The output uses ',' as delimiter, '\n' as terminator and
+// includes headers, which are generated from the result's meta data before any row is written.
+unsigned getResultCSV(IStringVal& ret, INewResultSet* result, const char* name, __int64 start, unsigned& count)
+{
+    CSVOptions options;
+    options.includeHeader = true;
+    options.delimiter.set(",");
+    options.terminator.set("\n");
+
+    Owned<CommonCSVWriter> csvWriter = new CommonCSVWriter(XWFtrim, options);
+    Owned<IResultSetCursor> rows = result->createCursor();
+
+    //Headers have to be complete before the rows are streamed through the writer.
+    unsigned nestingLayer = 0;
+    getCSVHeaders(rows->queryResultSet()->getMetaData(), csvWriter, nestingLayer);
+    csvWriter->finishCSVHeaders();
+
+    count = writeResultCursorXml(*csvWriter, rows, name, start, count, NULL);
+    ret.set(csvWriter->str());
+    return count;
+}
+
 void appendResultSet(MemoryBuffer& mb, INewResultSet* result, const char *name, __int64 start, unsigned& count, __int64& total, bool bin, bool xsd, ESPSerializationFormat fmt, const IProperties *xmlns)
 {
     if (!result)
@@ -2495,7 +2640,9 @@ void appendResultSet(MemoryBuffer& mb, INewResultSet* result, const char *name,
             MemoryBuffer & buffer;
         } adaptor(mb);
 
-        if (fmt==ESPSerializationJSON)
+        if (fmt==ESPSerializationCSV)
+            count = getResultCSV(adaptor, result, name, (unsigned) start, count);
+        else if (fmt==ESPSerializationJSON)
             count = getResultJSON(adaptor, result, name, (unsigned) start, count, (xsd) ? "myschema" : NULL);
         else
             count = getResultXml(adaptor, result, name, (unsigned) start, count, (xsd) ? "myschema" : NULL, xmlns);
@@ -2840,6 +2987,9 @@ bool CWsWorkunitsEx::onWUResultBin(IEspContext &context,IEspWUResultBinRequest &
         IArrayOf<IConstNamedValue>* filterBy = &req.getFilterBy();
         SCMStringBuffer name;
 
+        if(strieq(req.getFormat(),"csv"))
+            context.setResponseFormat(ESPSerializationCSV);
+
         WUState wuState = WUStateUnknown;
         bool bin = (req.getFormat() && strieq(req.getFormat(),"raw"));
         if (notEmpty(wuidIn) && notEmpty(req.getResultName()))
@@ -2865,7 +3015,13 @@ bool CWsWorkunitsEx::onWUResultBin(IEspContext &context,IEspWUResultBinRequest &
         else
             throw MakeStringException(ECLWATCH_CANNOT_GET_WU_RESULT,"Cannot open the workunit result.");
 
-        if(stricmp(req.getFormat(),"xls")==0)
+        if(strieq(req.getFormat(),"csv"))
+        {
+            resp.setResult(mb);
+            resp.setResult_mimetype("text/csv");
+            context.addCustomerHeader("Content-disposition", "attachment;filename=WUResult.csv");
+        }
+        else if(stricmp(req.getFormat(),"xls")==0)
         {
             Owned<IProperties> params(createProperties());
             params->setProp("showCount",0);