Explorar el Código

Merge pull request #713 from ghalliday/issue610

Fix set of string problems and move code to plugin

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman hace 13 años
padre
commit
b19ae9b9ab

+ 12 - 0
ecl/hql/hqlutil.cpp

@@ -7213,6 +7213,7 @@ bool ConstantRowCreator::processElement(IHqlExpression * expr, IHqlExpression *
     }
 }
 
+
 bool ConstantRowCreator::processRecord(IHqlExpression * record, IHqlExpression * parentSelector)
 {
     ForEachChild(idx, record)
@@ -7366,3 +7367,14 @@ IHqlExpression * convertAttributeToQuery(IHqlExpression * expr, HqlLookupContext
         query.set(transformed);
     }
 }
+
+//Returns true when type is a set or an array and isUnknownSize() holds for its child (element) type.
+//Every other type code gives false.
+bool isSetWithUnknownElementSize(ITypeInfo * type)
+{
+    const type_t tc = type->getTypeCode();
+    if ((tc != type_set) && (tc != type_array))
+        return false;
+    return isUnknownSize(type->queryChildType());
+}

+ 1 - 0
ecl/hql/hqlutil.hpp

@@ -619,6 +619,7 @@ extern HQL_API bool createConstantNullRow(MemoryBuffer & target, IHqlExpression
 extern HQL_API IHqlExpression * createConstantRowExpr(IHqlExpression * transform);
 extern HQL_API IHqlExpression * createConstantNullRowExpr(IHqlExpression * record);
 extern HQL_API IHqlExpression * ensureOwned(IHqlExpression * expr);
+extern HQL_API bool isSetWithUnknownElementSize(ITypeInfo * type);
 
 //In hqlgram2.cpp
 extern HQL_API IPropertyTree * queryEnsureArchiveModule(IPropertyTree * archive, const char * name, IHqlScope * rScope);

+ 69 - 25
ecl/hqlcpp/hqlcpp.cpp

@@ -5844,6 +5844,12 @@ void HqlCppTranslator::doBuildCall(BuildCtx & ctx, const CHqlBoundTarget * tgt,
                     buildExprEnsureType(ctx, castParam, bound, argType);
                 else
                     buildExpr(ctx, castParam, bound);
+
+                if (isUnknownSize(elemType))
+                {
+                    ITypeInfo * boundElemType = bound.queryType()->queryChildType();
+                    assertex(!boundElemType || isUnknownSize(boundElemType));
+                }
                 normalizeBoundExpr(ctx, bound);
                 break;
             }
@@ -7232,7 +7238,7 @@ void HqlCppTranslator::doBuildAssignIndex(BuildCtx & ctx, const CHqlBoundTarget
 //---------------------------------------------------------------------------
 //-- no_list --
 
-bool isComplexSet(ITypeInfo * type)
+bool isComplexSet(ITypeInfo * type, bool isConstant)
 {
     ITypeInfo * childType = type->queryChildType();
     if (!childType)
@@ -7247,7 +7253,7 @@ bool isComplexSet(ITypeInfo * type)
     case type_unicode:
     case type_varstring:
     case type_varunicode:
-        return (childType->getSize() == UNKNOWN_LENGTH);
+        return isUnknownSize(childType) && !isConstant;
     case type_utf8:
     case type_swapint:
     case type_packedint:
@@ -7265,7 +7271,7 @@ bool isComplexSet(ITypeInfo * type)
 
 bool isComplexSet(IHqlExpression * expr)
 {
-    return isComplexSet(expr->queryType());
+    return isComplexSet(expr->queryType(), isConstantSet(expr));
 }
 
 bool isConstantSet(IHqlExpression * expr)
@@ -7282,6 +7288,30 @@ bool isConstantSet(IHqlExpression * expr)
 }
 
 
+//Appends a serialized form of each child of a set expression to target.
+//For every element type except varstring/varunicode the element's string length is written first (4 bytes, via
+//rtlWriteInt4), followed by the bytes produced by IValue::toMem().  varstring/varunicode elements have no prefix.
+//Returns false as soon as a child has no value (queryValue() is NULL); data already appended is left in target.
+bool createUnknownLengthStringSet(MemoryBuffer & target, IHqlExpression * set)
+{
+    const type_t elemTypeCode = set->queryType()->queryChildType()->getTypeCode();
+    const bool needsLengthPrefix = (elemTypeCode != type_varstring) && (elemTypeCode != type_varunicode);
+
+    ForEachChild(idx, set)
+    {
+        IValue * value = set->queryChild(idx)->queryValue();
+        if (!value)
+            return false;
+
+        const size32_t valueSize = value->getSize();
+        if (needsLengthPrefix)
+        {
+            const size32_t valueLen = value->queryType()->getStringLen();
+            rtlWriteInt4(target.reserve(sizeof(size32_t)), valueLen);
+        }
+        value->toMem(target.reserve(valueSize));
+    }
+    return true;
+}
+
+
 void HqlCppTranslator::doBuildExprConstList(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt)
 {
     BuildCtx declareCtx(*code, literalAtom);
@@ -7293,11 +7323,29 @@ void HqlCppTranslator::doBuildExprConstList(BuildCtx & ctx, IHqlExpression * exp
         if (!elementType)
             throwError(HQLERR_NullSetCannotGenerate);
         Owned<ITypeInfo> transferType;
-
+        OwnedHqlExpr table;
+        OwnedHqlExpr declare;
+        unsigned numElements = expr->numChildren();
         LinkedHqlExpr values = expr;
-        if ((isTypePassedByAddress(elementType) && (elementType->getTypeCode() != type_varstring)))
+        if ((isTypePassedByAddress(elementType) && ((elementType->getTypeCode() != type_varstring) || isUnknownSize(elementType))))
         {
-            if (elementType->isReference())
+            if (isUnknownSize(elementType))
+            {
+                MemoryBuffer serialized;
+                bool ok = createUnknownLengthStringSet(serialized, values);
+                assertex(ok);
+                Owned<IValue> value = createDataValue(serialized.toByteArray(), serialized.length());
+                OwnedHqlExpr constValue = createConstant(LINK(value));
+                OwnedHqlExpr initializer = createValue(no_create_initializer, constValue->getType(), LINK(constValue));
+
+                Owned<ITypeInfo> declareType = makeConstantModifier(constValue->getType());
+                declare.setown(declareCtx.getTempDeclare(declareType, initializer));
+
+                ITypeInfo * arrayType = makeReferenceModifier(makeSetType(LINK(elementType)));
+                table.setown(createValue(no_typetransfer, arrayType, LINK(declare)));
+                tgt.length.setown(getSizetConstant(serialized.length()));
+            }
+            else if (elementType->isReference())
             {
                 // use a var string type to get better C++ generated...
                 transferType.set(elementType);
@@ -7317,18 +7365,20 @@ void HqlCppTranslator::doBuildExprConstList(BuildCtx & ctx, IHqlExpression * exp
             }
         }
 
-        unsigned numElements = expr->numChildren();
-        Owned<ITypeInfo> t = makeConstantModifier(makeArrayType(LINK(elementType), numElements));
-        IHqlExpression * table = declareCtx.getTempDeclare(t, values);
-
-        if (transferType)
+        if (!declare)
         {
-            ITypeInfo * arrayType = makeArrayType(LINK(transferType), numElements);
-            table = createValue(no_typetransfer, arrayType, table);
+            Owned<ITypeInfo> t = makeConstantModifier(makeArrayType(LINK(elementType), numElements));
+            declare.setown(declareCtx.getTempDeclare(t, values));
+
+            if (transferType)
+            {
+                ITypeInfo * arrayType = makeArrayType(LINK(transferType), numElements);
+                table.setown(createValue(no_typetransfer, arrayType, LINK(declare)));
+            }
         }
 
         tgt.count.setown(getSizetConstant(numElements));
-        tgt.expr.setown(table);
+        tgt.expr.set(table ? table : declare);
 
         //make sure tables get added before any global functions
         declareCtx.associateExpr(expr, tgt);
@@ -7336,7 +7386,7 @@ void HqlCppTranslator::doBuildExprConstList(BuildCtx & ctx, IHqlExpression * exp
         if (options.spanMultipleCpp)
         {
             BuildCtx protoctx(*code, mainprototypesAtom);
-            protoctx.addDeclareExternal(table);
+            protoctx.addDeclareExternal(declare);
         }
     }
 }
@@ -7370,9 +7420,8 @@ void HqlCppTranslator::doBuildExprDynList(BuildCtx & ctx, IHqlExpression * expr,
     }
 }
 
-void HqlCppTranslator::doBuildExprList(BuildCtx & ctx, IHqlExpression * _expr, CHqlBoundExpr & tgt)
+void HqlCppTranslator::doBuildExprList(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt)
 {
-    OwnedHqlExpr expr = simplifyFixedLengthList(_expr);
     ITypeInfo * type = expr->queryType();
     switch (type->getTypeCode())
     {
@@ -7394,12 +7443,6 @@ void HqlCppTranslator::doBuildExprList(BuildCtx & ctx, IHqlExpression * _expr, C
                 buildTempExpr(ctx, expr, tgt);
                 return;
             }
-            else if (childType->getSize() == 0)
-            {
-                //codes := [''] convert it to a set of a single char string for the moment.
-                Owned<ITypeInfo> newType = makeSetType(getStretchedType(1, childType));
-                values.setown(ensureExprType(expr, newType));
-            }
 
             if (isConstantSet(expr))
                 doBuildExprConstList(ctx, values, tgt);
@@ -7455,8 +7498,9 @@ void HqlCppTranslator::doBuildAssignList(BuildCtx & ctx, const CHqlBoundTarget &
     else
     {
         OwnedHqlExpr cast = ensureExprType(expr, target.queryType());
+        OwnedHqlExpr simpleCast = simplifyFixedLengthList(cast);
         // can do a direct assignment without any casts
-        doBuildExprAssign(ctx, target, cast);
+        doBuildExprAssign(ctx, target, simpleCast);
     }
 }
 void HqlCppTranslator::doBuildExprAll(BuildCtx & ctx, IHqlExpression * expr, CHqlBoundExpr & tgt)
@@ -8914,7 +8958,7 @@ void HqlCppTranslator::doBuildExprOrdered(BuildCtx & ctx, IHqlExpression * expr,
         return;
 
     bool ascending = true;
-    IHqlExpression * list = expr->queryChild(0);
+    OwnedHqlExpr list = simplifyFixedLengthList(expr->queryChild(0));
     IHqlExpression * attr = expr->queryChild(1);
     if (attr && attr->isAttribute() && (attr->queryName() == descAtom))
         ascending = false;

+ 1 - 1
ecl/hqlcpp/hqlcpp.ipp

@@ -1970,7 +1970,7 @@ extern HQLCPP_API IHqlExpression * getPointer(IHqlExpression * source);
 extern IHqlExpression * adjustIndexBaseToZero(IHqlExpression * index);
 extern IHqlExpression * adjustIndexBaseToOne(IHqlExpression * index);
 extern IHqlExpression * multiplyValue(IHqlExpression * expr, unsigned __int64 value);
-extern bool isComplexSet(ITypeInfo * type);
+extern bool isComplexSet(ITypeInfo * type, bool isConstant);
 extern bool isComplexSet(IHqlExpression * expr);
 extern bool isConstantSet(IHqlExpression * expr);
 

+ 1 - 1
ecl/hqlcpp/hqlhtcpp.cpp

@@ -5032,7 +5032,7 @@ void HqlCppTranslator::buildSetResultInfo(BuildCtx & ctx, IHqlExpression * origi
         if (options.spotCSE)
             cseValue.setown(spotScalarCSE(cseValue));
 
-        if ((retType == type_set) && isComplexSet(resultType) && castValue->getOperator() == no_list && !isNullList(castValue))
+        if ((retType == type_set) && isComplexSet(resultType, false) && castValue->getOperator() == no_list && !isNullList(castValue))
         {
             CHqlBoundTarget tempTarget;
             createTempFor(ctx, resultType, tempTarget, typemod_none, FormatBlockedDataset);

+ 2 - 0
ecl/hqlcpp/hqlstmt.cpp

@@ -210,6 +210,7 @@ IHqlStmt * BuildCtx::addContinue()
 
 IHqlStmt * BuildCtx::addDeclare(IHqlExpression * name, IHqlExpression * value)
 {
+    assertex(name->getOperator() == no_variable);
     if (ignoreInput)
         return NULL;
     HqlStmt * next = new HqlStmt(declare_stmt, curStmts);
@@ -224,6 +225,7 @@ IHqlStmt * BuildCtx::addDeclare(IHqlExpression * name, IHqlExpression * value)
 
 IHqlStmt * BuildCtx::addDeclareExternal(IHqlExpression * name)
 {
+    assertex(name->getOperator() == no_variable);
     if (ignoreInput)
         return NULL;
     HqlStmt * next = new HqlStmt(external_stmt, curStmts);

+ 16 - 133
ecllibrary/std/Str.ecl

@@ -230,7 +230,7 @@ export BOOLEAN EndsWith(STRING src, STRING suffix) := src[LENGTH(TRIM(src))-LENG
  * @param src           The string being searched in.
  * @param suffix        The suffix to remove.
  */
-export STRING RemoveSuffix(STRING src, STRING suffix) :=
+EXPORT STRING RemoveSuffix(STRING src, STRING suffix) :=
             IF(EndsWith(src, suffix), src[1..length(trim(src))-length(trim(suffix))], src);
 
 
@@ -242,139 +242,10 @@ EXPORT STRING ExtractMultiple(string src, unsigned8 mask) := lib_stringlib.Strin
  * 
  * @param src           The string being searched in.
  * @param separator     The string used to separate words
+ * @param allow_blank   Indicates if empty/blank string items are included in the results.
  */
 
-export UNSIGNED4 CountWords(STRING src, STRING separator) := BEGINC++
-    if (lenSrc == 0)
-        return 0;
-            
-    if ((lenSeparator == 0) || (lenSrc < lenSeparator))
-        return 1;
-    
-    unsigned numWords=0;
-    const char * end = src + lenSrc;
-    const char * max = end - (lenSeparator - 1);
-    const char * cur = src;
-    const char * startWord = NULL;
-    //MORE: optimize lenSeparator == 1!
-    while (cur < max)
-    {
-        if (memcmp(cur, separator, lenSeparator) == 0)
-        {
-            if (startWord)
-            {
-                numWords++;
-                startWord = NULL;
-            }
-            cur += lenSeparator;
-        }
-        else
-        {
-            if (!startWord)
-                startWord = cur;
-            cur++;
-        }
-    }
-    if (startWord || (cur != end))
-        numWords++;
-    return numWords;
-ENDC++;
-
-
-SHARED UNSIGNED4 calcWordSetSize(STRING src, STRING separator) := BEGINC++
-    if (lenSrc == 0)
-        return 0;
-            
-    if ((lenSeparator == 0) || (lenSrc < lenSeparator))
-        return sizeof(size32_t) + lenSrc;
-    
-    unsigned sizeWords=0;
-    const char * end = src + lenSrc;
-    const char * max = end - (lenSeparator - 1);
-    const char * cur = src;
-    const char * startWord = NULL;
-    //MORE: optimize lenSeparator == 1!
-    while (cur < max)
-    {
-        if (memcmp(cur, separator, lenSeparator) == 0)
-        {
-            if (startWord)
-            {
-                sizeWords += sizeof(size32_t) + (cur - startWord);
-                startWord = NULL;
-            }
-            cur += lenSeparator;
-        }
-        else
-        {
-            if (!startWord)
-                startWord = cur;
-            cur++;
-        }
-    }
-    if (startWord || (cur != end))
-    {
-        if (!startWord)
-            startWord = cur;
-        sizeWords += sizeof(size32_t) + (end - startWord);
-    }
-    return sizeWords;
-ENDC++;
-
-
-//Should be moved into the stringlib helper dll + single character case optimized.
-SHARED SET OF STRING doSplitWords(STRING src, STRING separator, unsigned calculatedSize) := BEGINC++
-    char * result = static_cast<char *>(rtlMalloc(calculatedsize));
-    __isAllResult = false;
-    __lenResult = calculatedsize;
-    __result = result;
-    
-    if (lenSrc == 0)
-        return;
-            
-    if ((lenSeparator == 0) || (lenSrc < lenSeparator))
-    {
-        rtlWriteSize32t(result, lenSrc);
-        memcpy(result+sizeof(size32_t), src, lenSrc);
-        return;
-    }
-    
-    unsigned sizeWords=0;
-    const char * end = src + lenSrc;
-    const char * max = end - (lenSeparator - 1);
-    const char * cur = src;
-    const char * startWord = NULL;
-    //MORE: optimize lenSeparator == 1!
-    while (cur < max)
-    {
-        if (memcmp(cur, separator, lenSeparator) == 0)
-        {
-            if (startWord)
-            {
-                size32_t len = (cur - startWord);
-                rtlWriteSize32t(result, len);
-                memcpy(result+sizeof(size32_t), startWord, len);
-                result += sizeof(size32_t) + len;
-                startWord = NULL;
-            }
-            cur += lenSeparator;
-        }
-        else
-        {
-            if (!startWord)
-                startWord = cur;
-            cur++;
-        }
-    }
-    if (startWord || (cur != end))
-    {
-        if (!startWord)
-            startWord = cur;
-        size32_t len = (end - startWord);
-        rtlWriteSize32t(result, len);
-        memcpy(result+sizeof(size32_t), startWord, len);
-    }
-ENDC++;
+EXPORT UNSIGNED4 CountWords(STRING src, STRING separator, BOOLEAN allow_blank = FALSE) := lib_stringlib.StringLib.CountWords(src, separator, allow_blank);
 
 /**
  * Returns the list of words extracted from the string.  Words are separated by one or more separator strings. No 
@@ -382,9 +253,21 @@ ENDC++;
  * 
  * @param src           The string being searched in.
  * @param separator     The string used to separate words
+ * @param allow_blank   Indicates if empty/blank string items are included in the results.
  */
  
-EXPORT SET OF STRING SplitWords(STRING src, STRING separator) := doSplitWords(src, separator, calcWordSetSize(src, separator));
+EXPORT SET OF STRING SplitWords(STRING src, STRING separator, BOOLEAN allow_blank = FALSE) := lib_stringlib.StringLib.SplitWords(src, separator, allow_blank);
+
+
+/**
+ * Returns a single string formed by concatenating the words in the set, with the separator string placed between
+ * adjacent words.  No spaces are stripped from either the words or the separator.
+ *
+ * @param words         The set of strings to be combined
+ * @param separator     The string used to separate words
+ */
+
+EXPORT STRING CombineWords(SET OF STRING words, STRING separator) := lib_stringlib.StringLib.CombineWords(words, separator);
 
 
 /**

+ 16 - 0
ecllibrary/teststd/str/TestCombineWords.ecl

@@ -0,0 +1,16 @@
+/*##############################################################################
+## Copyright (c) 2011 HPCC Systems.  All rights reserved.
+############################################################################## */
+
+IMPORT Std.Str;
+
+EXPORT TestCombineWords := MODULE
+  EXPORT TestRuntime := MODULE
+    EXPORT Test01 := ASSERT(Str.CombineWords([],',') = '');
+    EXPORT Test02 := ASSERT(Str.CombineWords(['x'],',') = 'x');
+    EXPORT Test03 := ASSERT(Str.CombineWords(['x','y'],',') = 'x,y');
+    EXPORT Test04 := ASSERT(Str.CombineWords(['',''],',') = ',');
+    EXPORT Test05 := ASSERT(Str.CombineWords(['',''],'') = '');
+    EXPORT Test06 := ASSERT(Str.CombineWords(['abc','def','ghi'],'') = 'abcdefghi');
+  END;
+END;

+ 3 - 0
ecllibrary/teststd/str/TestCountWords.ecl

@@ -27,6 +27,9 @@ EXPORT TestCountWords := MODULE
     EXPORT Test18 := ASSERT(Str.CountWords('$$x$$', '$$') = 1);
     EXPORT Test19 := ASSERT(Str.CountWords('$$x$$y', '$$') = 2);
     EXPORT Test20 := ASSERT(Str.CountWords('$$x$$xy', '$$') = 2);
+    EXPORT Test21 := ASSERT(Str.CountWords('a,c,d', ',', TRUE) = 3);
+    EXPORT Test22 := ASSERT(Str.CountWords('a,,d', ',', TRUE) = 3);
+    EXPORT Test23 := ASSERT(Str.CountWords(',,,', ',', TRUE) = 4);
   END;
 
 END;

+ 22 - 20
ecllibrary/teststd/str/TestSplitWords.ecl

@@ -7,26 +7,28 @@ IMPORT Std.Str;
 EXPORT TestSplitWords := MODULE
 
   EXPORT TestRuntime := MODULE
-    EXPORT Test01 := ASSERT(Str.SplitWords('', '') = global([]));
-    EXPORT Test02 := ASSERT(Str.SplitWords('x', '') = global(['x']));
-    EXPORT Test03 := ASSERT(Str.SplitWords('x', ' ') = global(['x']));
-    EXPORT Test04 := ASSERT(Str.SplitWords(' ', ' ') = global([]));
-    EXPORT Test05 := ASSERT(Str.SplitWords('  ', ' ') = global([]));
-    EXPORT Test06 := ASSERT(Str.SplitWords('x ', ' ') = global(['x']));
-    EXPORT Test07 := ASSERT(Str.SplitWords(' x', ' ') = global(['x']));
-    EXPORT Test08 := ASSERT(Str.SplitWords(' x ', ' ') = global(['x']));
-    EXPORT Test09 := ASSERT(Str.SplitWords(' abc def ', ' ') = global(['abc','def']));
-    EXPORT Test10 := ASSERT(Str.SplitWords(' abc   def ', ' ') = global(['abc','def']));
-    EXPORT Test11 := ASSERT(Str.SplitWords(' a b c   def ', ' ') = global(['a','b','c','def']));
-    EXPORT Test12 := ASSERT(Str.SplitWords(' abc   def', ' ') = global(['abc','def']));
-    EXPORT Test13 := ASSERT(Str.SplitWords('$', '$$') = global(['$']));
-    EXPORT Test14 := ASSERT(Str.SplitWords('$x', '$$') = global(['$x']));
-    EXPORT Test15 := ASSERT(Str.SplitWords('$$', '$$') = global([]));
-    EXPORT Test16 := ASSERT(Str.SplitWords('$$$', '$$') = global(['$']));
-    EXPORT Test17 := ASSERT(Str.SplitWords('$$$$', '$$') = global([]));
-    EXPORT Test18 := ASSERT(Str.SplitWords('$$x$$', '$$') = global(['x']));
-    EXPORT Test19 := ASSERT(Str.SplitWords('$$x$$y', '$$') = global(['x','y']));
-    EXPORT Test20 := ASSERT(Str.SplitWords('$$x$$xy', '$$') = global(['x','xy']));
+    EXPORT Test01 := ASSERT(Str.SplitWords('', '') = []);
+    EXPORT Test02 := ASSERT(Str.SplitWords('x', '') = ['x']);
+    EXPORT Test03 := ASSERT(Str.SplitWords('x', ' ') = ['x']);
+    EXPORT Test04 := ASSERT(Str.SplitWords(' ', ' ') = []);
+    EXPORT Test05 := ASSERT(Str.SplitWords('  ', ' ') = []);
+    EXPORT Test06 := ASSERT(Str.SplitWords('x ', ' ') = ['x']);
+    EXPORT Test07 := ASSERT(Str.SplitWords(' x', ' ') = ['x']);
+    EXPORT Test08 := ASSERT(Str.SplitWords(' x ', ' ') = ['x']);
+    EXPORT Test09 := ASSERT(Str.SplitWords(' abc def ', ' ') = ['abc','def']);
+    EXPORT Test10 := ASSERT(Str.SplitWords(' abc   def ', ' ') = ['abc','def']);
+    EXPORT Test11 := ASSERT(Str.SplitWords(' a b c   def ', ' ') = ['a','b','c','def']);
+    EXPORT Test12 := ASSERT(Str.SplitWords(' abc   def', ' ') = ['abc','def']);
+    EXPORT Test13 := ASSERT(Str.SplitWords('$', '$$') = ['$']);
+    EXPORT Test14 := ASSERT(Str.SplitWords('$x', '$$') = ['$x']);
+    EXPORT Test15 := ASSERT(Str.SplitWords('$$', '$$') = []);
+    EXPORT Test16 := ASSERT(Str.SplitWords('$$$', '$$') = ['$']);
+    EXPORT Test17 := ASSERT(Str.SplitWords('$$$$', '$$') = []);
+    EXPORT Test18 := ASSERT(Str.SplitWords('$$x$$', '$$') = ['x']);
+    EXPORT Test19 := ASSERT(Str.SplitWords('$$x$$y', '$$') = ['x','y']);
+    EXPORT Test20 := ASSERT(Str.SplitWords('$$x$$xy', '$$') = ['x','xy']);
+    EXPORT Test21 := ASSERT(Str.SplitWords('$$x$$xy', '$$', TRUE) = ['','x','xy']);
+    EXPORT Test22 := ASSERT(Str.SplitWords('$$$$', '$$',TRUE) = ['','','']);
   END;
 
 END;

+ 202 - 1
plugins/stringlib/stringlib.cpp

@@ -20,6 +20,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include <assert.h>
+
 #include "stringlib.hpp"
 #include "wildmatch.tpp"
 
@@ -32,9 +34,10 @@ static const char * compatibleVersions[] = {
     "STRINGLIB 1.1.10",
     "STRINGLIB 1.1.11",
     "STRINGLIB 1.1.12",
+    "STRINGLIB 1.1.13",
     NULL };
 
-#define STRINGLIB_VERSION "STRINGLIB 1.1.13"
+#define STRINGLIB_VERSION "STRINGLIB 1.1.14"
 
 const char * EclDefinition =  
 "export StringLib := SERVICE\n"
@@ -72,6 +75,9 @@ const char * EclDefinition =
 "  boolean EditDistanceWithinRadiusV2(const string l, const string r, unsigned4 radius) : c,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
 "  string StringGetNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringGetNthWord'; \n"
 "  unsigned4 StringWordCount(const string src) : c, pure,entrypoint='slStringWordCount'; \n"
+"  unsigned4 CountWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slCountWords'; \n"
+"  SET OF STRING SplitWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slSplitWords'; \n"
+"  STRING CombineWords(set of string src, const string _separator) : c, pure,entrypoint='slCombineWords'; \n"
 "END;";
 
 STRINGLIB_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb) 
@@ -1188,6 +1194,201 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
     return word_count;
 }
 
+//--------------------------------------------------------------------------------------------------------------------
+
+//Counts the items in src that are delimited by separator.
+//  - an empty src gives 0
+//  - an empty separator, or a separator longer than src, gives 1
+//  - otherwise each run of text between separator matches is one item; when allowBlankItems is true every
+//    separator match ends an item (even an empty one) and a final item is always counted.
+STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
+{
+    if (lenSrc == 0)
+        return 0;
+
+    if ((lenSeparator == 0) || (lenSrc < lenSeparator))
+        return 1;
+
+    const char * const endSrc = src + lenSrc;
+    const char * const scanLimit = endSrc - (lenSeparator - 1);   // a full separator cannot start at or beyond this point
+    const char * pos = src;
+    bool inWord = false;
+    unsigned total = 0;
+    //MORE: optimize lenSeparator == 1!
+    while (pos < scanLimit)
+    {
+        if (memcmp(pos, separator, lenSeparator) != 0)
+        {
+            inWord = true;
+            pos++;
+            continue;
+        }
+
+        if (inWord || allowBlankItems)
+        {
+            total++;
+            inWord = false;
+        }
+        pos += lenSeparator;
+    }
+
+    //Trailing item: an unfinished word, leftover bytes too short to hold a separator, or a blank final item.
+    if (inWord || (pos != endSrc) || allowBlankItems)
+        total++;
+    return total;
+}
+
+
+//Calculates the number of bytes needed to hold the items of src (delimited by separator) when each item is stored
+//as a size32_t length followed by its characters.  The scan follows the same rules as slSplitWords(), which uses
+//this value to size its result buffer.
+static unsigned calcWordSetSize(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
+{
+    if (lenSrc == 0)
+        return 0;
+
+    if ((lenSeparator == 0) || (lenSrc < lenSeparator))
+        return sizeof(size32_t) + lenSrc;
+
+    const char * const endSrc = src + lenSrc;
+    const char * const scanLimit = endSrc - (lenSeparator - 1);
+    const char * pos = src;
+    const char * wordStart = NULL;
+    unsigned total = 0;
+    //MORE: optimize lenSeparator == 1!
+    while (pos < scanLimit)
+    {
+        if (memcmp(pos, separator, lenSeparator) != 0)
+        {
+            if (!wordStart)
+                wordStart = pos;
+            pos++;
+            continue;
+        }
+
+        if (wordStart)
+        {
+            total += sizeof(size32_t) + (pos - wordStart);
+            wordStart = NULL;
+        }
+        else if (allowBlankItems)
+            total += sizeof(size32_t);      // blank item: length header only
+
+        pos += lenSeparator;
+    }
+
+    if (wordStart || (pos != endSrc) || allowBlankItems)
+    {
+        const char * const tailStart = wordStart ? wordStart : pos;
+        total += sizeof(size32_t) + (endSrc - tailStart);
+    }
+    return total;
+}
+
+//Splits src into the items delimited by separator and returns them as a set of strings.
+//The result buffer (allocated with CTXMALLOC) holds each item as a size32_t length followed by its characters.
+//  __isAllResult   always set to false
+//  __lenResult     receives the size of the result in bytes, as computed by calcWordSetSize()
+//  __result        receives the allocated buffer
+//  allowBlankItems when true, empty items (adjacent, leading or trailing separators) are emitted as zero-length
+//                  entries; when false they are skipped
+//An empty src gives an empty set; an empty separator, or one longer than src, gives src as the only item.
+STRINGLIB_API void STRINGLIB_CALL slSplitWords(bool & __isAllResult, size32_t & __lenResult, void * & __result, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
+{
+    unsigned sizeRequired = calcWordSetSize(lenSrc, src, lenSeparator, separator, allowBlankItems);
+    char * const result = static_cast<char *>(CTXMALLOC(parentCtx, sizeRequired));
+    __isAllResult = false;
+    __lenResult = sizeRequired;
+    __result = result;
+
+    if (lenSrc == 0)
+        return;
+
+    if ((lenSeparator == 0) || (lenSrc < lenSeparator))
+    {
+        //Single item: written with memcpy like every other length header in this function.
+        memcpy(result, &lenSrc, sizeof(lenSrc));
+        memcpy(result+sizeof(size32_t), src, lenSrc);
+        return;
+    }
+
+    char * target = result;
+    const char * end = src + lenSrc;
+    const char * max = end - (lenSeparator - 1);
+    const char * cur = src;
+    const char * startWord = NULL;
+    //MORE: optimize lenSeparator == 1!
+    while (cur < max)
+    {
+        if (memcmp(cur, separator, lenSeparator) == 0)
+        {
+            if (startWord || allowBlankItems)
+            {
+                size32_t len = startWord ? (cur - startWord) : 0;
+                memcpy(target, &len, sizeof(len));
+                target += sizeof(size32_t);
+                //startWord is NULL for a blank item - passing a null source to memcpy is undefined even for 0 bytes
+                if (len)
+                {
+                    memcpy(target, startWord, len);
+                    target += len;
+                }
+                startWord = NULL;
+            }
+
+            cur += lenSeparator;
+        }
+        else
+        {
+            if (!startWord)
+                startWord = cur;
+            cur++;
+        }
+    }
+    if (startWord || (cur != end) || allowBlankItems)
+    {
+        if (!startWord)
+            startWord = cur;
+        size32_t len = (end - startWord);
+        memcpy(target, &len, sizeof(len));
+        memcpy(target+sizeof(size32_t), startWord, len);
+        target += sizeof(size32_t) + len;
+    }
+    //The bytes written must match the size calculated up front by calcWordSetSize().
+    assert(target == result + sizeRequired);
+}
+
+
+//Counts the entries in a serialized set of strings, where each entry is a size32_t length followed by that many
+//bytes.  lenSrc is the total size of the serialized data in bytes.
+static unsigned countWords(size32_t lenSrc, const char * src)
+{
+    unsigned numItems = 0;
+    for (unsigned pos = 0; pos < lenSrc; numItems++)
+    {
+        size32_t itemLen;
+        memcpy(&itemLen, src + pos, sizeof(itemLen));
+        pos += sizeof(itemLen) + itemLen;       // skip the length header and the item's bytes
+    }
+    return numItems;
+}
+
+
+//Joins the entries of a serialized set of strings (each a size32_t length followed by its bytes) into one string,
+//placing separator between consecutive entries.  The result is allocated with CTXMALLOC and returned through
+//__lenResult/__result; an empty set (lenSrc == 0) gives a zero length and a NULL result.
+//NOTE(review): isAllSrc and allowBlankItems are never read here, and the ECL service declaration for CombineWords
+//lists only the set and the separator - confirm whether the trailing bool parameter is intended.
+STRINGLIB_API void STRINGLIB_CALL slCombineWords(size32_t & __lenResult, void * & __result, bool isAllSrc, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
+{
+    if (lenSrc == 0)
+    {
+        __lenResult = 0;
+        __result = NULL;
+        return;
+    }
+
+    //Output size = input size, minus one length header per entry, plus one separator between each pair of entries.
+    unsigned numWords = countWords(lenSrc, src);
+    size32_t sizeRequired = lenSrc - numWords * sizeof(size32_t) + (numWords-1) * lenSeparator;
+    char * const result = static_cast<char *>(CTXMALLOC(parentCtx, sizeRequired));
+    __lenResult = sizeRequired;
+    __result = result;
+
+    char * out = result;
+    bool needSeparator = false;     // becomes true (for a non-empty separator) once the first entry has been copied
+    for (unsigned pos = 0; pos < lenSrc; )
+    {
+        if (needSeparator)
+        {
+            memcpy(out, separator, lenSeparator);
+            out += lenSeparator;
+        }
+
+        size32_t itemLen;
+        memcpy(&itemLen, src+pos, sizeof(itemLen));
+        pos += sizeof(itemLen);
+        memcpy(out, src+pos, itemLen);
+        out += itemLen;
+        pos += itemLen;
+        needSeparator = (lenSeparator != 0);
+    }
+    assert(out == result + sizeRequired);
+}
+
 
 //--------------------------------------------------------------------------------------------------------------------
 //--------------------------------------------------------------------------------------------------------------------

+ 3 - 0
plugins/stringlib/stringlib.hpp

@@ -77,6 +77,9 @@ STRINGLIB_API unsigned STRINGLIB_CALL slEditDistanceV2(unsigned leftLen, const c
 STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius);
 STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n);
 STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen, const char * src);
+STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
+STRINGLIB_API void STRINGLIB_CALL slSplitWords(bool & __isAllResult, size32_t & __lenResult, void * & __result, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
+STRINGLIB_API void STRINGLIB_CALL slCombineWords(size32_t & __lenResult, void * & __result, bool isAllSrc, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
 }
 
 #endif