浏览代码

HPCC-19054 Add option to TRIM to remove all whitespace, not just spaces

Also fixes some issues with TRIM(varstring, RIGHT) and one or two others
spotted during coding/testing.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 年之前
父节点
当前提交
32c338f1b7

+ 10 - 4
common/deftype/defvalue.cpp

@@ -3041,7 +3041,7 @@ IValue * substringValue(IValue * v, IValue * lower, IValue * higher)
     return ret;
 }
 
-IValue * trimStringValue(IValue * v, char typecode)
+IValue * trimStringValue(IValue * v, char typecode, bool whitespace)
 {
     ITypeInfo * type = v->queryType();
     type_t tc = type->getTypeCode();
@@ -3053,7 +3053,9 @@ IValue * trimStringValue(IValue * v, char typecode)
         if (tc == type_utf8)
         {
             char const * str = (char const *)v->queryValue();
-            switch(typecode) 
+            if (whitespace)
+                rtlTrimUtf8WS(tlen, resultstr.refstr(), len, str, typecode=='B'||typecode=='L', typecode=='A', typecode=='B'||typecode=='R');
+            else switch(typecode)
             {
             case 'A':
                 rtlTrimUtf8All(tlen, resultstr.refstr(), len, str);
@@ -3075,7 +3077,9 @@ IValue * trimStringValue(IValue * v, char typecode)
         else
         {
             UChar const * str = (UChar const *)v->queryValue();
-            switch(typecode) 
+            if (whitespace)
+                rtlTrimUnicodeWS(tlen, resultstr.refustr(), len, str, typecode=='B'||typecode=='L', typecode=='A', typecode=='B'||typecode=='R');
+            else switch(typecode)
             {
             case 'A':
                 rtlTrimUnicodeAll(tlen, resultstr.refustr(), len, str);
@@ -3106,7 +3110,9 @@ IValue * trimStringValue(IValue * v, char typecode)
         rtlDataAttr resultstr;
         unsigned len = s.length();
         char const * str = s.str();
-        switch(typecode)
+        if (whitespace)
+            rtlTrimWS(tlen, resultstr.refstr(), len, str, typecode=='B'||typecode=='L', typecode=='A', typecode=='B'||typecode=='R');
+        else switch(typecode)
         {
         case 'A':
             rtlTrimAll(tlen, resultstr.refstr(), len, str);

+ 1 - 1
common/deftype/defvalue.hpp

@@ -116,7 +116,7 @@ extern DEFTYPE_API IValue * shiftLeftValues(IValue * left, IValue * right);
 extern DEFTYPE_API IValue * shiftRightValues(IValue * left, IValue * right);
 
 extern DEFTYPE_API IValue * substringValue(IValue * v, IValue *lower, IValue *higher);
-extern DEFTYPE_API IValue * trimStringValue(IValue * v, char typecode);
+extern DEFTYPE_API IValue * trimStringValue(IValue * v, char typecode, bool whitespace);
 extern DEFTYPE_API IValue * concatValues(IValue * left, IValue * right);
 extern DEFTYPE_API IValue * negateValue(IValue * v);
 extern DEFTYPE_API IValue * expValue(IValue * v);

+ 2 - 0
ecl/hql/hqlatoms.cpp

@@ -455,6 +455,7 @@ IAtom * volatileAtom;
 IAtom * _volatileId_Atom;
 IAtom * warningAtom;
 IAtom * webserviceAtom;
+IAtom * whitespaceAtom;
 IAtom * wholeAtom;
 IAtom * widthAtom;
 IAtom * wipeAtom;
@@ -913,6 +914,7 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM)
     MAKESYSATOM(volatileId);
     MAKEATOM(warning);
     MAKEATOM(webservice);
+    MAKEATOM(whitespace);
     MAKEATOM(whole);
     MAKEATOM(width);
     MAKEATOM(wipe);

+ 1 - 0
ecl/hql/hqlatoms.hpp

@@ -460,6 +460,7 @@ extern HQL_API IAtom * volatileAtom;
 extern HQL_API IAtom * _volatileId_Atom;
 extern HQL_API IAtom * warningAtom;
 extern HQL_API IAtom * webserviceAtom;
+extern HQL_API IAtom * whitespaceAtom;
 extern HQL_API IAtom * wholeAtom;
 extern HQL_API IAtom * widthAtom;
 extern HQL_API IAtom * wipeAtom;

+ 2 - 2
ecl/hql/hqlfold.cpp

@@ -3822,12 +3822,12 @@ IHqlExpression * foldConstantOperator(IHqlExpression * expr, unsigned foldOption
             IValue * constValue = child->queryValue();
             IValue* resultstr = NULL;
             if (constValue) 
-                resultstr = trimStringValue(constValue, typecode);
+                resultstr = trimStringValue(constValue, typecode, expr->hasAttribute(whitespaceAtom));
 
             if (resultstr) 
                 return createConstant(resultstr);
 
-            //extendin a string won't change the alue of trim(x), unless not trimming the rhs
+            //extending a string won't change the value of trim(x), unless not trimming the rhs
             //i.e., trim((string60)string12expression)  => trim(string12expression);
             if ((typecode != 'L') && isCast(child))
             {

+ 2 - 0
ecl/hql/hqlgram.y

@@ -477,6 +477,7 @@ static void eclsyntaxerror(HqlGram * parser, const char * s, short yystate, int
   TOK_WARNING
   WHEN
   WHICH
+  WHITESPACE
   WIDTH
   WILD
   WITHIN
@@ -10936,6 +10937,7 @@ TrimFlag
     : LEFT              {   $$.setExpr(createAttribute(leftAtom)); }
     | RIGHT             {   $$.setExpr(createAttribute(rightAtom)); }
     | ALL               {   $$.setExpr(createAttribute(allAtom)); }
+    | WHITESPACE        {   $$.setExpr(createAttribute(whitespaceAtom)); }
     ;
 
 optSortList

+ 1 - 0
ecl/hql/hqlgram2.cpp

@@ -11188,6 +11188,7 @@ static void getTokenText(StringBuffer & msg, int token)
     case TOK_WARNING: msg.append("WARNING"); break;
     case WHEN: msg.append("WHEN"); break;
     case WHICH: msg.append("WHICH"); break;
+    case WHITESPACE: msg.append("WHITESPACE"); break;
     case WIDTH: msg.append("WIDTH"); break;
     case WILD: msg.append("WILD"); break;
     case WITHIN: msg.append("WITHIN"); break;

+ 1 - 0
ecl/hql/hqllex.l

@@ -982,6 +982,7 @@ WAIT                { RETURNSYM(WAIT); }
 WARNING             { RETURNSYM(TOK_WARNING); }
 WHEN                { RETURNSYM(WHEN); }
 WHICH               { RETURNSYM(WHICH); }
+WHITESPACE          { RETURNSYM(WHITESPACE); }
 WIDTH               { RETURNSYM(WIDTH); }
 WILD                { RETURNSYM(WILD); }
 WITHIN              { RETURNSYM(WITHIN); }

+ 1 - 0
ecl/hql/reservedwords.cpp

@@ -471,6 +471,7 @@ static const char * eclReserved14[] = { //Attribute functions (some might actual
     "use",
     "validate",
     "virtual",
+    "whitespace",
     "width",
     "wild",
     "xml",

+ 10 - 0
ecl/hqlcpp/hqlcatom.cpp

@@ -647,26 +647,31 @@ IIdAtom * trimBothId;
 IIdAtom * trimLeftId;
 IIdAtom * trimRightId;
 IIdAtom * trimStrLenId;
+IIdAtom * trimWSId;
 IIdAtom * trimUnicodeAllId;
 IIdAtom * trimUnicodeBothId;
 IIdAtom * trimUnicodeLeftId;
 IIdAtom * trimUnicodeRightId;
 IIdAtom * trimUnicodeStrLenId;
+IIdAtom * trimUnicodeWSId;
 IIdAtom * trimUtf8AllId;
 IIdAtom * trimUtf8BothId;
 IIdAtom * trimUtf8LeftId;
 IIdAtom * trimUtf8RightId;
 IIdAtom * trimUtf8StrLenId;
+IIdAtom * trimUtf8WSId;
 IIdAtom * trimVAllId;
 IIdAtom * trimVBothId;
 IIdAtom * trimVLeftId;
 IIdAtom * trimVRightId;
 IIdAtom * trimVStrLenId;
+IIdAtom * trimVWSId;
 IIdAtom * trimVUnicodeAllId;
 IIdAtom * trimVUnicodeBothId;
 IIdAtom * trimVUnicodeLeftId;
 IIdAtom * trimVUnicodeRightId;
 IIdAtom * trimVUnicodeStrLenId;
+IIdAtom * trimVUnicodeWSId;
 IIdAtom * truncateId;
 IIdAtom * UCharId;
 IIdAtom * unicode2CodepageId;
@@ -1334,26 +1339,31 @@ MODULE_INIT(INIT_PRIORITY_HQLATOM-1)
     MAKEID(trimLeft);
     MAKEID(trimRight);
     MAKEID(trimStrLen);
+    MAKEID(trimWS);
     MAKEID(trimUnicodeAll);
     MAKEID(trimUnicodeBoth);
     MAKEID(trimUnicodeLeft);
     MAKEID(trimUnicodeRight);
     MAKEID(trimUnicodeStrLen);
+    MAKEID(trimUnicodeWS);
     MAKEID(trimUtf8All);
     MAKEID(trimUtf8Both);
     MAKEID(trimUtf8Left);
     MAKEID(trimUtf8Right);
     MAKEID(trimUtf8StrLen);
+    MAKEID(trimUtf8WS);
     MAKEID(trimVAll);
     MAKEID(trimVBoth);
     MAKEID(trimVLeft);
     MAKEID(trimVRight);
     MAKEID(trimVStrLen);
+    MAKEID(trimVWS);
     MAKEID(trimVUnicodeAll);
     MAKEID(trimVUnicodeBoth);
     MAKEID(trimVUnicodeLeft);
     MAKEID(trimVUnicodeRight);
     MAKEID(trimVUnicodeStrLen);
+    MAKEID(trimVUnicodeWS);
     truncateId = createIdAtom("_truncate");
     MAKEID(UChar);
     MAKEID(unicode2Codepage);

+ 5 - 0
ecl/hqlcpp/hqlcatom.hpp

@@ -645,26 +645,31 @@ extern IIdAtom * trimBothId;
 extern IIdAtom * trimLeftId;
 extern IIdAtom * trimRightId;
 extern IIdAtom * trimStrLenId;
+extern IIdAtom * trimWSId;
 extern IIdAtom * trimUnicodeAllId;
 extern IIdAtom * trimUnicodeBothId;
 extern IIdAtom * trimUnicodeLeftId;
 extern IIdAtom * trimUnicodeRightId;
 extern IIdAtom * trimUnicodeStrLenId;
+extern IIdAtom * trimUnicodeWSId;
 extern IIdAtom * trimUtf8AllId;
 extern IIdAtom * trimUtf8BothId;
 extern IIdAtom * trimUtf8LeftId;
 extern IIdAtom * trimUtf8RightId;
 extern IIdAtom * trimUtf8StrLenId;
+extern IIdAtom * trimUtf8WSId;
 extern IIdAtom * trimVAllId;
 extern IIdAtom * trimVBothId;
 extern IIdAtom * trimVLeftId;
 extern IIdAtom * trimVRightId;
 extern IIdAtom * trimVStrLenId;
+extern IIdAtom * trimVWSId;
 extern IIdAtom * trimVUnicodeAllId;
 extern IIdAtom * trimVUnicodeBothId;
 extern IIdAtom * trimVUnicodeLeftId;
 extern IIdAtom * trimVUnicodeRightId;
 extern IIdAtom * trimVUnicodeStrLenId;
+extern IIdAtom * trimVUnicodeWSId;
 extern IIdAtom * truncateId;
 extern IIdAtom * UCharId;
 extern IIdAtom * unicode2CodepageId;

+ 60 - 15
ecl/hqlcpp/hqlcpp.cpp

@@ -10068,11 +10068,14 @@ void HqlCppTranslator::doBuildAssignTrim(BuildCtx & ctx, const CHqlBoundTarget &
     IIdAtom * func;
     bool hasAll = expr->hasAttribute(allAtom);
     bool hasLeft = expr->hasAttribute(leftAtom);
-    bool hasRight = expr->hasAttribute(rightAtom);
+    bool hasRight = expr->hasAttribute(rightAtom) || !(hasAll || hasLeft);
+    bool hasWS = expr->hasAttribute(whitespaceAtom);
 
     if (str->queryType()->getTypeCode() == type_varstring)
     {
-        if(hasAll)
+        if(hasWS)
+            func = trimVWSId;
+        else if(hasAll)
             func = trimVAllId;
         else if(hasLeft && hasRight)
             func = trimVBothId;
@@ -10083,7 +10086,9 @@ void HqlCppTranslator::doBuildAssignTrim(BuildCtx & ctx, const CHqlBoundTarget &
     }
     else if(str->queryType()->getTypeCode() == type_unicode)
     {
-        if(hasAll)
+        if(hasWS)
+            func = trimUnicodeWSId;
+        else if(hasAll)
             func = trimUnicodeAllId;
         else if(hasLeft && hasRight)
             func = trimUnicodeBothId;
@@ -10094,7 +10099,9 @@ void HqlCppTranslator::doBuildAssignTrim(BuildCtx & ctx, const CHqlBoundTarget &
     }
     else if(str->queryType()->getTypeCode() == type_varunicode)
     {
-        if(hasAll)
+        if(hasWS)
+            func = trimVUnicodeWSId;
+        else if(hasAll)
             func = trimVUnicodeAllId;
         else if(hasLeft && hasRight)
             func = trimVUnicodeBothId;
@@ -10105,7 +10112,9 @@ void HqlCppTranslator::doBuildAssignTrim(BuildCtx & ctx, const CHqlBoundTarget &
     }
     else if(str->queryType()->getTypeCode() == type_utf8)
     {
-        if(hasAll)
+        if(hasWS)
+            func = trimUtf8WSId;
+        else if(hasAll)
             func = trimUtf8AllId;
         else if(hasLeft && hasRight)
             func = trimUtf8BothId;
@@ -10116,7 +10125,9 @@ void HqlCppTranslator::doBuildAssignTrim(BuildCtx & ctx, const CHqlBoundTarget &
     }
     else
     {
-        if(hasAll)
+        if(hasWS)
+            func = trimWSId;
+        else if(hasAll)
             func = trimAllId;
         else if(hasLeft && hasRight)
             func = trimBothId;
@@ -10128,6 +10139,12 @@ void HqlCppTranslator::doBuildAssignTrim(BuildCtx & ctx, const CHqlBoundTarget &
 
     HqlExprArray args;
     args.append(*LINK(str));
+    if (hasWS)
+    {
+        args.append(*LINK(queryBoolExpr(hasLeft)));
+        args.append(*LINK(queryBoolExpr(hasAll)));
+        args.append(*LINK(queryBoolExpr(hasRight)));
+    }
     OwnedHqlExpr call = bindFunctionCall(func, args);
     buildExprAssign(ctx, target, call);
 }
@@ -10145,26 +10162,36 @@ void HqlCppTranslator::doBuildExprTrim(BuildCtx & ctx, IHqlExpression * expr, CH
     
     bool hasAll = expr->hasAttribute(allAtom);
     bool hasLeft = expr->hasAttribute(leftAtom);
-    bool hasRight = expr->hasAttribute(rightAtom);
+    bool hasRight = expr->hasAttribute(rightAtom) || !(hasAll || hasLeft);
+    bool hasWS = expr->hasAttribute(whitespaceAtom);
     
     type_t btc = bound.expr->queryType()->getTypeCode();
-    if(hasAll || hasLeft) 
+    if(hasAll || hasLeft || hasWS || btc == type_varstring || btc == type_varunicode)
     {
         if (btc == type_varstring)
         {
-            if(hasAll) {
+            if(hasWS) {
+                func = trimVWSId;
+            }
+            else if(hasAll) {
                 func = trimVAllId;
             }
             else if(hasLeft && hasRight) {
                 func = trimVBothId;
             }
-            else {
+            else if (hasLeft) {
                 func = trimVLeftId;
             }
+            else {
+                func = trimVRightId;
+            }
         }
         else if (btc == type_unicode)
         {
-            if(hasAll) {
+            if(hasWS) {
+                func = trimUnicodeWSId;
+            }
+            else if(hasAll) {
                 func = trimUnicodeAllId;
             }
             else if(hasLeft && hasRight) {
@@ -10176,19 +10203,28 @@ void HqlCppTranslator::doBuildExprTrim(BuildCtx & ctx, IHqlExpression * expr, CH
         }
         else if (btc == type_varunicode)
         {
-            if(hasAll) {
+            if(hasWS) {
+                func = trimVUnicodeWSId;
+            }
+            else if(hasAll) {
                 func = trimVUnicodeAllId;
             }
             else if(hasLeft && hasRight) {
                 func = trimVUnicodeBothId;
             }
-            else {
+            else if(hasLeft) {
                 func = trimVUnicodeLeftId;
             }
+            else {
+                func = trimVUnicodeRightId;
+            }
         }
         else if (btc == type_utf8)
         {
-            if(hasAll) {
+            if(hasWS) {
+                func = trimUtf8WSId;
+            }
+            else if(hasAll) {
                 func = trimUtf8AllId;
             }
             else if(hasLeft && hasRight) {
@@ -10200,7 +10236,10 @@ void HqlCppTranslator::doBuildExprTrim(BuildCtx & ctx, IHqlExpression * expr, CH
         }
         else
         {
-            if(hasAll) {
+            if(hasWS) {
+                func = trimWSId;
+            }
+            else if(hasAll) {
                 func = trimAllId;
             }
             else if(hasLeft && hasRight) {
@@ -10212,6 +10251,12 @@ void HqlCppTranslator::doBuildExprTrim(BuildCtx & ctx, IHqlExpression * expr, CH
         }
 
         args.append(*bound.getTranslatedExpr());
+        if (hasWS)
+        {
+            args.append(*LINK(queryBoolExpr(hasLeft)));
+            args.append(*LINK(queryBoolExpr(hasAll)));
+            args.append(*LINK(queryBoolExpr(hasRight)));
+        }
         OwnedHqlExpr call = bindFunctionCall(func, args);
         buildExpr(ctx, call, tgt);
     }

+ 6 - 1
ecl/hqlcpp/hqlcppsys.ecl

@@ -191,7 +191,7 @@ const char * cppSystemText[]  = {
     "   unicode trimVUnicodeRight(const varunicode src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVUnicodeRight';",
     "   string trimLeft(const string src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimLeft';",
     "   unicode trimUnicodeLeft(const unicode src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUnicodeLeft';",
-    "   unicode trimUtf8Left(const utf8 src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUtf8Left';",
+    "   utf8 trimUtf8Left(const utf8 src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUtf8Left';",
     "   string trimVLeft(const varstring src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVLeft';",
     "   unicode trimVUnicodeLeft(const varunicode src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVUnicodeLeft';",
     "   string trimBoth(const string src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimBoth';",
@@ -200,10 +200,15 @@ const char * cppSystemText[]  = {
     "   string trimVBoth(const varstring src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVBoth';",
     "   unicode trimVUnicodeBoth(const varunicode src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVUnicodeBoth';",
     "   string trimAll(const string src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimAll';",
+    "   string trimWS(const string src, boolean left, boolean _all, boolean right) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimWS';",
     "   unicode trimUnicodeAll(const unicode src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUnicodeAll';",
+    "   unicode trimUnicodeWS(const unicode src, boolean left, boolean _all, boolean right) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUnicodeWS';",
     "   utf8 trimUtf8All(const utf8 src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUtf8All';",
+    "   utf8 trimUtf8WS(const utf8 src, boolean left, boolean _all, boolean right) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimUtf8WS';",
     "   string trimVAll(const varstring src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVAll';",
+    "   string trimVWS(const varstring src, boolean left, boolean _all, boolean right) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVWS';",
     "   unicode trimVUnicodeAll(const varunicode src) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVUnicodeAll';",
+    "   unicode trimVUnicodeWS(const varunicode src, boolean left, boolean _all, boolean right) : eclrtl,pure,library='eclrtl',entrypoint='rtlTrimVUnicodeWS';",
     "   data subDataFTX(const data src, unsigned4 from, unsigned4 to) : eclrtl,pure,library='eclrtl',entrypoint='rtlSubDataFTX';",
     "   data subDataFX(const data src, unsigned4 from) : eclrtl,pure,library='eclrtl',entrypoint='rtlSubDataFX';",
     "   subDataFT(noconst data tgt, const data src, unsigned4 from, unsigned4 to) : eclrtl,pure,library='eclrtl',entrypoint='rtlSubDataFT';",

+ 3 - 2
ecl/hqlcpp/hqlttcpp.cpp

@@ -12722,10 +12722,11 @@ IHqlExpression * HqlTreeNormalizer::createTransformedBody(IHqlExpression * expr)
         }
     case no_trim:
         //TRIM(x,RIGHT) should be represented the same way as TRIM(x) - and it's more efficient
-        if ((expr->numChildren() == 2) && (expr->queryChild(1)->queryName() == rightAtom))
+        if (expr->hasAttribute(rightAtom) && !expr->hasAttribute(leftAtom))
         {
             HqlExprArray children;
-            children.append(*transform(expr->queryChild(0)));
+            transformChildren(expr, children);
+            removeAttribute(children, rightAtom);
             return expr->clone(children);
         }
         break;

+ 171 - 10
rtl/eclrtl/eclrtl.cpp

@@ -1661,7 +1661,7 @@ void rtlConcatVUnicodeF(unsigned tlen, UChar * tgt, ...)
 #endif
 
 //------------------------------------------------------------------------------------------------
-// The followinf concat functions are all deprecated in favour of the variable number of argument
+// The following concat functions are all deprecated in favour of the variable number of argument
 // versions
 unsigned rtlConcatStrToStr(unsigned tlen, char * tgt, unsigned idx, unsigned slen, const char * src)
 {
@@ -1969,6 +1969,22 @@ unsigned rtlTrimUnicodeStrLen(size32_t l, UChar const * t)
 #endif
 }
 
+// Returns the length, in UChar (UTF-16) code units, of the first l units of t
+// once trailing whitespace - as classified by ICU's u_isspace() - is ignored.
+// When built without ICU the result comes from rtlQuickTrimUnicode() instead.
+unsigned rtlTrimUnicodeStrLenWS(size32_t l, UChar const * t)
+{
+#ifdef _USE_ICU
+    if (!l)
+        return 0;
+    UCharCharacterIterator iter(t, l);
+    // Walk backwards one code point at a time until a non-whitespace character is found
+    iter.last32();
+    while (iter.hasPrevious() && u_isspace(iter.current32()))
+        iter.previous32();
+    UChar32 last = iter.current32();
+    if (u_isspace(last))
+        return iter.getIndex(); // only reached at index 0: the reverse iteration above never steps past the first character
+    // getIndex() addresses the first code unit of 'last'.  A supplementary character
+    // occupies two code units (a surrogate pair) and both of them must be kept.
+    return iter.getIndex() + ((last > 0xFFFF) ? 2 : 1);
+#else
+    return rtlQuickTrimUnicode(l, t);
+#endif
+}
+
 unsigned rtlTrimVStrLen(const char * t)
 {
     const char * first = t;
@@ -2008,6 +2024,19 @@ inline unsigned rtlLeftTrimUnicodeStrStart(size32_t slen, UChar const * src)
 #endif
 }
 
+// Returns the offset (in UChar code units) of the first character of src that
+// ICU's u_isspace() does not class as whitespace; the offset equals slen when
+// no such character exists.  Without ICU the function simply returns slen.
+inline unsigned rtlLeftTrimUnicodeStrStartWS(size32_t slen, UChar const * src)
+{
+#ifdef _USE_ICU
+    UCharCharacterIterator iter(src, slen);
+    iter.first32();
+    // Step forwards a code point at a time while whitespace is still being seen
+    while (iter.hasNext() && u_isspace(iter.current32()))
+        iter.next32();
+    return iter.getIndex();
+#else
+    return slen;
+#endif
+}
+
 inline unsigned rtlLeftTrimVStrStart(const char * src)
 {
     unsigned i = 0;
@@ -2277,6 +2306,19 @@ void rtlAssignTrimVBothV(size32_t tlen, char * tgt, const char * src)
 
 //-----------------------------------------------------------------------------
 // Functions used to trim off all blank spaces in a string.
+// Counts the characters in the first l bytes of t that isspace() does not
+// class as whitespace, i.e. the length of the text once all whitespace has
+// been removed.
+unsigned rtlTrimStrLenNonWhitespace(size32_t l, const char * t)
+{
+    unsigned len = 0;
+
+    for (size32_t i = 0; i < l; i++)
+    {
+        // isspace() is only defined for unsigned char values (and EOF), so a plain
+        // char - which may be negative for bytes >= 0x80 - must be converted first.
+        if (!isspace((unsigned char)t[i]))
+            len++;
+    }
+    return len;
+}
+
 unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t)
 {
     unsigned len = 0;
@@ -2306,7 +2348,7 @@ unsigned rtlTrimVStrLenNonBlank(const char * t)
 void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
 {
     tlen = rtlTrimStrLenNonBlank(slen, src);
-    char * buffer = (char *)rtlMalloc(tlen + 1);
+    char * buffer = (char *)rtlMalloc(tlen);
     int ind = 0;
     for(unsigned i = 0; i < slen; i++) {
         if(src[i] != ' ') {
@@ -2314,10 +2356,41 @@ void rtlTrimAll(unsigned & tlen, char * & tgt, unsigned slen, const char * src)
             ind++;
         }
     }
-    buffer[tlen] = 0;
     tgt = buffer;
 }
 
+// Whitespace-aware TRIM for strings with an explicit length.  Whitespace is
+// whatever isspace() reports, rather than just the space character.
+//   tlen/tgt - receive the length of the result and a newly allocated copy of it
+//   slen/src - the text to trim
+//   all      - remove every whitespace character wherever it occurs (left and right are then not consulted)
+//   left     - remove leading whitespace
+//   right    - remove trailing whitespace
+void rtlTrimWS(unsigned & tlen, char * & tgt, unsigned slen, const char * src, bool left, bool all, bool right)
+{
+    // NB: isspace() is only defined for unsigned char values (and EOF), hence the casts below.
+    if (all)
+    {
+        tlen = rtlTrimStrLenNonWhitespace(slen, src);
+        char * buffer = (char *)rtlMalloc(tlen);
+        unsigned ind = 0;
+        for (unsigned i = 0; i < slen; i++)
+        {
+            if (!isspace((unsigned char)src[i]))
+                buffer[ind++] = src[i];
+        }
+        tgt = buffer;
+    }
+    else
+    {
+        // Trim the right hand side first so that the left trim cannot run past the retained text
+        if (right)
+        {
+            while (slen && isspace((unsigned char)src[slen-1]))
+                slen--;
+        }
+        unsigned start = 0;
+        if (left)
+        {
+            while (start < slen && isspace((unsigned char)src[start]))
+                start++;
+        }
+        tlen = slen - start;
+        tgt = rtlDupSubString(src + start, tlen);
+    }
+}
+
 void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src)
 {
 #ifdef _USE_ICU
@@ -2329,9 +2402,37 @@ void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UCha
     UnicodeString tgtStr;
     normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
     tlen = tgtStr.length();
-    tgt = (UChar *)rtlMalloc((tlen+1)*2);
+    tgt = (UChar *)rtlMalloc(tlen*2);
     tgtStr.extract(0, tlen, tgt);
-    tgt[tlen] = 0x0000;
+#else
+    rtlThrowNoUnicode();
+#endif
+}
+
+// Whitespace-aware TRIM for unicode (UChar) text; whitespace is whatever ICU's
+// u_isspace() reports.  Lengths are in UChar code units.
+//   tlen/tgt - receive the length of the result and a newly allocated copy of it
+//   all      - remove every whitespace character, then normalize the result (left and right are then not consulted)
+//   left     - remove leading whitespace
+//   right    - remove trailing whitespace
+void rtlTrimUnicodeWS(unsigned & tlen, UChar * & tgt, unsigned slen, const UChar * src, bool left, bool all, bool right)
+{
+#ifdef _USE_ICU
+    if (all)
+    {
+        UnicodeString rawStr;
+        UCharCharacterIterator iter(src, slen);
+        for(iter.first32(); iter.hasNext(); iter.next32())
+            if(!u_isspace(iter.current32()))
+                rawStr.append(iter.current32());
+        UnicodeString tgtStr;
+        normalizeUnicodeString(rawStr, tgtStr); // normalized in case crazy string like [combining accent] [space] [vowel]
+        tlen = tgtStr.length();
+        tgt = (UChar *)rtlMalloc(tlen*sizeof(UChar));
+        tgtStr.extract(0, tlen, tgt);
+    }
+    else
+    {
+        if (right)
+            slen = rtlTrimUnicodeStrLenWS(slen, src);
+        unsigned start = (left && slen) ? rtlLeftTrimUnicodeStrStartWS(slen, src) : 0;
+        tlen = slen - start;
+        // Duplicate only the tlen units that remain after 'start': asking for slen units
+        // here would read 'start' units beyond the end of the retained text.
+        tgt = rtlDupSubUnicode(src + start, tlen);
+    }
 #else
     rtlThrowNoUnicode();
 #endif
@@ -2340,7 +2441,7 @@ void rtlTrimUnicodeAll(unsigned & tlen, UChar * & tgt, unsigned slen, const UCha
 void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
 {
     tlen = rtlTrimVStrLenNonBlank(src);
-    char * buffer = (char *)rtlMalloc(tlen + 1);
+    char * buffer = (char *)rtlMalloc(tlen);
     int ind = 0;
     int i = 0;
     while(src[i] != 0) {
@@ -2350,27 +2451,87 @@ void rtlTrimVAll(unsigned & tlen, char * & tgt, const char * src)
         }
         i++;
     }
-    buffer[tlen] = 0;
     tgt = buffer;
 }
 
+// Null-terminated (varstring) form of rtlTrimWS: the source length is taken
+// from strlen() and everything else is forwarded unchanged.
+void rtlTrimVWS(unsigned & tlen, char * & tgt, const char * src, bool left, bool all, bool right)
+{
+    unsigned srcLen = (unsigned)strlen(src);
+    rtlTrimWS(tlen, tgt, srcLen, src, left, all, right);
+}
+
 void rtlTrimVUnicodeAll(unsigned & tlen, UChar * & tgt, const UChar * src)
 {
     rtlTrimUnicodeAll(tlen, tgt, rtlUnicodeStrlen(src), src);
 }
 
+// Null-terminated (varunicode) form of rtlTrimUnicodeWS: the source length is
+// taken from rtlUnicodeStrlen() and everything else is forwarded unchanged.
+void rtlTrimVUnicodeWS(unsigned & tlen, UChar * & tgt, const UChar * src, bool left, bool all, bool right)
+{
+    unsigned srcLen = rtlUnicodeStrlen(src);
+    rtlTrimUnicodeWS(tlen, tgt, srcLen, src, left, all, right);
+}
+
 ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src)
 {
     //Go via unicode because of possibility of combining accents etc.
-    rtlDataAttr temp1(slen*sizeof(UChar));
-    rtlUtf8ToUnicode(slen, temp1.getustr(), slen, src);
+    rtlDataAttr temp1;
+    unsigned temp1len;
+    rtlUtf8ToUnicodeX(temp1len, temp1.refustr(), slen, src);
 
     unsigned trimLen;
     rtlDataAttr trimText;
-    rtlTrimUnicodeAll(trimLen, trimText.refustr(), slen, temp1.getustr());
+    rtlTrimUnicodeAll(trimLen, trimText.refustr(), temp1len, temp1.getustr());
     rtlUnicodeToUtf8X(tlen, tgt, trimLen, trimText.getustr());
 }
 
+// Whitespace-aware TRIM for utf8 text; whitespace is whatever u_isspace() reports.
+// slen and tlen count characters rather than bytes: the loops below consume one
+// utf8 character per count, and the byte size to copy is derived separately.
+//   all   - remove every whitespace character (done by converting to unicode and back)
+//   left  - remove leading whitespace
+//   right - remove trailing whitespace
+void rtlTrimUtf8WS(unsigned & tlen, char * & tgt, unsigned slen, const char * src, bool left, bool all, bool right)
+{
+    if (all)
+    {
+        //Go via unicode because of possibility of combining accents etc.
+        rtlDataAttr unicodeText;
+        unsigned unicodeLen;
+        rtlUtf8ToUnicodeX(unicodeLen, unicodeText.refustr(), slen, src);
+
+        unsigned trimmedLen;
+        rtlDataAttr trimmedText;
+        rtlTrimUnicodeWS(trimmedLen, trimmedText.refustr(), unicodeLen, unicodeText.getustr(), left, all, right);
+        rtlUnicodeToUtf8X(tlen, tgt, trimmedLen, trimmedText.getustr());
+        return;
+    }
+
+    const byte * first = (const byte *) src;
+    const byte * scan = first;
+    if (left)
+    {
+        // 'first' only moves on once the character just read is known to be whitespace
+        while (slen && u_isspace(readUtf8Character(UTF8_MAXSIZE, scan)))
+        {
+            slen--;
+            first = scan;
+        }
+    }
+
+    if (!right)
+    {
+        tlen = slen;
+        tgt = rtlDupSubString((const char *) first, rtlUtf8Size(slen, first));
+        return;
+    }
+
+    // Scan forwards over the remaining characters, remembering where the last
+    // non-whitespace character ended - everything after that point is dropped.
+    scan = first;
+    unsigned keptChars = 0;
+    const byte * keptEnd = scan;
+    for (unsigned idx = 0; idx < slen; idx++)
+    {
+        unsigned ch = readUtf8Character(UTF8_MAXSIZE, scan);
+        if (u_isspace(ch))
+            continue;
+        keptChars = idx+1;
+        keptEnd = scan;
+    }
+    tlen = keptChars;
+    tgt = rtlDupSubString((const char *) first, keptEnd-first);
+}
+
+
 void rtlAssignTrimAllV(unsigned tlen, char * tgt, unsigned slen, const char * src)
 {
     unsigned to = 0;

+ 5 - 0
rtl/eclrtl/eclrtl.hpp

@@ -285,6 +285,11 @@ ECLRTL_API void rtlTrimUnicodeAll(unsigned &tlen, UChar * &tgt, unsigned slen, U
 ECLRTL_API void rtlTrimUtf8All(unsigned &tlen, char * &tgt, unsigned slen, const char * src);
 ECLRTL_API void rtlTrimVAll(unsigned &tlen, char * &tgt, const char * src); // YMA
 ECLRTL_API void rtlTrimVUnicodeAll(unsigned &tlen, UChar * &tgt, UChar const * src);
+ECLRTL_API void rtlTrimWS(unsigned &tlen, char * &tgt, unsigned slen, const char * src, bool left, bool all, bool right);
+ECLRTL_API void rtlTrimUnicodeWS(unsigned &tlen, UChar * &tgt, unsigned slen, UChar const * src, bool left, bool all, bool right);
+ECLRTL_API void rtlTrimUtf8WS(unsigned &tlen, char * &tgt, unsigned slen, const char * src, bool left, bool all, bool right);
+ECLRTL_API void rtlTrimVWS(unsigned &tlen, char * &tgt, const char * src, bool left, bool all, bool right);
+ECLRTL_API void rtlTrimVUnicodeWS(unsigned &tlen, UChar * &tgt, UChar const* src, bool left, bool all, bool right);
 ECLRTL_API unsigned rtlTrimStrLenNonBlank(size32_t l, const char * t);
 ECLRTL_API unsigned rtlTrimVStrLenNonBlank(const char * t);
 

+ 60 - 0
testing/regress/ecl/key/trim.xml

@@ -0,0 +1,60 @@
+<Dataset name='Result 1'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1></s1><s2></s2><s3></s3><s4></s4><s5></s5><s6></s6><s7></s7><s8></s8></Row>
+ <Row><s1>	 a b	c 	 </s1><s2> 	 a b	c 	</s2><s3>	 a b	c 	</s3><s4>	ab	c	</s4><s5>a b	c 	 </s5><s6> 	 a b	c</s6><s7>a b	c</s7><s8>abc</s8></Row>
+ <Row><s1>abc</s1><s2>abc</s2><s3>abc</s3><s4>abc</s4><s5>abc</s5><s6>abc</s6><s7>abc</s7><s8>abc</s8></Row>
+</Dataset>

+ 63 - 0
testing/regress/ecl/trim.ecl

@@ -0,0 +1,63 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2018 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+// Test variations: left, right, both, all
+// Test variations: normal, whitespace
+// Test variations: utf8, string, varstring, unicode, varunicode
+// Test variations: const folded, not const folded
+// Test variations: spaces present, spaces absent, whitespace present, empty, all whitespace
+
+// declare_tests(testtype): builds a four row inline dataset (empty, a single
+// space, mixed spaces/tabs around 'a b\tc', and 'abc') whose only field has
+// type testtype, then projects it through a transform that applies eight TRIM
+// variants: LEFT, RIGHT, LEFT+RIGHT and ALL, each without and with WHITESPACE.
+// The projection is output twice - once from the inline dataset itself and once
+// from a NOFOLD copy of it (presumably the "const folded" and "not const
+// folded" variations listed above).
+declare_tests(testtype) := MACRO
+#uniquename(rawinput)
+#uniquename(input)
+#uniquename(output)
+#uniquename(trans)
+%rawinput% := DATASET([{''},{' '},{' \t a b\tc \t '},{'abc'}], { testtype s; });
+%input% := NOFOLD(%rawinput%);
+
+%output% := RECORD
+ testtype s1; 
+ testtype s2; 
+ testtype s3; 
+ testtype s4; 
+ testtype s5; 
+ testtype s6; 
+ testtype s7; 
+ testtype s8; 
+END;
+
+%output% %trans%(%input% L) := TRANSFORM
+  SELF.s1 := TRIM(L.s, LEFT);
+  SELF.s2 := TRIM(L.s, RIGHT);
+  SELF.s3 := TRIM(L.s, LEFT, RIGHT);
+  SELF.s4 := TRIM(L.s, ALL);
+  SELF.s5 := TRIM(L.s, LEFT, WHITESPACE);
+  SELF.s6 := TRIM(L.s, RIGHT, WHITESPACE);
+  SELF.s7 := TRIM(L.s, LEFT, RIGHT, WHITESPACE);
+  SELF.s8 := TRIM(L.s, ALL, WHITESPACE);
+END;
+
+OUTPUT(project(%rawinput%, %trans%(LEFT)));
+OUTPUT(project(%input%, %trans%(LEFT)));
+
+ENDMACRO;
+
+declare_tests(utf8);
+declare_tests(string);
+declare_tests(varstring);
+declare_tests(unicode);
+declare_tests(varunicode);