Kaynağa Gözat

HPCC-674 Add new word remove functions to Std.Str

Add functions to remove the first, last or nth words from a string.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 13 yıl önce
ebeveyn
işleme
e5a24ad6fe

+ 31 - 0
ecllibrary/std/Str.ecl

@@ -338,6 +338,37 @@ EXPORT unsigned4 WordCount(string text) :=
 EXPORT string GetNthWord(string text, unsigned4 n) :=
     lib_stringlib.StringLib.StringGetNthWord(text, n);
 
+/**
+ * Returns the string with its first word removed.  Words are separated by one or more whitespace characters.
+ * Whitespace before and after the first word is removed along with it.
+ *
+ * @param text          The string to remove the first word from.
+ * @return              The text following the first word, or a blank string if there is at most one word.
+ */
+
+EXPORT ExcludeFirstWord(STRING text) := lib_stringlib.Stringlib.StringExcludeNthWord(text, 1);
+
+/**
+ * Returns the string with its last word removed.  Words are separated by one or more whitespace characters.
+ * Whitespace after the last word is removed with it; if it is the only word the leading whitespace is removed too.
+ *
+ * @param text          The string to remove the last word from.
+ * @return              The text up to the start of the last word, or a blank string if there is at most one word.
+ */
+
+EXPORT ExcludeLastWord(STRING text) := lib_stringlib.Stringlib.StringExcludeLastWord(text);
+
+/**
+ * Returns everything except the nth word from the string.  Words are separated by one or more whitespace characters.
+ * Whitespace after a word is removed with the word and leading whitespace is removed with the first word.
+ *
+ * @param text          The string to be broken into words.
+ * @param n             Which word (counting from 1) should be removed from the string.
+ * @return              The string excluding the nth word.  Blank if text has no words; unchanged if n is 0 or exceeds the word count.
+ */
+
+EXPORT ExcludeNthWord(STRING text, UNSIGNED4 n) := lib_stringlib.Stringlib.StringExcludeNthWord(text, n);
+
 /*
  * Converts the data value to a sequence of hex pairs.
  *

+ 34 - 0
ecllibrary/teststd/str/TestExcludeFirstWord.ecl

@@ -0,0 +1,34 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+IMPORT Std.Str;
+
+EXPORT TestExcludeFirstWord := MODULE
+
+  EXPORT TestConst := MODULE
+    EXPORT Test01 := ASSERT(Str.ExcludeFirstWord('')+'!' = '!', CONST);
+    EXPORT Test04 := ASSERT(Str.ExcludeFirstWord('             ')+'!' = '!', CONST);
+    EXPORT Test07 := ASSERT(Str.ExcludeFirstWord('x')+'!' = '!');
+    EXPORT Test11 := ASSERT(Str.ExcludeFirstWord(' x')+'!' = '!');
+    EXPORT Test12 := ASSERT(Str.ExcludeFirstWord('x ')+'!' = '!');
+    EXPORT Test15 := ASSERT(Str.ExcludeFirstWord(' abc def ')+'!' = 'def !');
+    EXPORT Test17 := ASSERT(Str.ExcludeFirstWord(' a b c   def ')+'!' = 'b c   def !');
+    EXPORT Test18 := ASSERT(Str.ExcludeFirstWord(' ,,,, ')+'!' = '!');
+    EXPORT Test19 := ASSERT(Str.ExcludeFirstWord(' ,,,, ,,, ')+'!' = ',,, !');
+  END;
+
+END;

+ 35 - 0
ecllibrary/teststd/str/TestExcludeLastWord.ecl

@@ -0,0 +1,35 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+IMPORT Std.Str;
+
+EXPORT TestExcludeLastWord := MODULE
+
+  EXPORT TestConst := MODULE
+    EXPORT Test01 := ASSERT(Str.ExcludeLastWord('')+'!' = '!', CONST);
+    EXPORT Test04 := ASSERT(Str.ExcludeLastWord('             ')+'!' = '!', CONST);
+    EXPORT Test07 := ASSERT(Str.ExcludeLastWord('x')+'!' = '!');
+    EXPORT Test11 := ASSERT(Str.ExcludeLastWord(' x')+'!' = '!');
+    EXPORT Test12 := ASSERT(Str.ExcludeLastWord('x ')+'!' = '!');
+    EXPORT Test13 := ASSERT(Str.ExcludeLastWord(' x ')+'!' = '!');
+    EXPORT Test15 := ASSERT(Str.ExcludeLastWord(' abc def ')+'!' = ' abc !');
+    EXPORT Test17 := ASSERT(Str.ExcludeLastWord(' a b c   def ')+'!' = ' a b c   !');
+    EXPORT Test18 := ASSERT(Str.ExcludeLastWord(' ,,,, ')+'!' = '!');
+    EXPORT Test19 := ASSERT(Str.ExcludeLastWord(' ,,,, ,,, ')+'!' = ' ,,,, !');
+  END;
+
+END;

+ 54 - 0
ecllibrary/teststd/str/TestExcludeNthWord.ecl

@@ -0,0 +1,54 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+IMPORT Std.Str;
+
+EXPORT TestExcludeNthWord := MODULE
+
+  EXPORT TestConst := MODULE
+    //Check action on a string with no entries.
+    EXPORT Test01 := ASSERT(Str.ExcludeNthWord('',0)+'!' = '!', CONST);
+    EXPORT Test02 := ASSERT(Str.ExcludeNthWord('',1)+'!' = '!', CONST);
+    EXPORT Test03 := ASSERT(Str.ExcludeNthWord('',-1)+'!' = '!', CONST);
+    EXPORT Test04 := ASSERT(Str.ExcludeNthWord('             ',0)+'!' = '!', CONST);
+    EXPORT Test05 := ASSERT(Str.ExcludeNthWord('             ',1)+'!' = '!', CONST);
+    EXPORT Test06 := ASSERT(Str.ExcludeNthWord('             ',-1)+'!' = '!', CONST);
+    //Check action on a string containing a single word - with various whitespace
+    EXPORT Test07 := ASSERT(Str.ExcludeNthWord('x',0)+'!' = 'x!');
+    EXPORT Test08 := ASSERT(Str.ExcludeNthWord('x',1)+'!' = '!');
+    EXPORT Test09 := ASSERT(Str.ExcludeNthWord('x',2)+'!' = 'x!');
+    EXPORT Test10 := ASSERT(Str.ExcludeNthWord('x',3)+'!' = 'x!');
+    EXPORT Test11 := ASSERT(Str.ExcludeNthWord(' x',1)+'!' = '!');
+    EXPORT Test12 := ASSERT(Str.ExcludeNthWord('x ',1)+'!' = '!');
+    EXPORT Test13 := ASSERT(Str.ExcludeNthWord(' x',2)+'!' = ' x!');
+    EXPORT Test14 := ASSERT(Str.ExcludeNthWord(' x ',1)+'!' = '!');
+    //Check action on a string containing multiple words - with various whitespace combinations.
+    EXPORT Test15 := ASSERT(Str.ExcludeNthWord(' abc def ', 1)+'!' = 'def !');
+    EXPORT Test16 := ASSERT(Str.ExcludeNthWord(' abc def ', 2)+'!' = ' abc !');
+    EXPORT Test17 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',0)+'!' = '  a b c   def    !');
+    EXPORT Test18 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',1)+'!' = 'b c   def    !');
+    EXPORT Test19 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',2)+'!' = '  a c   def    !');
+    EXPORT Test20 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',3)+'!' = '  a b def    !');
+    EXPORT Test21 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',4)+'!' = '  a b c   !');
+    EXPORT Test22 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',5)+'!' = '  a b c   def    !');
+    EXPORT Test23 := ASSERT(Str.ExcludeNthWord(' ,,,, ',1)+'!' = '!');
+    //Test other space characters (< 0x20)
+    EXPORT Test24 := ASSERT(Str.ExcludeNthWord('  a b\nc \t  def    ',2)+'!' = '  a c \t  def    !');
+    EXPORT Test25 := ASSERT(Str.ExcludeNthWord('  a b\nc \t  def    ',3)+'!' = '  a b\ndef    !');
+  END;
+
+END;

+ 1 - 0
ecllibrary/teststd/str/TestGetNthWord.ecl

@@ -25,6 +25,7 @@ EXPORT TestGetNthWord := MODULE
     EXPORT Test16 := ASSERT(Str.GetNthWord(' abc def ', 2)+'!' = 'def!');
     EXPORT Test17 := ASSERT(Str.GetNthWord(' a b c   def ',3)+'!' = 'c!');
     EXPORT Test18 := ASSERT(Str.GetNthWord(' ,,,, ',1)+'!' = ',,,,!');
+    EXPORT Test19 := ASSERT(Str.GetNthWord(' a    b c   def ',3)+'!' = 'c!');
   END;
 
 END;

+ 86 - 6
plugins/stringlib/stringlib.cpp

@@ -74,6 +74,8 @@ const char * EclDefinition =
 "  unsigned integer4 EditDistanceV2(const string l, const string r) : c, pure,entrypoint='slEditDistanceV2'; \n"
 "  boolean EditDistanceWithinRadiusV2(const string l, const string r, unsigned4 radius) : c,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
 "  string StringGetNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringGetNthWord'; \n"
+"  string StringExcludeLastWord(const string src) : c, pure,entrypoint='slStringExcludeLastWord'; \n"
+"  string StringExcludeNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringExcludeNthWord'; \n"
 "  unsigned4 StringWordCount(const string src) : c, pure,entrypoint='slStringWordCount'; \n"
 "  unsigned4 CountWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slCountWords'; \n"
 "  SET OF STRING SplitWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slSplitWords'; \n"
@@ -1162,12 +1164,17 @@ STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen,
     return nsStringlib::editDistanceV3(leftLen, left, rightLen, right, radius) <= radius;
 }
 
+inline bool isWordSeparator(char x)
+{
+    return (unsigned char)x <= 0x20;    // space (0x20) and every byte value below it separates words
+}
+
 STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
 {
     const char* start = 0;
     const char* end = 0;
     // skip any leading white space
-    while (srcLen>0 && (unsigned char)*src<=0x20) {
+    while (srcLen>0 && isWordSeparator(*src)) {
         src++;
         srcLen--;
     }
@@ -1175,13 +1182,13 @@ STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * &
         start = src;
         n--;
         // go to the next white space
-        while (srcLen>0 && (unsigned char)*src>0x20) {
+        while (srcLen>0 && !isWordSeparator(*src)) {
             src++;
             srcLen--;
         }
         end = src;
         // skip white space again
-        while (srcLen>0 && (unsigned char)*src<=0x20) {
+        while (srcLen>0 && isWordSeparator(*src)) {
             src++;
             srcLen--;
         }
@@ -1201,7 +1208,7 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
 {
     // skip any leading white space
     unsigned word_count = 0;
-    while (srcLen>0 && (unsigned char)*src<=0x20) {
+    while (srcLen>0 && isWordSeparator(*src)) {
         src++;
         srcLen--;
     }
@@ -1209,12 +1216,12 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
     while (srcLen>0) {
         word_count++;
         // go to the next white space
-        while (srcLen>0 && (unsigned char)*src>0x20) {
+        while (srcLen>0 && !isWordSeparator(*src)) {
             src++;
             srcLen--;
         }
         // skip white space again
-        while (srcLen>0 && (unsigned char)*src<=0x20) {
+        while (srcLen>0 && isWordSeparator(*src)) {
             src++;
             srcLen--;
         }
@@ -1222,6 +1229,79 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
     return word_count;
 }
 
+STRINGLIB_API void STRINGLIB_CALL slStringExcludeLastWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
+{
+    //Returns src truncated at the start of its last word, so the whitespace trailing that word goes too; zero or one word gives an empty result
+    unsigned idx = 0;
+    unsigned startLast = 0;    // where the most recently found word begins; stays 0 until a second word is seen
+    while (idx < srcLen && isWordSeparator(src[idx]))    // skip leading whitespace
+        idx++;
+
+    for (;;)
+    {
+        while (idx < srcLen && !isWordSeparator(src[idx]))    // step over the current word
+            idx++;
+
+        while (idx < srcLen && isWordSeparator(src[idx]))    // and the whitespace that follows it
+            idx++;
+
+        if (idx == srcLen)    // that was the last word - startLast marks where it began (0 if it was the only word)
+            break;
+
+        startLast = idx;    // another word starts here
+    }
+
+    unsigned len = startLast;    // keep everything before the last word
+    tgtLen = len;
+    if (len)
+    {
+        tgt = (char *)CTXMALLOC(parentCtx, len);
+        memcpy(tgt,src,len);
+    }
+    else
+        tgt = NULL;    // empty result: no buffer is allocated
+}
+
+STRINGLIB_API void STRINGLIB_CALL slStringExcludeNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
+{
+    unsigned idx = 0;
+    unsigned startLast = 0;    // start of the span to remove: 0 while on the first word, otherwise where the current word begins
+    while (idx < srcLen && isWordSeparator(src[idx]))    // skip leading whitespace
+        idx++;
+
+    unsigned matchIndex = 0;
+    //Removing the first word also removes the leading whitespace, any other word only takes its trailing whitespace with it
+    //A string containing no words returns a blank string; if n is 0 or beyond the last word the string is returned unchanged
+    if (idx != srcLen)
+    {
+        for (;;)
+        {
+            while (idx < srcLen && !isWordSeparator(src[idx]))    // step over the current word
+                idx++;
+
+            while (idx < srcLen && isWordSeparator(src[idx]))    // and the whitespace that follows it
+                idx++;
+
+            if (++matchIndex == n)    // [startLast, idx) now spans word n and its trailing whitespace (plus leading whitespace when n is 1)
+                break;
+            startLast = idx;
+            if (idx == srcLen)    // ran out of words: startLast == idx == srcLen, so the whole string is kept
+                break;
+        }
+    }
+
+    unsigned len = startLast + (srcLen - idx);    // bytes kept before the removed span plus bytes kept after it
+    tgtLen = len;
+    if (len)
+    {
+        tgt = (char *)CTXMALLOC(parentCtx, len);
+        memcpy(tgt,src,startLast);
+        memcpy(tgt+startLast,src+idx,(srcLen - idx));
+    }
+    else
+        tgt = NULL;    // empty result: no buffer is allocated
+}
+
 //--------------------------------------------------------------------------------------------------------------------
 
 STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)

+ 2 - 0
plugins/stringlib/stringlib.hpp

@@ -77,6 +77,8 @@ STRINGLIB_API unsigned STRINGLIB_CALL slEditDistanceV2(unsigned leftLen, const c
 STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius);
 STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n);
 STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen, const char * src);
+STRINGLIB_API void STRINGLIB_CALL slStringExcludeLastWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src);
+STRINGLIB_API void STRINGLIB_CALL slStringExcludeNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n);
 STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
 STRINGLIB_API void STRINGLIB_CALL slSplitWords(bool & __isAllResult, size32_t & __lenResult, void * & __result, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
 STRINGLIB_API void STRINGLIB_CALL slCombineWords(size32_t & __lenResult, void * & __result, bool isAllSrc, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);