13 yıl önce · e5a24ad6fe
--- a/ecllibrary/std/Str.ecl
+++ b/ecllibrary/std/Str.ecl
@@ -338,6 +338,37 @@ EXPORT unsigned4 WordCount(string text) :=
 
				 EXPORT string GetNthWord(string text, unsigned4 n) :=
			
 
				     lib_stringlib.StringLib.StringGetNthWord(text, n);
			
 
				 
			
 
				+/**
				+ * Removes the first word from a string and returns what is left.
				+ * A word is a run of characters delimited by one or more whitespace characters.
				+ * Any whitespace in front of the first word, and the whitespace that separates it
				+ * from the second word, is removed as well.  The rest of the string is returned unchanged.
				+ *
				+ * @param text          The string to be broken into words.
				+ * @return              The string excluding the first word (empty if text holds at most one word).
				+ */
				+
				+EXPORT ExcludeFirstWord(STRING text) :=
				+    lib_stringlib.Stringlib.StringExcludeNthWord(text, 1);
			
 
				+
			
 
				+/**
				+ * Removes the last word from a string and returns what precedes it.
				+ * A word is a run of characters delimited by one or more whitespace characters.
				+ * Whitespace following the last word is removed with it; leading whitespace and the
				+ * whitespace separating the earlier words are kept.  If the string holds at most one
				+ * word the result is empty.
				+ *
				+ * @param text          The string to be broken into words.
				+ * @return              The string excluding the last word.
				+ */
				+
				+EXPORT ExcludeLastWord(STRING text) :=
				+    lib_stringlib.Stringlib.StringExcludeLastWord(text);
			
 
				+
			
 
				+/**
				+ * Returns everything except the nth word from the string.  Words are separated by one or more whitespace characters.
				+ * Whitespace after a word is removed with the word and leading whitespace is removed with the first word.
				+ * If n is 0, or greater than the number of words, no word is removed.  A string that contains no words
				+ * always yields an empty string.
				+ *
				+ * @param text          The string to be broken into words.
				+ * @param n             Which word should be removed from the string (the first word is 1).
				+ * @return              The string excluding the nth word.
				+ */
				+
				+// n is UNSIGNED4 to match the plugin entry point and GetNthWord; a narrower type would
				+// silently truncate large indexes and could remove the wrong word.
				+EXPORT ExcludeNthWord(STRING text, UNSIGNED4 n) := lib_stringlib.Stringlib.StringExcludeNthWord(text, n);
			
 
				+
			
 
				 /*
			
 
				  * Converts the data value to a sequence of hex pairs.
			
 
				  *
			
--- a/ecllibrary/teststd/str/TestExcludeFirstWord.ecl
+++ b/ecllibrary/teststd/str/TestExcludeFirstWord.ecl
@@ -0,0 +1,34 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
			
 
				+
			
 
				+    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+    you may not use this file except in compliance with the License.
			
 
				+    You may obtain a copy of the License at
			
 
				+
			
 
				+       http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+    Unless required by applicable law or agreed to in writing, software
			
 
				+    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+    See the License for the specific language governing permissions and
			
 
				+    limitations under the License.
			
 
				+############################################################################## */
			
 
				+
			
 
				+IMPORT Std.Str;
			
 
				+
			
 
				+// Assertions for Str.ExcludeFirstWord.  Each check appends '!' to the result so that
				+// trailing whitespace in the returned string is significant in the comparison.
				+EXPORT TestExcludeFirstWord := MODULE
				+
				+  EXPORT TestConst := MODULE
				+    //Empty and whitespace-only input produce an empty result.
				+    EXPORT Test01 := ASSERT(Str.ExcludeFirstWord('')+'!' = '!', CONST);
				+    EXPORT Test04 := ASSERT(Str.ExcludeFirstWord('             ')+'!' = '!', CONST);
				+    //A single word is removed together with any whitespace around it.
				+    EXPORT Test07 := ASSERT(Str.ExcludeFirstWord('x')+'!' = '!');
				+    EXPORT Test11 := ASSERT(Str.ExcludeFirstWord(' x')+'!' = '!');
				+    EXPORT Test12 := ASSERT(Str.ExcludeFirstWord('x ')+'!' = '!');
				+    //With several words the leading whitespace, the first word and the separators after it are dropped;
				+    //the remainder (including trailing whitespace) is preserved as is.
				+    EXPORT Test15 := ASSERT(Str.ExcludeFirstWord(' abc def ')+'!' = 'def !');
				+    EXPORT Test17 := ASSERT(Str.ExcludeFirstWord(' a b c   def ')+'!' = 'b c   def !');
				+    //Punctuation is not a separator - a run of commas is treated as one word.
				+    EXPORT Test18 := ASSERT(Str.ExcludeFirstWord(' ,,,, ')+'!' = '!');
				+    EXPORT Test19 := ASSERT(Str.ExcludeFirstWord(' ,,,, ,,, ')+'!' = ',,, !');
				+  END;
				+
				+END;
			
--- a/ecllibrary/teststd/str/TestExcludeLastWord.ecl
+++ b/ecllibrary/teststd/str/TestExcludeLastWord.ecl
@@ -0,0 +1,35 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
			
 
				+
			
 
				+    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+    you may not use this file except in compliance with the License.
			
 
				+    You may obtain a copy of the License at
			
 
				+
			
 
				+       http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+    Unless required by applicable law or agreed to in writing, software
			
 
				+    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+    See the License for the specific language governing permissions and
			
 
				+    limitations under the License.
			
 
				+############################################################################## */
			
 
				+
			
 
				+IMPORT Std.Str;
			
 
				+
			
 
				+// Assertions for Str.ExcludeLastWord.  Each check appends '!' to the result so that
				+// trailing whitespace in the returned string is significant in the comparison.
				+EXPORT TestExcludeLastWord := MODULE
				+
				+  EXPORT TestConst := MODULE
				+    //Empty and whitespace-only input produce an empty result.
				+    EXPORT Test01 := ASSERT(Str.ExcludeLastWord('')+'!' = '!', CONST);
				+    EXPORT Test04 := ASSERT(Str.ExcludeLastWord('             ')+'!' = '!', CONST);
				+    //A single word is both first and last, so it is removed together with the whitespace around it.
				+    EXPORT Test07 := ASSERT(Str.ExcludeLastWord('x')+'!' = '!');
				+    EXPORT Test11 := ASSERT(Str.ExcludeLastWord(' x')+'!' = '!');
				+    EXPORT Test12 := ASSERT(Str.ExcludeLastWord('x ')+'!' = '!');
				+    EXPORT Test13 := ASSERT(Str.ExcludeLastWord(' x ')+'!' = '!');
				+    //With several words the last word and its trailing whitespace are dropped;
				+    //leading whitespace and the separators before the last word are preserved.
				+    EXPORT Test15 := ASSERT(Str.ExcludeLastWord(' abc def ')+'!' = ' abc !');
				+    EXPORT Test17 := ASSERT(Str.ExcludeLastWord(' a b c   def ')+'!' = ' a b c   !');
				+    //Punctuation is not a separator - a run of commas is treated as one word.
				+    EXPORT Test18 := ASSERT(Str.ExcludeLastWord(' ,,,, ')+'!' = '!');
				+    EXPORT Test19 := ASSERT(Str.ExcludeLastWord(' ,,,, ,,, ')+'!' = ' ,,,, !');
				+  END;
				+
				+END;
			
--- a/ecllibrary/teststd/str/TestExcludeNthWord.ecl
+++ b/ecllibrary/teststd/str/TestExcludeNthWord.ecl
@@ -0,0 +1,54 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
			
 
				+
			
 
				+    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+    you may not use this file except in compliance with the License.
			
 
				+    You may obtain a copy of the License at
			
 
				+
			
 
				+       http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+    Unless required by applicable law or agreed to in writing, software
			
 
				+    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+    See the License for the specific language governing permissions and
			
 
				+    limitations under the License.
			
 
				+############################################################################## */
			
 
				+
			
 
				+IMPORT Std.Str;
			
 
				+
			
 
				+// Assertions for Str.ExcludeNthWord.  Each check appends '!' to the result so that
				+// trailing whitespace in the returned string is significant in the comparison.
				+EXPORT TestExcludeNthWord := MODULE
				+
				+  EXPORT TestConst := MODULE
				+    //Check action on a string with no entries - the result is always empty, whatever the index.
				+    //NOTE(review): -1 is passed for an unsigned parameter, so it presumably wraps to a large index - confirm.
				+    EXPORT Test01 := ASSERT(Str.ExcludeNthWord('',0)+'!' = '!', CONST);
				+    EXPORT Test02 := ASSERT(Str.ExcludeNthWord('',1)+'!' = '!', CONST);
				+    EXPORT Test03 := ASSERT(Str.ExcludeNthWord('',-1)+'!' = '!', CONST);
				+    EXPORT Test04 := ASSERT(Str.ExcludeNthWord('             ',0)+'!' = '!', CONST);
				+    EXPORT Test05 := ASSERT(Str.ExcludeNthWord('             ',1)+'!' = '!', CONST);
				+    EXPORT Test06 := ASSERT(Str.ExcludeNthWord('             ',-1)+'!' = '!', CONST);
				+    //Check action on a string containing a single word - with various whitespace.
				+    //An index of 0, or one beyond the last word, leaves the string untouched.
				+    EXPORT Test07 := ASSERT(Str.ExcludeNthWord('x',0)+'!' = 'x!');
				+    EXPORT Test08 := ASSERT(Str.ExcludeNthWord('x',1)+'!' = '!');
				+    EXPORT Test09 := ASSERT(Str.ExcludeNthWord('x',2)+'!' = 'x!');
				+    EXPORT Test10 := ASSERT(Str.ExcludeNthWord('x',3)+'!' = 'x!');
				+    EXPORT Test11 := ASSERT(Str.ExcludeNthWord(' x',1)+'!' = '!');
				+    EXPORT Test12 := ASSERT(Str.ExcludeNthWord('x ',1)+'!' = '!');
				+    EXPORT Test13 := ASSERT(Str.ExcludeNthWord(' x',2)+'!' = ' x!');
				+    EXPORT Test14 := ASSERT(Str.ExcludeNthWord(' x ',1)+'!' = '!');
				+    //Check action on a string containing multiple words - with various whitespace combinations.
				+    //The whitespace after the removed word goes with it; removing the first word also drops leading whitespace.
				+    EXPORT Test15 := ASSERT(Str.ExcludeNthWord(' abc def ', 1)+'!' = 'def !');
				+    EXPORT Test16 := ASSERT(Str.ExcludeNthWord(' abc def ', 2)+'!' = ' abc !');
				+    EXPORT Test17 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',0)+'!' = '  a b c   def    !');
				+    EXPORT Test18 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',1)+'!' = 'b c   def    !');
				+    EXPORT Test19 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',2)+'!' = '  a c   def    !');
				+    EXPORT Test20 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',3)+'!' = '  a b def    !');
				+    EXPORT Test21 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',4)+'!' = '  a b c   !');
				+    EXPORT Test22 := ASSERT(Str.ExcludeNthWord('  a b c   def    ',5)+'!' = '  a b c   def    !');
				+    //Punctuation is not a separator - a run of commas is treated as one word.
				+    EXPORT Test23 := ASSERT(Str.ExcludeNthWord(' ,,,, ',1)+'!' = '!');
				+    //Test other space characters (< 0x20)
				+    EXPORT Test24 := ASSERT(Str.ExcludeNthWord('  a b\nc \t  def    ',2)+'!' = '  a c \t  def    !');
				+    EXPORT Test25 := ASSERT(Str.ExcludeNthWord('  a b\nc \t  def    ',3)+'!' = '  a b\ndef    !');
				+  END;
				+
				+END;
			
--- a/ecllibrary/teststd/str/TestGetNthWord.ecl
+++ b/ecllibrary/teststd/str/TestGetNthWord.ecl
@@ -25,6 +25,7 @@ EXPORT TestGetNthWord := MODULE
 
				     EXPORT Test16 := ASSERT(Str.GetNthWord(' abc def ', 2)+'!' = 'def!');
			
 
				     EXPORT Test17 := ASSERT(Str.GetNthWord(' a b c   def ',3)+'!' = 'c!');
			
 
				     EXPORT Test18 := ASSERT(Str.GetNthWord(' ,,,, ',1)+'!' = ',,,,!');
			
 
				+    EXPORT Test19 := ASSERT(Str.GetNthWord(' a    b c   def ',3)+'!' = 'c!');
			
 
				   END;
			
 
				 
			
 
				 END;
			
--- a/plugins/stringlib/stringlib.cpp
+++ b/plugins/stringlib/stringlib.cpp
@@ -74,6 +74,8 @@ const char * EclDefinition =
 
				 "  unsigned integer4 EditDistanceV2(const string l, const string r) : c, pure,entrypoint='slEditDistanceV2'; \n"
			
 
				 "  boolean EditDistanceWithinRadiusV2(const string l, const string r, unsigned4 radius) : c,pure,entrypoint='slEditDistanceWithinRadiusV2'; \n"
			
 
				 "  string StringGetNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringGetNthWord'; \n"
			
 
				+"  string StringExcludeLastWord(const string src) : c, pure,entrypoint='slStringExcludeLastWord'; \n"
			
 
				+"  string StringExcludeNthWord(const string src, unsigned4 n) : c, pure,entrypoint='slStringExcludeNthWord'; \n"
			
 
				 "  unsigned4 StringWordCount(const string src) : c, pure,entrypoint='slStringWordCount'; \n"
			
 
				 "  unsigned4 CountWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slCountWords'; \n"
			
 
				 "  SET OF STRING SplitWords(const string src, const string _separator, BOOLEAN allow_blanks) : c, pure,entrypoint='slSplitWords'; \n"
			
@@ -1162,12 +1164,17 @@ STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen,
 
				     return nsStringlib::editDistanceV3(leftLen, left, rightLen, right, radius) <= radius;
			
 
				 }
			
 
				 
			
 
				+// A character separates words when its unsigned value is 0x20 (space) or lower.
				+// The value is compared as unsigned so that bytes >= 0x80 are never treated as separators.
				+inline bool isWordSeparator(char x)
				+{
				+    const unsigned char value = static_cast<unsigned char>(x);
				+    return value <= 0x20;
				+}
			
 
				+
			
 
				 STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
			
 
				 {
			
 
				     const char* start = 0;
			
 
				     const char* end = 0;
			
 
				     // skip any leading white space
			
 
				-    while (srcLen>0 && (unsigned char)*src<=0x20) {
			
 
				+    while (srcLen>0 && isWordSeparator(*src)) {
			
 
				         src++;
			
 
				         srcLen--;
			
 
				     }
			
@@ -1175,13 +1182,13 @@ STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * &
 
				         start = src;
			
 
				         n--;
			
 
				         // go to the next white space
			
 
				-        while (srcLen>0 && (unsigned char)*src>0x20) {
			
 
				+        while (srcLen>0 && !isWordSeparator(*src)) {
			
 
				             src++;
			
 
				             srcLen--;
			
 
				         }
			
 
				         end = src;
			
 
				         // skip white space again
			
 
				-        while (srcLen>0 && (unsigned char)*src<=0x20) {
			
 
				+        while (srcLen>0 && isWordSeparator(*src)) {
			
 
				             src++;
			
 
				             srcLen--;
			
 
				         }
			
@@ -1201,7 +1208,7 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
 
				 {
			
 
				     // skip any leading white space
			
 
				     unsigned word_count = 0;
			
 
				-    while (srcLen>0 && (unsigned char)*src<=0x20) {
			
 
				+    while (srcLen>0 && isWordSeparator(*src)) {
			
 
				         src++;
			
 
				         srcLen--;
			
 
				     }
			
@@ -1209,12 +1216,12 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
 
				     while (srcLen>0) {
			
 
				         word_count++;
			
 
				         // go to the next white space
			
 
				-        while (srcLen>0 && (unsigned char)*src>0x20) {
			
 
				+        while (srcLen>0 && !isWordSeparator(*src)) {
			
 
				             src++;
			
 
				             srcLen--;
			
 
				         }
			
 
				         // skip white space again
			
 
				-        while (srcLen>0 && (unsigned char)*src<=0x20) {
			
 
				+        while (srcLen>0 && isWordSeparator(*src)) {
			
 
				             src++;
			
 
				             srcLen--;
			
 
				         }
			
@@ -1222,6 +1229,79 @@ STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen,const ch
 
				     return word_count;
			
 
				 }
			
 
				 
			
 
				+// Returns a copy of src with its last word (and any whitespace following it) removed.
				+// Leading whitespace and the separators in front of the last word are retained; if src
				+// contains at most one word the result is empty (tgtLen = 0, tgt = NULL).
				+//   tgtLen, tgt  - receive the length and the CTXMALLOC'd buffer of the result.
				+//   srcLen, src  - the input text (not null terminated).
				+STRINGLIB_API void STRINGLIB_CALL slStringExcludeLastWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)
				+{
				+    // Step over any whitespace in front of the first word.
				+    unsigned pos = 0;
				+    while (pos < srcLen && isWordSeparator(src[pos]))
				+        ++pos;
				+
				+    // keepLen ends up as the offset at which the last word starts.  It stays 0 when
				+    // there is no second word, so a single word takes its leading whitespace with it.
				+    unsigned keepLen = 0;
				+    while (pos < srcLen)
				+    {
				+        // Consume one word followed by the separators after it.
				+        while (pos < srcLen && !isWordSeparator(src[pos]))
				+            ++pos;
				+        while (pos < srcLen && isWordSeparator(src[pos]))
				+            ++pos;
				+
				+        // Only when another word follows is the text consumed so far part of the result.
				+        if (pos < srcLen)
				+            keepLen = pos;
				+    }
				+
				+    tgtLen = keepLen;
				+    if (keepLen == 0)
				+    {
				+        tgt = NULL;
				+        return;
				+    }
				+
				+    tgt = (char *)CTXMALLOC(parentCtx, keepLen);
				+    memcpy(tgt, src, keepLen);
				+}
			
 
				+
			
 
				+// Returns a copy of src with its nth word (1-based) removed.
				+// The whitespace following the removed word goes with it, and removing the first word
				+// also removes the whitespace in front of it.  When n is 0 or exceeds the number of
				+// words, src is returned unchanged - except that a string containing no words always
				+// produces an empty result (tgtLen = 0, tgt = NULL).
				+//   tgtLen, tgt  - receive the length and the CTXMALLOC'd buffer of the result.
				+//   srcLen, src  - the input text (not null terminated).
				+//   n            - index of the word to remove.
				+STRINGLIB_API void STRINGLIB_CALL slStringExcludeNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n)
				+{
				+    // Step over any whitespace in front of the first word.
				+    unsigned pos = 0;
				+    while (pos < srcLen && isWordSeparator(src[pos]))
				+        ++pos;
				+
				+    // headLen is the number of leading bytes of src to keep.  It is only advanced past
				+    // words that are not being removed, so it stays 0 while the first word is the target.
				+    unsigned headLen = 0;
				+    unsigned wordsSeen = 0;
				+    while (pos < srcLen)
				+    {
				+        // Consume one word followed by the separators after it.
				+        while (pos < srcLen && !isWordSeparator(src[pos]))
				+            ++pos;
				+        while (pos < srcLen && isWordSeparator(src[pos]))
				+            ++pos;
				+
				+        // Found the word to drop: [headLen, pos) is the span that is cut out.
				+        if (++wordsSeen == n)
				+            break;
				+        headLen = pos;
				+    }
				+
				+    // The result is the retained head followed by everything after the removed span.
				+    const unsigned tailLen = srcLen - pos;
				+    const unsigned total = headLen + tailLen;
				+    tgtLen = total;
				+    if (total == 0)
				+    {
				+        tgt = NULL;
				+        return;
				+    }
				+
				+    tgt = (char *)CTXMALLOC(parentCtx, total);
				+    memcpy(tgt, src, headLen);
				+    memcpy(tgt + headLen, src + pos, tailLen);
				+}
			
 
				+
			
 
				 //--------------------------------------------------------------------------------------------------------------------
			
 
				 
			
 
				 STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems)
			
--- a/plugins/stringlib/stringlib.hpp
+++ b/plugins/stringlib/stringlib.hpp
@@ -77,6 +77,8 @@ STRINGLIB_API unsigned STRINGLIB_CALL slEditDistanceV2(unsigned leftLen, const c
 
				 STRINGLIB_API bool STRINGLIB_CALL slEditDistanceWithinRadiusV2(unsigned leftLen, const char * left, unsigned rightLen, const char * right, unsigned radius);
			
 
				 STRINGLIB_API void STRINGLIB_CALL slStringGetNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n);
			
 
				 STRINGLIB_API unsigned STRINGLIB_CALL slStringWordCount(unsigned srcLen, const char * src);
			
 
				+STRINGLIB_API void STRINGLIB_CALL slStringExcludeLastWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src);
			
 
				+STRINGLIB_API void STRINGLIB_CALL slStringExcludeNthWord(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src, unsigned n);
			
 
				 STRINGLIB_API unsigned STRINGLIB_CALL slCountWords(size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
			
 
				 STRINGLIB_API void STRINGLIB_CALL slSplitWords(bool & __isAllResult, size32_t & __lenResult, void * & __result, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);
			
 
				 STRINGLIB_API void STRINGLIB_CALL slCombineWords(size32_t & __lenResult, void * & __result, bool isAllSrc, size32_t lenSrc, const char * src, size32_t lenSeparator, const char * separator, bool allowBlankItems);