Bläddra i källkod

Merge pull request #8003 from ghalliday/issue14461

HPCC-14461 Fix SEH in PARSE on empty computed string

Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 9 år sedan
förälder
incheckning
4b6dbafb7e
3 ändrade filer med 63 tillägg och 34 borttagningar
  1. 37 34
      common/thorhelper/thorrparse.cpp
  2. 2 0
      testing/regress/ecl/key/parse2.xml
  3. 24 0
      testing/regress/ecl/parse2.ecl

+ 37 - 34
common/thorhelper/thorrparse.cpp

@@ -3391,43 +3391,46 @@ bool RegexParser::performMatch(IMatchedAction & action, const void * row, unsign
         const byte * end = endData - algo->minPatternLength;
 
         RegexState state(cache, algo->kind, helper, this, algo->inputFormat, len, start);
-        state.row = row;
-        state.processor = &action;
-        state.best = NULL;
-        for (const byte * curScan = start; curScan <= end;)
+        if (len >= algo->minPatternLength)
         {
-            state.cur = curScan;
-            state.top.start = curScan;
-            state.nextScanPosition = NULL;
-            state.score = 0;
-            if (!algo->singleChoicePerLine)
-                state.best = NULL;
-            if ((size32_t)(endData - curScan) > maxSize)
+            state.row = row;
+            state.processor = &action;
+            state.best = NULL;
+            for (const byte * curScan = start; curScan <= end;)
             {
-                state.end = curScan + (maxSize + charWidth);
-                state.lengthIsLimited = true;
-            }
-            else
-            {
-                state.end = endData;
-                state.lengthIsLimited = false;
-            }
-            algo->match(state);
-            if (state.numMatched >= algo->keepLimit)
-                break;
-            if (state.numMatched > algo->atMostLimit)
-            {
-                results.reset();
-                return false;
+                state.cur = curScan;
+                state.top.start = curScan;
+                state.nextScanPosition = NULL;
+                state.score = 0;
+                if (!algo->singleChoicePerLine)
+                    state.best = NULL;
+                if ((size32_t)(endData - curScan) > maxSize)
+                {
+                    state.end = curScan + (maxSize + charWidth);
+                    state.lengthIsLimited = true;
+                }
+                else
+                {
+                    state.end = endData;
+                    state.lengthIsLimited = false;
+                }
+                algo->match(state);
+                if (state.numMatched >= algo->keepLimit)
+                    break;
+                if (state.numMatched > algo->atMostLimit)
+                {
+                    results.reset();
+                    return false;
+                }
+                if (algo->scanAction == INlpParseAlgorithm::NlpScanWhole)
+                    break;
+                if (state.numMatched && (algo->scanAction == INlpParseAlgorithm::NlpScanNone))
+                    break;
+                if (state.nextScanPosition && (algo->scanAction == INlpParseAlgorithm::NlpScanNext) && (curScan != state.nextScanPosition))
+                    curScan = state.nextScanPosition;
+                else
+                    curScan += charWidth;
             }
-            if (algo->scanAction == INlpParseAlgorithm::NlpScanWhole)
-                break;
-            if (state.numMatched && (algo->scanAction == INlpParseAlgorithm::NlpScanNone))
-                break;
-            if (state.nextScanPosition && (algo->scanAction == INlpParseAlgorithm::NlpScanNext) && (curScan != state.nextScanPosition))
-                curScan = state.nextScanPosition;
-            else
-                curScan += charWidth;
         }
 
         if (state.numMatched == 0)

+ 2 - 0
testing/regress/ecl/key/parse2.xml

@@ -0,0 +1,2 @@
+<Dataset name='Result 1'>
+</Dataset>

+ 24 - 0
testing/regress/ecl/parse2.ecl

@@ -0,0 +1,24 @@
+rec := record
+  unsigned8 id;
+  unicode   searchText;
+end;
+
+cleansedFieldInline := dataset([{6420, ''}], rec);
+
+pattern words := pattern('[^,;]+');
+pattern sepchar := [',',';','AND'];
+rule termsRule := FIRST words sepchar |sepchar words LAST | sepchar words sepchar | FIRST words LAST;
+
+normalizeSeperators(unicode str) := regexreplace(u'AND',str,u',');
+
+termsDs := parse(NOFOLD(cleansedFieldInline),
+                 normalizeSeperators(searchText),
+                 termsRule,
+                 transform({rec, unicode terms},
+                           self.terms := trim(matchunicode(words),left,right),
+                           self := left),
+                 SCAN ALL);
+
+sequential (
+  output(termsDs);  // Test parsing an empty string with a pattern that has a minimum match length > 0
+);