13 gadi atpakaļ · 55b79d1650
--- a/common/thorhelper/thorparse.cpp
+++ b/common/thorhelper/thorparse.cpp
@@ -114,7 +114,6 @@ NlpMatchPath::NlpMatchPath(const UnsignedArray & _ids, const UnsignedArray & _in
 
				         ids.append(_ids.item(idx));
			
 
				         indices.append(_indices.item(idx));
			
 
				     }
			
 
				-    init();
			
 
				 }
			
 
				 
			
 
				 
			
@@ -130,83 +129,12 @@ NlpMatchPath::NlpMatchPath(MemoryBuffer & in)
 
				         ids.append(id);
			
 
				         indices.append(index);
			
 
				     }
			
 
				-    init();
			
 
				 }
			
 
				 
			
 
				 NlpMatchPath::~NlpMatchPath()
			
 
				 {
			
 
				-    delete [] searchIndices;
			
 
				 }
			
 
				 
			
 
				-void NlpMatchPath::init()
			
 
				-{
			
 
				-    maxDepth = ids.ordinality();
			
 
				-    searchIndices = new unsigned[maxDepth];
			
 
				-}
			
 
				-
			
 
				-
			
 
				-IMatchWalker * NlpMatchPath::findInChildren(IMatchWalker * top, regexid_t id)
			
 
				-{
			
 
				-    for (unsigned i = 0;; i++)
			
 
				-    {
			
 
				-        Owned<IMatchWalker> child = top->getChild(i);
			
 
				-        if (!child)
			
 
				-            return NULL;
			
 
				-        IMatchWalker * ret = find(child, id);
			
 
				-        if (ret)
			
 
				-            return ret;
			
 
				-    }
			
 
				-    return NULL;
			
 
				-}
			
 
				-
			
 
				-IMatchWalker * NlpMatchPath::find(IMatchWalker * top, regexid_t id)
			
 
				-{
			
 
				-    unsigned savedSearchDepth = maxSearchDepth;
			
 
				-    if (top->queryID() == id)
			
 
				-    {
			
 
				-        unsigned thisLevelIndex = searchIndices[pathIndex];
			
 
				-        if ((thisLevelIndex == UNKNOWN_INSTANCE) || (thisLevelIndex == 1))
			
 
				-        {
			
 
				-            pathIndex++;
			
 
				-            if (pathIndex == ids.ordinality())
			
 
				-            {
			
 
				-                maxSearchDepth = pathIndex;
			
 
				-                return LINK(top);
			
 
				-            }
			
 
				-
			
 
				-            if (thisLevelIndex == 1)
			
 
				-                maxSearchDepth = pathIndex;
			
 
				-            return findInChildren(top, ids.item(pathIndex));
			
 
				-        }
			
 
				-        else
			
 
				-            searchIndices[pathIndex]--;
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-        Owned<IMatchWalker> ret = findInChildren(top, id);
			
 
				-        //return if matched another level - may have failed to match, or matched completely
			
 
				-        if (savedSearchDepth != maxSearchDepth)
			
 
				-            return ret.getClear();
			
 
				-    }
			
 
				-    return NULL;
			
 
				-}
			
 
				-
			
 
				-IMatchedElement * NlpMatchPath::getMatch(IMatchWalker * top)
			
 
				-{
			
 
				-    CriticalBlock procedure(cs);
			
 
				-
			
 
				-    //MORE: We could allocate searchIndices on the stack and pass as a parameter
			
 
				-    memcpy(searchIndices, indices.getArray(), sizeof(*searchIndices)*maxDepth);
			
 
				-    pathIndex = 0;
			
 
				-    maxSearchDepth = 0;
			
 
				-    Owned<IMatchWalker> state = find(top, ids.item(0));
			
 
				-    if (!state)
			
 
				-        return NULL;
			
 
				-    return new MatchWalker2MatchedElement(state);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-
			
 
				 void NlpMatchPath::serialize(MemoryBuffer & out) const
			
 
				 {
			
 
				     unsigned num = ids.ordinality();
			
@@ -263,20 +191,6 @@ CMatchedResults::~CMatchedResults()
 
				     kill();
			
 
				 }
			
 
				 
			
 
				-//MORE: Implement one that works directly on grammar symbols
			
 
				-void CMatchedResults::extractResults(IMatchWalker * top, const byte * _in, const byte * _rootResult)
			
 
				-{
			
 
				-    in = _in;
			
 
				-    rootResult = _rootResult;
			
 
				-    notMatched.ptr = in;
			
 
				-    ForEachItemIn(idx, def->matchResults)
			
 
				-    {
			
 
				-        ::Release(matched[idx]);
			
 
				-        matched[idx] = def->matchResults.item(idx).getMatch(top);
			
 
				-        if (!matched[idx]) matched[idx] = LINK(&notMatched);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				 bool CMatchedResults::getMatched(unsigned idx)              
			
 
				 { 
			
 
				     return matched[idx] != &notMatched; 
			
--- a/common/thorhelper/thorparse.hpp
+++ b/common/thorhelper/thorparse.hpp
@@ -70,6 +70,8 @@ public:
 
				     inline _ATOM queryName()                              { return name; }
			
 
				     inline regexid_t queryID()                            { return id; }
			
 
				 
			
 
				+    inline void reset(_ATOM _name, regexid_t _id) { next = NULL; firstChild = NULL; name = _name; id = _id; }
			
 
				+
			
 
				 public:
			
 
				     const byte * start;
			
 
				     const byte * end;
			
--- a/common/thorhelper/thorparse.ipp
+++ b/common/thorhelper/thorparse.ipp
@@ -59,6 +59,13 @@ public:
 
				 
			
 
				 #define UNKNOWN_INSTANCE    ((unsigned)-1)
			
 
				 
			
 
				+class NlpMatchSearchInstance
			
 
				+{
			
 
				+public:
			
 
				+    unsigned lastExactMatchDepth;
			
 
				+    unsigned nextIndex;
			
 
				+};
			
 
				+
			
 
				 class THORHELPER_API NlpMatchPath : public CInterface
			
 
				 {
			
 
				 public:
			
@@ -68,21 +75,25 @@ public:
 
				 
			
 
				     void serialize(MemoryBuffer & buffer) const;
			
 
				 
			
 
				-    IMatchedElement * getMatch(IMatchWalker * top);
			
 
				+    inline unsigned numItems() const { return ids.ordinality(); }
			
 
				+    inline unsigned getId(unsigned i) const { return ids.item(i); }
			
 
				+    inline unsigned getIndex(unsigned i) const { return indices.item(i); }
			
 
				+    inline bool matchAny(unsigned i) const { return indices.item(i) == UNKNOWN_INSTANCE; }
			
 
				+
			
 
				+    inline unsigned nextExactMatchIndex(unsigned from) const
			
 
				+    {
			
 
				+        for (unsigned i=from; i < indices.ordinality(); i++)
			
 
				+        {
			
 
				+            unsigned cur = indices.item(i);
			
 
				+            if (cur != UNKNOWN_INSTANCE)
			
 
				+                return cur;
			
 
				+        }
			
 
				+        return 0;
			
 
				+    }
			
 
				 
			
 
				 protected:
			
 
				-    void init();
			
 
				-    IMatchWalker * find(IMatchWalker * top, regexid_t id);
			
 
				-    IMatchWalker * findInChildren(IMatchWalker * top, regexid_t id);
			
 
				-    
			
 
				-protected:
			
 
				-    unsigned maxDepth;
			
 
				-    unsigned pathIndex;
			
 
				-    unsigned maxSearchDepth;
			
 
				-    unsigned * searchIndices;
			
 
				     UnsignedArray ids;
			
 
				     UnsignedArray indices;
			
 
				-    CriticalSection cs;
			
 
				 };
			
 
				 
			
 
				 
			
@@ -156,7 +167,6 @@ public:
 
				     ~CMatchedResults();
			
 
				     IMPLEMENT_IINTERFACE
			
 
				     
			
 
				-    void extractResults(IMatchWalker * top, const byte * _in, const byte * _rootResult = NULL);
			
 
				     void kill();
			
 
				 
			
 
				     //IMatchedResults
			
--- a/common/thorhelper/thorrparse.cpp
+++ b/common/thorhelper/thorrparse.cpp
@@ -237,40 +237,47 @@ static void removeTrailingSeparator(MatchState & matched)
 
				         matched.end = child->start;
			
 
				 }
			
 
				 
			
 
				-MatchState * RegexMatchPath::find(MatchState * top, regexid_t id)
			
 
				+MatchState * RegexMatchSearchInstance::find(MatchState * top, const NlpMatchPath & path, unsigned depth)
			
 
				 {
			
 
				-    unsigned savedSearchDepth = maxSearchDepth;
			
 
				+    regexid_t id = path.getId(depth);
			
 
				     do
			
 
				     {
			
 
				         if (top->queryID() == id)
			
 
				         {
			
 
				-            unsigned thisLevelIndex = searchIndices[pathIndex];
			
 
				-            if ((thisLevelIndex == UNKNOWN_INSTANCE) || (thisLevelIndex == 1))
			
 
				+            bool matchAny = path.matchAny(depth);
			
 
				+            if (matchAny || (nextIndex == 1))
			
 
				             {
			
 
				-
			
 
				-                pathIndex++;
			
 
				-                if (pathIndex == ids.ordinality())
			
 
				+                if (depth+1 == path.numItems())
			
 
				                 {
			
 
				-                    maxSearchDepth = pathIndex;
			
 
				+                    lastExactMatchDepth = depth+1;
			
 
				                     return top;
			
 
				                 }
			
 
				 
			
 
				-                if (thisLevelIndex == 1)
			
 
				-                    maxSearchDepth = pathIndex;
			
 
				+                if (!matchAny)
			
 
				+                {
			
 
				+                    lastExactMatchDepth = depth+1;
			
 
				+                    nextIndex = path.nextExactMatchIndex(depth+1);
			
 
				+                }
			
 
				+
			
 
				+                MatchState * ret = NULL;
			
 
				+                unsigned prevExactMatchDepth = lastExactMatchDepth;
			
 
				                 if (top->firstChild)
			
 
				-                    return find(top->firstChild, ids.item(pathIndex));
			
 
				-                return NULL;
			
 
				+                    ret = find(top->firstChild, path, depth+1);
			
 
				+                //If must match a child, or one of children had a required match then we have a result
			
 
				+                if (!matchAny || (prevExactMatchDepth != lastExactMatchDepth))
			
 
				+                    return ret;
			
 
				             }
			
 
				             else
			
 
				-                searchIndices[pathIndex]--;
			
 
				+                nextIndex--;
			
 
				         }
			
 
				         else
			
 
				         {
			
 
				             if (top->firstChild)
			
 
				             {
			
 
				-                MatchState * ret = find(top->firstChild, id);
			
 
				+                unsigned prevExactMatchDepth = lastExactMatchDepth;
			
 
				+                MatchState * ret = find(top->firstChild, path, depth);
			
 
				                 //return if matched another level - may have failed to match, or matched completely
			
 
				-                if (savedSearchDepth != maxSearchDepth)
			
 
				+                if (prevExactMatchDepth != lastExactMatchDepth)
			
 
				                     return ret;
			
 
				             }
			
 
				         }
			
@@ -279,14 +286,12 @@ MatchState * RegexMatchPath::find(MatchState * top, regexid_t id)
 
				     return NULL;
			
 
				 }
			
 
				 
			
 
				-IMatchedElement * RegexMatchPath::getMatch(MatchState * top, bool removeTrailingSep)
			
 
				+IMatchedElement * RegexMatchPath::getMatch(MatchState * top, bool removeTrailingSep) const
			
 
				 {
			
 
				-    CriticalBlock procedure(cs);
			
 
				-    //MORE: We could allocate searchIndices on the stack and pass as a parameter
			
 
				-    memcpy(searchIndices, indices.getArray(), sizeof(*searchIndices)*maxDepth);
			
 
				-    pathIndex = 0;
			
 
				-    maxSearchDepth = 0;
			
 
				-    MatchState * state = find(top, ids.item(0));
			
 
				+    RegexMatchSearchInstance search;
			
 
				+    search.lastExactMatchDepth = 0;
			
 
				+    search.nextIndex = nextExactMatchIndex(0);
			
 
				+    MatchState * state = search.find(top, *this, 0);
			
 
				     if (!state)
			
 
				         return NULL;
			
 
				     if (removeTrailingSep)
			
@@ -454,7 +459,8 @@ void RegexPattern::cleanupBeginMatch(ActiveStage & stage, RegexState & state)
 
				     DBGLOG("%*s[%p]Pop Begin Match", patternDepth, "", stage.pattern);
			
 
				 #endif
			
 
				     state.popMatch(stage.extra.matched->save);
			
 
				-    delete stage.extra.matched;
			
 
				+    state.cache.destroyStateSave(stage.extra.matched);
			
 
				+    stage.extra.matched = NULL;
			
 
				 }
			
 
				 
			
 
				 RegexMatchAction RegexPattern::pushStageEndMatch(RegexState & state)
			
@@ -1669,7 +1675,7 @@ RegexMatchAction RegexBeginCheckPattern::match(RegexState & state)
 
				 
			
 
				 RegexMatchAction RegexBeginCheckPattern::beginMatch(RegexState & state)
			
 
				 {
			
 
				-    RegexMatchStateSave * matched = new RegexMatchStateSave(NULL, 0);
			
 
				+    RegexMatchStateSave * matched = state.cache.createStateSave(NULL, 0);
			
 
				     pushStageBeginMatch(state, matched);
			
 
				     return RegexMatchContinue;
			
 
				 }
			
@@ -1978,9 +1984,9 @@ RegexMatchAction RegexNamedPattern::match(RegexState & state)
 
				     return def->match(state, &end, matched);
			
 
				 #else
			
 
				     //Allocate on the heap to make a stack fault less likely
			
 
				-    RegexMatchState * matched = new RegexMatchState(def);
			
 
				+    RegexMatchState * matched = state.cache.createState(def);
			
 
				     RegexMatchAction ret = def->match(state, &end, *matched);
			
 
				-    delete matched;
			
 
				+    state.cache.destroyState(matched);
			
 
				     return ret;
			
 
				 #endif
			
 
				 }
			
@@ -2057,7 +2063,7 @@ void RegexNamedPattern::RegexEndNamedPattern::killStage(ActiveStage & stage, Reg
 
				 
			
 
				 RegexMatchAction RegexNamedPattern::beginMatch(RegexState & state)
			
 
				 {
			
 
				-    RegexMatchStateSave * matched = new RegexMatchStateSave(def);
			
 
				+    RegexMatchStateSave * matched = state.cache.createStateSave(def);
			
 
				     ActiveStage & stage = pushStageBeginMatch(state, matched);
			
 
				     stage.setState(RSfinished);                 // so children don't get processed.
			
 
				     state.namedStack.append(end);
			
@@ -2089,7 +2095,7 @@ RegexMatchAction RegexBeginSeparatorPattern::match(RegexState & state)
 
				 
			
 
				 RegexMatchAction RegexBeginSeparatorPattern::beginMatch(RegexState & state)
			
 
				 {
			
 
				-    RegexMatchStateSave * matched = new RegexMatchStateSave(separatorTagAtom, 0);
			
 
				+    RegexMatchStateSave * matched = state.cache.createStateSave(separatorTagAtom, 0);
			
 
				     pushStageBeginMatch(state, matched);
			
 
				     return RegexMatchContinue;
			
 
				 }
			
@@ -2566,11 +2572,12 @@ RegexMatchAction RegexAsciiDfaPattern::match(RegexState & state)
 
				     }
			
 
				     else
			
 
				     {
			
 
				-        ConstPointerArray matches;
			
 
				+        ConstPointerArray & potentialMatches = state.cache.potentialMatches;
			
 
				+        unsigned prevPotentialMatches = potentialMatches.ordinality();
			
 
				         loop
			
 
				         {
			
 
				             if (states[activeState].accepts())
			
 
				-                matches.append(cur);
			
 
				+                potentialMatches.append(cur);
			
 
				             if (cur == end)
			
 
				                 break;
			
 
				             byte next = *cur++;
			
@@ -2583,13 +2590,15 @@ RegexMatchAction RegexAsciiDfaPattern::match(RegexState & state)
 
				                 break;
			
 
				         }
			
 
				 
			
 
				-        while (matches.ordinality())
			
 
				+        while (potentialMatches.ordinality() > prevPotentialMatches)
			
 
				         {
			
 
				-            state.cur = (const byte *)matches.tos();
			
 
				-            matches.pop();
			
 
				+            state.cur = (const byte *)potentialMatches.pop();
			
 
				             RegexMatchAction ret = matchNext(state);
			
 
				             if (ret != RegexMatchBacktrack)
			
 
				+            {
			
 
				+                potentialMatches.trunc(prevPotentialMatches);
			
 
				                 return ret;
			
 
				+            }
			
 
				         }
			
 
				         return RegexMatchBacktrack;
			
 
				     }
			
@@ -2618,10 +2627,11 @@ void RegexAsciiDfaPattern::toXMLattr(StringBuffer & out, RegexXmlState & state)
 
				         out.append(" token");
			
 
				 }
			
 
				 
			
 
				-
			
 
				 void RegexAsciiDfaPattern::killStage(ActiveStage & stage, RegexState & state)
			
 
				 {
			
 
				-    delete stage.extra.matches;
			
 
				+    ConstPointerArray & potentialMatches = state.cache.potentialMatches;
			
 
				+    unsigned prevPotentialMatches = stage.extra.prevPotentialMatches;
			
 
				+    potentialMatches.trunc(prevPotentialMatches);
			
 
				 }
			
 
				 
			
 
				 RegexMatchAction RegexAsciiDfaPattern::beginMatch(RegexState & state)
			
@@ -2632,8 +2642,8 @@ RegexMatchAction RegexAsciiDfaPattern::beginMatch(RegexState & state)
 
				     const AsciiDfaState * states = dfa.queryStates();
			
 
				     unsigned * transitions = dfa.queryTransitions();
			
 
				     const byte * best = NULL;
			
 
				-    ConstPointerArray * matches = NULL;
			
 
				-
			
 
				+    ConstPointerArray & potentialMatches = state.cache.potentialMatches;
			
 
				+    const unsigned prevPotentialMatches = potentialMatches.ordinality();
			
 
				     loop
			
 
				     {
			
 
				         if (states[activeState].accepts())
			
@@ -2642,12 +2652,9 @@ RegexMatchAction RegexAsciiDfaPattern::beginMatch(RegexState & state)
 
				                 best = cur;
			
 
				             else
			
 
				             {
			
 
				-                if (!matches)
			
 
				-                {
			
 
				-                    matches = new ConstPointerArray;
			
 
				-                    matches->append(best);
			
 
				-                }
			
 
				-                matches->append(cur);
			
 
				+                if (prevPotentialMatches == potentialMatches.ordinality())
			
 
				+                    potentialMatches.append(best);
			
 
				+                potentialMatches.append(cur);
			
 
				             }
			
 
				         }
			
 
				         if (cur == end)
			
@@ -2666,10 +2673,12 @@ RegexMatchAction RegexAsciiDfaPattern::beginMatch(RegexState & state)
 
				         return RegexMatchBacktrack;
			
 
				 
			
 
				     ActiveStage & stage = pushStage(state);
			
 
				-    stage.extra.matches = matches;
			
 
				+    stage.extra.prevPotentialMatches = prevPotentialMatches;
			
 
				     if (matchesToken)
			
 
				         stage.flags |= RFbeginToken; 
			
 
				-    if (!matches)
			
 
				+
			
 
				+    //Only a single match, therefore no need to backtrack.
			
 
				+    if (prevPotentialMatches == potentialMatches.ordinality())
			
 
				     {
			
 
				         stage.followPosition = best;
			
 
				         stage.setMatched();
			
@@ -2683,14 +2692,16 @@ RegexMatchAction RegexAsciiDfaPattern::beginMatch(RegexState & state)
 
				 
			
 
				 RegexMatchAction RegexAsciiDfaPattern::nextAction(ActiveStage & stage, RegexState & state)
			
 
				 {
			
 
				+    ConstPointerArray & potentialMatches = state.cache.potentialMatches;
			
 
				+    unsigned prevPotentialMatches = stage.extra.prevPotentialMatches;
			
 
				+    assertex(prevPotentialMatches <= potentialMatches.ordinality());
			
 
				     switch (stage.getState())
			
 
				     {
			
 
				     case RSretry:
			
 
				         {
			
 
				-            ConstPointerArray * matches = stage.extra.matches;
			
 
				-            if (matches && matches->ordinality())
			
 
				+            if (prevPotentialMatches < potentialMatches.ordinality())
			
 
				             {
			
 
				-                stage.followPosition = (const byte *)matches->pop();
			
 
				+                stage.followPosition = (const byte *)potentialMatches.pop();
			
 
				                 stage.setMatched();
			
 
				                 return RegexMatchContinue;
			
 
				             }
			
@@ -3108,6 +3119,54 @@ RegexPattern * deserializeRegex(MemoryBuffer & in)
 
				 
			
 
				 //---------------------------------------------------------------------------
			
 
				 
			
 
				+RegexMatchState * RegexStateCache::createState(RegexNamed * def)
			
 
				+{
			
 
				+    if (matchStates.ordinality())
			
 
				+    {
			
 
				+        RegexMatchState * ret = &matchStates.popGet();
			
 
				+        ret->reset(def);
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    return new RegexMatchState(def);
			
 
				+}
			
 
				+
			
 
				+void RegexStateCache::destroyState(RegexMatchState * state)
			
 
				+{
			
 
				+    matchStates.append(*state);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+RegexMatchStateSave * RegexStateCache::createStateSave(RegexNamed * def)
			
 
				+{
			
 
				+    if (matchStateSaves.ordinality())
			
 
				+    {
			
 
				+        RegexMatchStateSave * ret = &matchStateSaves.popGet();
			
 
				+        ret->reset(def);
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    return new RegexMatchStateSave(def);
			
 
				+}
			
 
				+
			
 
				+RegexMatchStateSave * RegexStateCache::createStateSave(_ATOM _name, regexid_t _id)
			
 
				+{
			
 
				+    if (matchStateSaves.ordinality())
			
 
				+    {
			
 
				+        RegexMatchStateSave * ret = &matchStateSaves.popGet();
			
 
				+        ret->reset(_name, _id);
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    return new RegexMatchStateSave(_name, _id);
			
 
				+}
			
 
				+
			
 
				+void RegexStateCache::destroyStateSave(RegexMatchStateSave * state)
			
 
				+{
			
 
				+    matchStateSaves.append(*state);
			
 
				+}
			
 
				+
			
 
				+
			
 
				 void RegexState::processPattern(RegexPattern * grammar)
			
 
				 {
			
 
				     if (implementation == NLPAregexStack)
			
@@ -3331,7 +3390,7 @@ bool RegexParser::performMatch(IMatchedAction & action, const void * row, unsign
 
				         const byte * endData = start + len;
			
 
				         const byte * end = endData - algo->minPatternLength;
			
 
				 
			
 
				-        RegexState state(algo->kind, helper, this, algo->inputFormat, len, start);
			
 
				+        RegexState state(cache, algo->kind, helper, this, algo->inputFormat, len, start);
			
 
				         state.row = row;
			
 
				         state.processor = &action;
			
 
				         state.best = NULL;
			
--- a/common/thorhelper/thorrparse.ipp
+++ b/common/thorhelper/thorrparse.ipp
@@ -30,17 +30,20 @@
 
				 //#define TRACE_REGEX
			
 
				 #endif
			
 
				 
			
 
				+
			
 
				+class RegexMatchSearchInstance : public NlpMatchSearchInstance
			
 
				+{
			
 
				+public:
			
 
				+    MatchState * find(MatchState * top, const NlpMatchPath & path, unsigned depth);
			
 
				+};
			
 
				+
			
 
				 class THORHELPER_API RegexMatchPath : public NlpMatchPath
			
 
				 {
			
 
				 public:
			
 
				     RegexMatchPath(MemoryBuffer & in) : NlpMatchPath(in) {}
			
 
				     RegexMatchPath(const UnsignedArray & _ids, const UnsignedArray & _indices) : NlpMatchPath(_ids, _indices) {}
			
 
				 
			
 
				-    IMatchedElement * getMatch(MatchState * top, bool removeTrailingSeparator);
			
 
				-
			
 
				-protected:
			
 
				-    void init();
			
 
				-    MatchState * find(MatchState * top, regexid_t id);
			
 
				+    IMatchedElement * getMatch(MatchState * top, bool removeTrailingSeparator) const;
			
 
				 };
			
 
				 
			
 
				 class THORHELPER_API CRegexMatchedResultInfo : public CMatchedResultInfo
			
@@ -112,7 +115,7 @@ public:
 
				         MatchSaveState saved;
			
 
				         RegexMatchStateSave * matched;
			
 
				         RegexPattern * nextPattern;
			
 
				-        ConstPointerArray * matches;
			
 
				+        unsigned prevPotentialMatches;
			
 
				         RegexRepeatInstance * repeatInstance;
			
 
				         const byte * limit;
			
 
				     } extra;
			
@@ -130,12 +133,16 @@ MAKECopyArrayOf(ActiveStage, ActiveStage &, ActiveStageArray);
 
				 
			
 
				 class RegexState;
			
 
				 // Used to represent a single match in the regular expression tree.  Also 
			
 
				-class THORHELPER_API RegexMatchState : public MatchState
			
 
				+class THORHELPER_API RegexMatchState : public CInterface, public MatchState
			
 
				 {
			
 
				 public:
			
 
				     RegexMatchState() : MatchState() { }
			
 
				     RegexMatchState(_ATOM _name, regexid_t _id) : MatchState(_name, _id) { }
			
 
				     RegexMatchState(RegexNamed * owner) : MatchState(owner->queryName(), owner->queryID()) {}
			
 
				+    IMPLEMENT_IINTERFACE
			
 
				+
			
 
				+    using MatchState::reset;
			
 
				+    void reset(RegexNamed * owner) { MatchState::reset(owner->queryName(), owner->queryID()); }
			
 
				 };
			
 
				 
			
 
				 class THORHELPER_API RegexMatchStateSave : public RegexMatchState
			
@@ -150,11 +157,29 @@ public:
 
				 };
			
 
				 
			
 
				 struct RegexMatchInfo;
			
 
				+class RegexStateCache
			
 
				+{
			
 
				+public:
			
 
				+    RegexMatchState * createState(RegexNamed * def);
			
 
				+    RegexMatchStateSave * createStateSave(RegexNamed * def);
			
 
				+    RegexMatchStateSave * createStateSave(_ATOM _name, regexid_t _id);
			
 
				+    void destroyState(RegexMatchState * state);
			
 
				+    void destroyStateSave(RegexMatchStateSave * state);
			
 
				+
			
 
				+    CIArrayOf<RegexMatchState> matchStates;
			
 
				+    CIArrayOf<RegexMatchStateSave> matchStateSaves;
			
 
				+    ConstPointerArray potentialMatches;
			
 
				+};
			
 
				+
			
 
				 class RegexState : public NlpState
			
 
				 {
			
 
				 public:
			
 
				-    RegexState(unsigned _implementation, INlpHelper * _helper, INlpMatchedAction * _action, NlpInputFormat _inputFormat, size32_t _len, const void * _text) : NlpState(_action, _inputFormat, _len, _text) { implementation = _implementation; numMatched = 0; curActiveStage = NotFound; helper = _helper; }
			
 
				-    RegexState(const RegexState & _state, INlpMatchedAction * _action, size32_t _len, const void * _text) : NlpState(_action, _state.inputFormat, _len, _text) 
			
 
				+    RegexState(RegexStateCache & _cache, unsigned _implementation, INlpHelper * _helper, INlpMatchedAction * _action, NlpInputFormat _inputFormat, size32_t _len, const void * _text)
			
 
				+    : NlpState(_action, _inputFormat, _len, _text), cache(_cache)
			
 
				+    { implementation = _implementation; numMatched = 0; curActiveStage = NotFound; helper = _helper; }
			
 
				+
			
 
				+    RegexState(const RegexState & _state, INlpMatchedAction * _action, size32_t _len, const void * _text)
			
 
				+    : NlpState(_action, _state.inputFormat, _len, _text), cache(_state.cache)
			
 
				     { 
			
 
				         implementation = _state.implementation; numMatched = 0; curActiveStage = NotFound; helper = _state.helper;
			
 
				     }
			
@@ -168,6 +193,7 @@ protected:
 
				     inline ActiveStage & topStage()                         { return stages.item(curActiveStage); }
			
 
				 
			
 
				 public:
			
 
				+    RegexStateCache & cache;
			
 
				     RegexPatternCopyArray stack;
			
 
				     IMatchedAction * processor;
			
 
				     const byte * nextScanPosition;
			
@@ -1121,6 +1147,7 @@ public:
 
				     RegexAlgorithm * algo;
			
 
				     RegexMatches results;
			
 
				     CRegexMatchedResults matched;
			
 
				+    RegexStateCache cache;
			
 
				     unsigned charWidth;
			
 
				 };
			
 
				 
			
--- a/common/thorhelper/thortalgo.cpp
+++ b/common/thorhelper/thortalgo.cpp
@@ -45,65 +45,63 @@ void TomitaStateInformation::set(const TomitaStateInformation & other)
 
				 
			
 
				 //---------------------------------------------------------------------------
			
 
				 
			
 
				-GrammarSymbol * TomitaMatchPath::findInChildren(GrammarSymbol * top, regexid_t id)
			
 
				+GrammarSymbol * TomitaMatchSearchInstance::findInChildren(GrammarSymbol * top, const TomitaMatchPath & path, unsigned depth)
			
 
				 {
			
 
				+    unsigned prevExactMatchDepth = lastExactMatchDepth;
			
 
				     for (unsigned i = 0;; i++)
			
 
				     {
			
 
				         GrammarSymbol * child = top->queryChild(i);
			
 
				         if (!child)
			
 
				             return NULL;
			
 
				-        GrammarSymbol * ret = find(child, id);
			
 
				-        if (ret)
			
 
				+        GrammarSymbol * ret = find(child, path, depth);
			
 
				+        if (prevExactMatchDepth != lastExactMatchDepth)
			
 
				             return ret;
			
 
				     }
			
 
				     return NULL;
			
 
				 }
			
 
				 
			
 
				-GrammarSymbol * TomitaMatchPath::find(GrammarSymbol * top, regexid_t id)
			
 
				+GrammarSymbol * TomitaMatchSearchInstance::find(GrammarSymbol * top, const TomitaMatchPath & path, unsigned depth)
			
 
				 {
			
 
				-    unsigned savedSearchDepth = maxSearchDepth;
			
 
				     if (top->isPacked())
			
 
				         top = top->queryPacked(choices->getInstance(top));
			
 
				 
			
 
				+    regexid_t id = path.getId(depth);
			
 
				     if (top->getId() == id)
			
 
				     {
			
 
				-        unsigned thisLevelIndex = searchIndices[pathIndex];
			
 
				-        if ((thisLevelIndex == UNKNOWN_INSTANCE) || (thisLevelIndex == 1))
			
 
				+        bool matchAny = path.matchAny(depth);
			
 
				+        if (matchAny || (nextIndex == 1))
			
 
				         {
			
 
				-            pathIndex++;
			
 
				-            if (pathIndex == ids.ordinality())
			
 
				+            if (depth+1 == path.numItems())
			
 
				             {
			
 
				-                maxSearchDepth = pathIndex;
			
 
				+                lastExactMatchDepth = depth+1;
			
 
				                 return top;
			
 
				             }
			
 
				 
			
 
				-            if (thisLevelIndex == 1)
			
 
				-                maxSearchDepth = pathIndex;
			
 
				-            return findInChildren(top, ids.item(pathIndex));
			
 
				+            if (!matchAny)
			
 
				+            {
			
 
				+                lastExactMatchDepth = depth+1;
			
 
				+                nextIndex = path.nextExactMatchIndex(depth+1);
			
 
				+            }
			
 
				+
			
 
				+            return findInChildren(top, path, depth+1);
			
 
				         }
			
 
				         else
			
 
				-            searchIndices[pathIndex]--;
			
 
				+        {
			
 
				+            nextIndex--;
			
 
				+            return NULL;
			
 
				+        }
			
 
				     }
			
 
				     else
			
 
				-    {
			
 
				-        GrammarSymbol * ret = findInChildren(top, id);
			
 
				-        //return if matched another level - may have failed to match, or matched completely
			
 
				-        if (savedSearchDepth != maxSearchDepth)
			
 
				-            return ret;
			
 
				-    }
			
 
				-    return NULL;
			
 
				+        return findInChildren(top, path, depth);
			
 
				 }
			
 
				 
			
 
				-IMatchedElement * TomitaMatchPath::getMatch(GrammarSymbol * top, PackedSymbolChoice & choice)
			
 
				+IMatchedElement * TomitaMatchPath::getMatch(GrammarSymbol * top, PackedSymbolChoice & choice) const
			
 
				 {
			
 
				-    CriticalBlock procedure(cs);
			
 
				-
			
 
				-    //MORE: We could allocate searchIndices on the stack and pass as a parameter
			
 
				-    memcpy(searchIndices, indices.getArray(), sizeof(*searchIndices)*maxDepth);
			
 
				-    pathIndex = 0;
			
 
				-    maxSearchDepth = 0;
			
 
				-    choices = &choice;
			
 
				-    GrammarSymbol * state = find(top, ids.item(0));
			
 
				+    TomitaMatchSearchInstance search;
			
 
				+    search.lastExactMatchDepth = 0;
			
 
				+    search.nextIndex = nextExactMatchIndex(0);
			
 
				+    search.choices = &choice;
			
 
				+    GrammarSymbol * state = search.find(top, *this, 0);
			
 
				     if (!state)
			
 
				         return NULL;
			
 
				     return LINK(state);
			
--- a/common/thorhelper/thortalgo.ipp
+++ b/common/thorhelper/thortalgo.ipp
@@ -62,22 +62,27 @@ public:
 
				     NlpInputFormat inputFormat;
			
 
				 };
			
 
				 
			
 
				-class THORHELPER_API TomitaMatchPath : public NlpMatchPath
			
 
				+class TomitaMatchPath;
			
 
				+class TomitaMatchSearchInstance : public NlpMatchSearchInstance
			
 
				 {
			
 
				 public:
			
 
				-    TomitaMatchPath(MemoryBuffer & in) : NlpMatchPath(in) { choices = NULL; }
			
 
				-    TomitaMatchPath(const UnsignedArray & _ids, const UnsignedArray & _indices) : NlpMatchPath(_ids, _indices) { choices = NULL; }
			
 
				+    TomitaMatchSearchInstance() { choices = NULL; }
			
 
				 
			
 
				-    IMatchedElement * getMatch(GrammarSymbol * top, PackedSymbolChoice & choice);
			
 
				+    GrammarSymbol * find(GrammarSymbol * top, const TomitaMatchPath & path, unsigned depth);
			
 
				+    GrammarSymbol * findInChildren(GrammarSymbol * top, const TomitaMatchPath & path, unsigned depth);
			
 
				 
			
 
				-protected:
			
 
				-    GrammarSymbol * find(GrammarSymbol * top, regexid_t id);
			
 
				-    GrammarSymbol * findInChildren(GrammarSymbol * top, regexid_t id);
			
 
				-    
			
 
				-protected:
			
 
				     PackedSymbolChoice * choices;
			
 
				 };
			
 
				 
			
 
				+class THORHELPER_API TomitaMatchPath : public NlpMatchPath
			
 
				+{
			
 
				+public:
			
 
				+    TomitaMatchPath(MemoryBuffer & in) : NlpMatchPath(in) { }
			
 
				+    TomitaMatchPath(const UnsignedArray & _ids, const UnsignedArray & _indices) : NlpMatchPath(_ids, _indices) { }
			
 
				+
			
 
				+    IMatchedElement * getMatch(GrammarSymbol * top, PackedSymbolChoice & choice) const;
			
 
				+};
			
 
				+
			
 
				 
			
 
				 class THORHELPER_API CTomitaMatchedResultInfo : public CMatchedResultInfo
			
 
				 {
			
--- a/ecl/regress/pat19.ecl
+++ b/ecl/regress/pat19.ecl
@@ -0,0 +1,58 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    Copyright (C) 2011 HPCC Systems.
			
 
				+
			
 
				+    All rights reserved. This program is free software: you can redistribute it and/or modify
			
 
				+    it under the terms of the GNU Affero General Public License as
			
 
				+    published by the Free Software Foundation, either version 3 of the
			
 
				+    License, or (at your option) any later version.
			
 
				+
			
 
				+    This program is distributed in the hope that it will be useful,
			
 
				+    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+    GNU Affero General Public License for more details.
			
 
				+
			
 
				+    You should have received a copy of the GNU Affero General Public License
			
 
				+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+############################################################################## */
			
 
				+
			
 
				+token word := PATTERN('[a-z0-9]')+;
			
 
				+token a := word;
			
 
				+token b := word;
			
 
				+rule c := a b;
			
 
				+rule d := a b;
			
 
				+rule e := c;
			
 
				+rule f := e d;
			
 
				+rule g := f f;
			
 
				+
			
 
				+//i.e.
			
 
				+//g[f[e[c[a b]] d[a b]] f[e[c[a b]] d[a b]]]
			
 
				+
			
 
				+infile := dataset([
			
 
				+        {'w1 w2 w3 w4 w5 w6 w7 w8'}
			
 
				+        ], { string line });
			
 
				+
			
 
				+
			
 
				+results :=
			
 
				+    record
			
 
				+        '\'' + MATCHTEXT(word) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(word[2]) + ':w2\'';
			
 
				+        '\'' + MATCHTEXT(word[8]) + ':w8\'';
			
 
				+        '\'' + MATCHTEXT(word[9]) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a/word[1]) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(a[3]/word[1]) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(a[3]/word[2]) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a[2]/word) + ':w3\'';
			
 
				+        '\'' + MATCHTEXT(e/a[2]/word) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(g/f/e/c/a/word) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(g/f[2]/e/c/a/word) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/e/c/b/word) + ':w2\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/b[2]/word) + ':w4\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/c/b[2]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/c/b[3]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(g/f[1]/b[2]/word) + ':w4\'';
			
 
				+        '\'' + MATCHTEXT(g/f[1]/c/b[3]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a/word[3]) + ':w5\'';
			
 
				+    end;
			
 
				+
			
 
				+output(PARSE(infile,line,g,results,whole,nocase,skip([' ',',',';','\t','.']*)));
			
--- a/ecl/regress/pat5.ecl
+++ b/ecl/regress/pat5.ecl
@@ -39,6 +39,7 @@ infile := dataset([
 
				 
			
 
				 results :=
			
 
				     record
			
 
				+        '\'' + MATCHTEXT(noun/patWord[2]) + '\'';
			
 
				         '\'' + MATCHTEXT(patWord[1]) + '\'';
			
 
				         '\'' + MATCHTEXT(patWord[2]) + '\'';
			
 
				         '\'' + MATCHTEXT(patWord[3]) + '\'';
			
@@ -46,7 +47,6 @@ results :=
 
				         '\'' + MATCHTEXT(noun[2]) + '\'';
			
 
				         '\'' + MATCHTEXT(noun[3]) + '\'';
			
 
				         '\'' + MATCHTEXT(noun/patWord[1]) + '\'';
			
 
				-        '\'' + MATCHTEXT(noun/patWord[2]) + '\'';
			
 
				         '\'' + MATCHTEXT(noun/patWord[3]) + '\'';
			
 
				         '\'' + MATCHTEXT(noun[1]/patWord[1]) + '\'';
			
 
				         '\'' + MATCHTEXT(noun[1]/patWord[2]) + '\'';
			
--- a/system/jlib/jarray.hpp
+++ b/system/jlib/jarray.hpp
@@ -121,7 +121,7 @@ public:
 
				     aindex_t bAdd(MEMBER & newItem, CompareFunc, bool & isNew);
			
 
				     aindex_t bSearch(const MEMBER & key, CompareFunc) const;
			
 
				     aindex_t find(PARAM) const;
			
 
				-    MEMBER *getArray(aindex_t = 0);
			
 
				+    MEMBER *getArray(aindex_t = 0) const;
			
 
				     void sort(CompareFunc);
			
 
				     void swap(aindex_t pos1, aindex_t pos2);
			
 
				 };
			
--- a/system/jlib/jarray.tpp
+++ b/system/jlib/jarray.tpp
@@ -85,7 +85,7 @@ aindex_t BaseArrayOf<MEMBER, PARAM>::bSearch(const MEMBER & key, CompareFunc cf)
 
				 }
			
 
				 
			
 
				 template <class MEMBER, class PARAM>
			
 
				-MEMBER *BaseArrayOf<MEMBER, PARAM>::getArray(aindex_t pos)
			
 
				+MEMBER *BaseArrayOf<MEMBER, PARAM>::getArray(aindex_t pos) const
			
 
				 {
			
 
				    MEMBER * head= (MEMBER *)SELF::_head;
			
 
				    assertex(pos <= SELF::used);
			
--- a/testing/ecl/key/pat19.xml
+++ b/testing/ecl/key/pat19.xml
--- a/testing/ecl/key/tpat19.xml
+++ b/testing/ecl/key/tpat19.xml
--- a/testing/ecl/pat19.ecl
+++ b/testing/ecl/pat19.ecl
@@ -0,0 +1,58 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    Copyright (C) 2011 HPCC Systems.
			
 
				+
			
 
				+    All rights reserved. This program is free software: you can redistribute it and/or modify
			
 
				+    it under the terms of the GNU Affero General Public License as
			
 
				+    published by the Free Software Foundation, either version 3 of the
			
 
				+    License, or (at your option) any later version.
			
 
				+
			
 
				+    This program is distributed in the hope that it will be useful,
			
 
				+    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+    GNU Affero General Public License for more details.
			
 
				+
			
 
				+    You should have received a copy of the GNU Affero General Public License
			
 
				+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+############################################################################## */
			
 
				+
			
 
				+token word := PATTERN('[a-z0-9]')+;
			
 
				+token a := word;
			
 
				+token b := word;
			
 
				+rule c := a b;
			
 
				+rule d := a b;
			
 
				+rule e := c;
			
 
				+rule f := e d;
			
 
				+rule g := f f;
			
 
				+
			
 
				+//i.e.
			
 
				+//g[f[e[c[a b]] d[a b]] f[e[c[a b]] d[a b]]]
			
 
				+
			
 
				+infile := dataset([
			
 
				+        {'w1 w2 w3 w4 w5 w6 w7 w8'}
			
 
				+        ], { string line });
			
 
				+
			
 
				+
			
 
				+results :=
			
 
				+    record
			
 
				+        '\'' + MATCHTEXT(word) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(word[2]) + ':w2\'';
			
 
				+        '\'' + MATCHTEXT(word[8]) + ':w8\'';
			
 
				+        '\'' + MATCHTEXT(word[9]) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a/word[1]) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(a[3]/word[1]) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(a[3]/word[2]) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a[2]/word) + ':w3\'';
			
 
				+        '\'' + MATCHTEXT(e/a[2]/word) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(g/f/e/c/a/word) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(g/f[2]/e/c/a/word) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/e/c/b/word) + ':w2\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/b[2]/word) + ':w4\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/c/b[2]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/c/b[3]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(g/f[1]/b[2]/word) + ':w4\'';
			
 
				+        '\'' + MATCHTEXT(g/f[1]/c/b[3]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a/word[3]) + ':w5\'';
			
 
				+    end;
			
 
				+
			
 
				+output(PARSE(infile,line,g,results,whole,nocase,skip([' ',',',';','\t','.']*)));
			
--- a/testing/ecl/tpat19.ecl
+++ b/testing/ecl/tpat19.ecl
@@ -0,0 +1,58 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    Copyright (C) 2011 HPCC Systems.
			
 
				+
			
 
				+    All rights reserved. This program is free software: you can redistribute it and/or modify
			
 
				+    it under the terms of the GNU Affero General Public License as
			
 
				+    published by the Free Software Foundation, either version 3 of the
			
 
				+    License, or (at your option) any later version.
			
 
				+
			
 
				+    This program is distributed in the hope that it will be useful,
			
 
				+    but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+    GNU Affero General Public License for more details.
			
 
				+
			
 
				+    You should have received a copy of the GNU Affero General Public License
			
 
				+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+############################################################################## */
			
 
				+
			
 
				+token word := PATTERN('[a-z0-9]')+;
			
 
				+token a := word;
			
 
				+token b := word;
			
 
				+rule c := a b;
			
 
				+rule d := a b;
			
 
				+rule e := c;
			
 
				+rule f := e d;
			
 
				+rule g := f f;
			
 
				+
			
 
				+//i.e.
			
 
				+//g[f[e[c[a b]] d[a b]] f[e[c[a b]] d[a b]]]
			
 
				+
			
 
				+infile := dataset([
			
 
				+        {'w1 w2 w3 w4 w5 w6 w7 w8'}
			
 
				+        ], { string line });
			
 
				+
			
 
				+
			
 
				+results :=
			
 
				+    record
			
 
				+        '\'' + MATCHTEXT(word) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(word[2]) + ':w2\'';
			
 
				+        '\'' + MATCHTEXT(word[8]) + ':w8\'';
			
 
				+        '\'' + MATCHTEXT(word[9]) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a/word[1]) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(a[3]/word[1]) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(a[3]/word[2]) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a[2]/word) + ':w3\'';
			
 
				+        '\'' + MATCHTEXT(e/a[2]/word) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(g/f/e/c/a/word) + ':w1\'';
			
 
				+        '\'' + MATCHTEXT(g/f[2]/e/c/a/word) + ':w5\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/e/c/b/word) + ':w2\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/b[2]/word) + ':w4\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/c/b[2]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(f[1]/c/b[3]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(g/f[1]/b[2]/word) + ':w4\'';
			
 
				+        '\'' + MATCHTEXT(g/f[1]/c/b[3]/word) + ':\'';
			
 
				+        '\'' + MATCHTEXT(a/word[3]) + ':w5\'';
			
 
				+    end;
			
 
				+
			
 
				+output(PARSE(infile,line,g,results,whole,nocase,skip([' ',',',';','\t','.']*), parse));