Bladeren bron

HPCC-25884 Add Plugin for NLP++ Analyzers

Signed-off-by: David de Hilster <david.dehilster@lexisnexisrisk.com>
David de Hilster 4 jaren geleden
bovenliggende
commit
a27ce31abf

+ 3 - 0
.gitmodules

@@ -61,3 +61,6 @@
 [submodule "system/security/plugins/jwtSecurity/jwt-cpp"]
 	path = system/security/plugins/jwtSecurity/jwt-cpp
 	url = https://github.com/hpcc-systems/jwt-cpp.git
+[submodule "nlp/nlp-engine"]
+	path = plugins/nlp/nlp-engine
+	url = https://github.com/hpcc-systems/nlp-engine.git

+ 1 - 0
CMakeLists.txt

@@ -166,6 +166,7 @@ if ( PLUGIN )
     HPCC_ADD_SUBDIRECTORY (plugins/couchbase "COUCHBASEEMBED")
     HPCC_ADD_SUBDIRECTORY (plugins/spark "SPARK")
     HPCC_ADD_SUBDIRECTORY (plugins/h3 "H3")
+    HPCC_ADD_SUBDIRECTORY (plugins/nlp "NLP")
 elseif ( NOT MAKE_DOCS_ONLY )
     HPCC_ADD_SUBDIRECTORY (system)
     HPCC_ADD_SUBDIRECTORY (initfiles)

+ 1 - 0
cmake_modules/commonSetup.cmake

@@ -69,6 +69,7 @@ IF ("${COMMONSETUP_DONE}" STREQUAL "")
   option(USE_CBLAS "Configure use of cblas" ON)
   option(USE_AZURE "Configure use of azure" ON)
   option(USE_H3 "Configure use of Uber H3 geospatial indexing" ON)
+  option(USE_NLP "Configure use of NLP++ engine" ON)
   if (WIN32)
     option(USE_AERON "Include the Aeron message protocol" OFF)
     option(USE_GIT "Configure use of GIT (Hooks)" OFF)

+ 1 - 0
plugins/CMakeLists.txt

@@ -19,6 +19,7 @@ add_subdirectory (dmetaphone)
 add_subdirectory (eclblas)
 add_subdirectory (fileservices)
 add_subdirectory (h3)
+add_subdirectory (nlp)
 add_subdirectory (logging)
 add_subdirectory (parselib)
 add_subdirectory (stringlib)

+ 96 - 0
plugins/nlp/CMakeLists.txt

@@ -0,0 +1,96 @@
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+# Component: nlp
+
+#####################################################
+# Description:
+# ------------
+#    Cmake Input File for nlp
+#####################################################
+
+project(nlp)
+
+# Builds the "nlp" ECL plugin (a shared library wrapping the NLP++ engine) and
+# installs it together with its ECL service definition and the engine's
+# analyzers/data directories. Everything is skipped when USE_NLP is OFF.
+if(USE_NLP)
+
+    # The NLP++ engine lives in the nlp-engine git submodule. Fail early with an
+    # actionable message when it has not been checked out, rather than letting
+    # add_subdirectory() below report a missing CMakeLists.txt.
+    if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/nlp-engine/cs/CMakeLists.txt")
+        message(FATAL_ERROR "nlp-engine submodule not found under ${CMAKE_CURRENT_SOURCE_DIR}/nlp-engine. Run 'git submodule update --init --recursive' or configure with -DUSE_NLP=OFF.")
+    endif()
+
+    # Directory-scoped on purpose: these must be set before the
+    # add_subdirectory() calls so the engine's own targets inherit them.
+    include_directories(nlp-engine/cs/include)
+    include_directories(nlp-engine/include)
+    include_directories(nlp-engine/include/Api)
+    include_directories(nlp-engine/include/Api/lite)
+
+    add_subdirectory(nlp-engine/cs)
+    add_subdirectory(nlp-engine/src)
+    add_subdirectory(nlp-engine/lite)
+
+    set(SRCS
+        nlp.hpp
+        nlp.cpp
+        nlp_eng.hpp
+        nlp_eng.cpp)
+
+    # Platform headers needed by the plugin sources. nlp-engine/include/Api is
+    # already on the include path from the block above, so it is not repeated.
+    include_directories(
+        ./../../system/include
+        ./../../rtl/eclrtl
+        ./../../rtl/include
+        ./../../common/deftype
+        ./../../system/jlib
+        ./nlp-engine)
+
+    # ECL_NLP_EXPORTS selects the export side of ECL_NLP_API in nlp.hpp.
+    add_definitions(-D_USRDLL -DECL_NLP_EXPORTS)
+
+    # NOTE(review): UBER_NLP_SOURCE_FILES is not set anywhere in this file and
+    # presumably expands to nothing - confirm and remove if unused.
+    HPCC_ADD_LIBRARY(nlp SHARED ${SRCS} ${UBER_NLP_SOURCE_FILES})
+    if(${CMAKE_VERSION} VERSION_LESS "2.8.9")
+        message(WARNING "Cannot set NO_SONAME. shlibdeps will give warnings when package is installed")
+    elseif(NOT APPLE)
+        set_target_properties(nlp PROPERTIES NO_SONAME 1)
+    endif()
+
+    target_compile_definitions(nlp PUBLIC NLP_HAVE_ALLOCA)
+
+    install(
+        TARGETS nlp
+        DESTINATION plugins)
+
+    # prim, kbm, consh, words and lite are presumably the targets created by the
+    # nlp-engine subdirectories added above - verify against the submodule.
+    # NOTE(review): EXAMPLE_PLUGIN_DEP_LIBRARIES is not set in this file and
+    # looks like a leftover from a plugin template - confirm and remove.
+    target_link_libraries(nlp
+        eclrtl
+        jlib
+        prim
+        kbm
+        consh
+        words
+        lite
+        ${EXAMPLE_PLUGIN_DEP_LIBRARIES})
+
+    if(PLATFORM OR CLIENTTOOLS_ONLY)
+
+        # The ECL service definition is installed for client tools as well as
+        # for full platform builds.
+        install(
+            FILES ${CMAKE_CURRENT_SOURCE_DIR}/lib_nlp.ecllib
+            DESTINATION plugins
+            COMPONENT Runtime)
+
+    endif()
+
+    if(PLATFORM)
+
+        # nlp_eng.cpp passes a ".../plugins/nlp/nlp-engine" directory to the
+        # engine, so the analyzers and data are installed under that layout.
+        install(
+            DIRECTORY ./nlp-engine/analyzers ./nlp-engine/data
+            DESTINATION plugins/nlp/nlp-engine
+            COMPONENT Runtime)
+
+    endif()
+
+endif()

+ 51 - 0
plugins/nlp/README.md

@@ -0,0 +1,51 @@
+NLP Plugin
+================
+
+This plugin exposes nlp-engine to ECL.  It is a wrapper around VisualText's nlp-engine:
+* [NLP-Engine GitHub](https://github.com/VisualText/nlp-engine)
+* [VisualText open source software website](https://visualtext.org)
+
+
+Installation and Dependencies
+------------------------------
+
+The nlp plugin has a dependency on https://github.com/VisualText/nlp-engine which has been added to the HPCC-Platform repository as a git submodule.  To install:
+```sh
+git submodule update --init --recursive
+```
+
+Quick Start
+------------
+
+Import the nlp plugin library to analyze a text into its syntactic parse tree which is returned as an XML string:
+```c
+IMPORT nlp from lib_nlp; 
+
+text01 := 'The quick brown fox jumped over the lazy boy.';
+parsedtext01 := nlp.AnalyzeText('taiparse',text01);
+output(parsedtext01);
+
+text02 := 'TAI has bought the American Medical Records Processing for more than $130 million dollars.';
+parsedtext02 := nlp.AnalyzeText('corporate',text02);
+output(parsedtext02);
+
+text03 := 'Right middle lobe consolidation compatible with acute pneumonitis.';
+parsedtext03 := nlp.AnalyzeText('taiparse',text03);
+output(parsedtext03);
+
+text04 := 'TAI\'s stock is up 4% from $58.33 a share to $60.66.';
+parsedtext04 := nlp.AnalyzeText('corporate',text04);
+output(parsedtext04);
+```
+
+### Analyzer Functions
+
+#### AnalyzeText
+
+```c
+STRING AnalyzeText(CONST STRING analyzer, CONST STRING txt)
+```
+
+Runs the analyzer on the passed text and returns an XML string from the analyzer. The first time an analyzer is called, it is initialized; subsequent calls to that analyzer run with the analyzer already in memory.
+
+Returns the text output produced by the specified NLP++ analyzer.

+ 20 - 0
plugins/nlp/lib_nlp.ecllib

@@ -0,0 +1,20 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the License);
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an AS IS BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+// ECL-side declaration of the nlp plugin service. It binds the ECL function
+// below to the C++ entry point named in its entrypoint attribute, looked up in
+// the 'nlp' library and the 'nlp' C++ namespace.
+EXPORT nlp := SERVICE : plugin('nlp'), namespace('nlp'), library('nlp'), CPP, PURE
+  // Runs the analyzer named by `analyzer` over `txt` and returns the analyzer's
+  // output. Both arguments are ECL `string` values; the C++ entry point
+  // receives each as a length plus a character pointer.
+  string AnalyzeText(const string analyzer, const string txt) : cpp,pure,entrypoint='AnalyzeText';
+END;

+ 1 - 0
plugins/nlp/nlp-engine

@@ -0,0 +1 @@
+Subproject commit 9b58e09e194e0f0d14543933b3d7d1e39a841a43

+ 71 - 0
plugins/nlp/nlp.cpp

@@ -0,0 +1,71 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#include "platform.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "eclrtl.hpp"
+#include "nlp.hpp"
+
+#define NLP_VERSION "nlp plugin 1.0.0"
+
+/**
+ * Describes this plugin to the caller by filling in the supplied definition block.
+ *
+ * @param pb  Definition block to populate; its size field must equal
+ *            sizeof(ECLPluginDefinitionBlock).
+ * @return    true when the block was populated, false when the size field did
+ *            not match (the block is then left untouched).
+ */
+ECL_NLP_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
+{
+    const bool sizeMatches = (pb->size == sizeof(ECLPluginDefinitionBlock));
+    if (sizeMatches)
+    {
+        pb->magicVersion = PLUGIN_VERSION;
+        pb->version = NLP_VERSION;
+        pb->moduleName = "lib_nlp";
+        // No inline ECL text is supplied here; lib_nlp.ecllib is installed alongside the plugin.
+        pb->ECL = NULL;
+        pb->flags = PLUGIN_IMPLICIT_MODULE;
+        pb->description = "ECL plugin library for nlp\n";
+    }
+    return sizeMatches;
+}
+
+#include <fstream>
+
+namespace nlp {
+
+    // Plugin context supplied through setPluginContext(); handed to CTXMALLOC
+    // when allocating the result buffer.
+    IPluginContext * parentCtx = NULL;
+    // Guards the lazy construction of nlpEng.
+    static CriticalSection cs;
+    // Engine wrapper shared by all calls; created on the first AnalyzeText call
+    // and not deleted anywhere in this file.
+    static NLPEng *nlpEng = NULL;
+
+    ECL_NLP_API void setPluginContext(IPluginContext * _ctx) { parentCtx = _ctx; }
+
+    //--------------------------------------------------------------------------------
+    //                           ECL SERVICE ENTRYPOINTS
+    //--------------------------------------------------------------------------------
+
+    /**
+     * Entry point behind the ECL AnalyzeText service function.
+     *
+     * @param tgtLen  [out] length in bytes of the result placed in tgt.
+     * @param tgt     [out] result buffer allocated with CTXMALLOC.
+     * @param anaLen  length of ana in bytes.
+     * @param ana     analyzer name; an ECL string, NOT NUL-terminated.
+     * @param txtLen  length of txt in bytes.
+     * @param txt     text to analyze; an ECL string, NOT NUL-terminated.
+     */
+    ECL_NLP_API void ECL_NLP_CALL AnalyzeText(size32_t & tgtLen, char * & tgt, size32_t anaLen, const char * ana, size32_t txtLen, const char * txt)
+    {
+        {
+            CriticalBlock block(cs);
+            if (nlpEng == NULL) {
+                nlpEng = new NLPEng();
+            }
+        }
+
+        // The engine wrapper takes C strings, but ECL string parameters arrive
+        // as (length, pointer) without a terminating NUL. Copy them into
+        // NUL-terminated buffers so the engine cannot read past the end.
+        const std::string analyzerName(anaLen ? std::string(ana, anaLen) : std::string());
+        const std::string inputText(txtLen ? std::string(txt, txtLen) : std::string());
+
+        ostringstream sso;
+        nlpEng->nlpEngAnalyze(analyzerName.c_str(), inputText.c_str(), sso);
+
+        // Size the result from the captured output itself so the allocation and
+        // the copy always agree, even if the stream position cannot be reported.
+        const std::string result(sso.str());
+        tgtLen = (size32_t) result.size();
+        tgt = (char *) CTXMALLOC(parentCtx, tgtLen);
+        memcpy_iflen(tgt, result.data(), tgtLen);
+    }
+} // namespace nlp

+ 38 - 0
plugins/nlp/nlp.hpp

@@ -0,0 +1,38 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#ifndef NLP_INCL
+#define NLP_INCL
+
+// Calling convention applied to the plugin's ECL entry points: _cdecl on
+// Windows, nothing on other platforms.
+#ifdef _WIN32
+#define ECL_NLP_CALL _cdecl
+#else
+#define ECL_NLP_CALL
+#endif
+
+// Symbol visibility for the plugin API. ECL_NLP_EXPORTS is defined by the
+// plugin's CMakeLists.txt while building the library itself, selecting the
+// export form; any other includer gets the import form. DECL_EXPORT and
+// DECL_IMPORT are defined outside this header (presumably by the platform
+// headers - confirm).
+#ifdef ECL_NLP_EXPORTS
+#define ECL_NLP_API DECL_EXPORT
+#else
+#define ECL_NLP_API DECL_IMPORT
+#endif
+
+#include "hqlplugins.hpp"
+#include "eclhelper.hpp"
+#include "nlp_eng.hpp"
+#include <sstream>
+
+#endif

+ 90 - 0
plugins/nlp/nlp_eng.cpp

@@ -0,0 +1,90 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+// NLP-ENGINE requires LINUX to be set for linux compiles.
+// The NLP-ENGINE needs to be updated to __linux__.
+#ifdef __linux__
+#define LINUX 1
+#endif
+
+#include "nlp_engine.h"
+#include "nlp_eng.hpp"
+
+// Single engine instance shared by every NLPEng object in this process. It is
+// created on first use inside nlpEngAnalyze() and not deleted in this file.
+static NLP_ENGINE *nlpEngine = NULL;
+// Guards the lazy construction of nlpEngine.
+static CriticalSection csNLP; 
+
+// Construction does no work; the engine itself is created in nlpEngAnalyze().
+NLPEng::NLPEng() {}
+
+// Destruction does no work; the shared engine is left in place.
+NLPEng::~NLPEng() {}
+
+#include <sstream>
+
+/**
+ * Runs one NLP++ analyzer over a piece of text.
+ *
+ * The shared NLP_ENGINE is created on the first call; later calls reuse it.
+ *
+ * @param analyzerName  NUL-terminated name of the analyzer to run.
+ * @param inputText     NUL-terminated text handed to the analyzer.
+ * @param sso           stream passed to the engine as its output stream.
+ * @return              number of characters held in sso after the run, or 0 if
+ *                      the stream cannot report its position.
+ */
+int NLPEng::nlpEngAnalyze(const char *analyzerName, const char *inputText, ostringstream &sso)
+{
+#ifdef NLP_DEBUG
+    ofstream handle;
+#endif
+
+    {
+        CriticalBlock block(csNLP);
+        if (nlpEngine == NULL) {
+            // This is where the nlp analyzers reside that can be called by the user.
+            // The path logic here is hacked for now for the first version given that
+            // how these files will get to the server has yet to be determined.
+            //
+            // The folder is only needed to construct the engine, so it is worked
+            // out here rather than on every call: drop the last two '/'-separated
+            // components of the process path and append plugins/nlp/nlp-engine.
+            string processPath = queryCurrentProcessPath();
+            size_t lastSlash = processPath.find_last_of("/");
+            size_t parentSlash = processPath.find_last_of("/",lastSlash-1);
+            string parent = processPath.substr(0,parentSlash);
+            // Named engineFolder so it does not shadow the NLPEng::workingFolder member.
+            VStringBuffer engineFolder("%s/%s",parent.c_str(),"plugins/nlp/nlp-engine");
+
+#ifdef NLP_DEBUG
+            handle.open(NLP_DEBUG_FILE, std::ofstream::out | std::ofstream::app);
+            handle << "[queryCurrentProcessPath: " << processPath << "]" << endl;
+            handle << "[parent: " << parent << "]" << endl;
+            handle << "[workingFolder nlp: " << engineFolder.str() << "]" << endl;
+            handle.close();
+#endif
+
+            nlpEngine = new NLP_ENGINE(engineFolder.str());
+        }
+    }
+
+    istrstream ssi(inputText);
+
+#ifdef NLP_DEBUG
+    clock_t s_time, e_time;
+    s_time = clock();
+#endif
+
+    // The engine API takes a non-const analyzer name, hence the cast.
+    nlpEngine->analyze((char *)analyzerName,&ssi,&sso);
+
+#ifdef NLP_DEBUG
+    e_time = clock();
+    handle.open(NLP_DEBUG_FILE, std::ofstream::out | std::ofstream::app);
+    handle << "===============================================" << endl;
+    handle << "[Analyzer: " << analyzerName << "]" << endl;
+    handle << "[Text: " << inputText << "]" << endl;
+    handle << "[Exec analyzer time="
+           << (double) (e_time - s_time)/CLOCKS_PER_SEC
+           << " sec]" << endl;
+    handle << sso.str() << endl;
+    handle.close();
+#endif
+
+    // Report the output length as the end position of the stream. tellp()
+    // yields -1 on failure; return 0 in that case so callers never treat a
+    // negative value as a buffer size.
+    sso.seekp(0, ios_base::end);
+    const std::streamoff written = sso.tellp();
+    return written < 0 ? 0 : (int) written;
+}

+ 38 - 0
plugins/nlp/nlp_eng.hpp

@@ -0,0 +1,38 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2021 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//#define NLP_DEBUG
+
+#ifdef NLP_DEBUG
+#include <iostream>
+#define NLP_DEBUG_FILE "/tmp/nlp_debug.txt"
+#endif
+
+#include<sstream>
+#include "jutil.hpp"
+#include "jmutex.hpp"
+using namespace std;
+
+// Wrapper the plugin entry point uses to run NLP++ analyzers.
+class NLPEng
+{
+private:
+    // NOTE(review): not referenced by the member functions in nlp_eng.cpp, which
+    // build the engine folder in a local variable - confirm whether it is needed.
+    string workingFolder;
+public:
+    NLPEng();
+    ~NLPEng();
+    // Runs the analyzer called analyzerName over inputText, passing sso to the
+    // engine as its output stream; returns the length of the output held in sso.
+    // Both string arguments must be NUL-terminated.
+    int nlpEngAnalyze(const char *analyzerName, const char *inputText, ostringstream &sso);
+};

+ 428 - 0
testing/regress/ecl/key/nlppp.xml

@@ -0,0 +1,428 @@
+<Dataset name='Result 1'>
+ <Row><Result_1>&lt;?xml version=&quot;1.0&quot; encoding=&quot;ISO-8859-1&quot;?&gt;
+&lt;vertice&gt;
+  &lt;id&gt;1&lt;/id&gt;
+  &lt;label&gt;_TEXTZONE&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;vertice&gt;
+  &lt;id&gt;2&lt;/id&gt;
+  &lt;label&gt;_sent&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;1&lt;/source&gt;
+  &lt;target&gt;2&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice voice=&quot;active&quot;&gt;
+  &lt;id&gt;3&lt;/id&gt;
+  &lt;label&gt;_clause&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;2&lt;/source&gt;
+  &lt;target&gt;3&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;fox&quot;&gt;
+  &lt;id&gt;4&lt;/id&gt;
+  &lt;label&gt;_np&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;3&lt;/source&gt;
+  &lt;target&gt;4&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;the&quot;&gt;
+  &lt;id&gt;5&lt;/id&gt;
+  &lt;label&gt;_det&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;5&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;6&lt;/id&gt;
+  &lt;label&gt;The&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;5&lt;/source&gt;
+  &lt;target&gt;6&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;quick&quot;&gt;
+  &lt;id&gt;7&lt;/id&gt;
+  &lt;label&gt;_adj&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;7&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;quick&quot;&gt;
+  &lt;id&gt;8&lt;/id&gt;
+  &lt;label&gt;quick&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;7&lt;/source&gt;
+  &lt;target&gt;8&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;brown&quot;&gt;
+  &lt;id&gt;9&lt;/id&gt;
+  &lt;label&gt;_adj&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;9&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;brown&quot;&gt;
+  &lt;id&gt;10&lt;/id&gt;
+  &lt;label&gt;brown&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;9&lt;/source&gt;
+  &lt;target&gt;10&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;fox&quot;&gt;
+  &lt;id&gt;11&lt;/id&gt;
+  &lt;label&gt;_noun&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;11&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;fox&quot;&gt;
+  &lt;id&gt;12&lt;/id&gt;
+  &lt;label&gt;fox&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;11&lt;/source&gt;
+  &lt;target&gt;12&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;jump&quot; voice=&quot;active&quot; tense=&quot;past&quot;&gt;
+  &lt;id&gt;13&lt;/id&gt;
+  &lt;label&gt;_vg&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;3&lt;/source&gt;
+  &lt;target&gt;13&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;jump&quot;&gt;
+  &lt;id&gt;14&lt;/id&gt;
+  &lt;label&gt;_verb&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;13&lt;/source&gt;
+  &lt;target&gt;14&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;jump&quot;&gt;
+  &lt;id&gt;15&lt;/id&gt;
+  &lt;label&gt;jumped&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;14&lt;/source&gt;
+  &lt;target&gt;15&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;boy&quot;&gt;
+  &lt;id&gt;16&lt;/id&gt;
+  &lt;label&gt;_advl&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;3&lt;/source&gt;
+  &lt;target&gt;16&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;over&quot;&gt;
+  &lt;id&gt;17&lt;/id&gt;
+  &lt;label&gt;_prep&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;16&lt;/source&gt;
+  &lt;target&gt;17&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;18&lt;/id&gt;
+  &lt;label&gt;over&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;17&lt;/source&gt;
+  &lt;target&gt;18&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;boy&quot;&gt;
+  &lt;id&gt;19&lt;/id&gt;
+  &lt;label&gt;_np&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;16&lt;/source&gt;
+  &lt;target&gt;19&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;the&quot;&gt;
+  &lt;id&gt;20&lt;/id&gt;
+  &lt;label&gt;_det&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;19&lt;/source&gt;
+  &lt;target&gt;20&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;21&lt;/id&gt;
+  &lt;label&gt;the&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;20&lt;/source&gt;
+  &lt;target&gt;21&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;lazy&quot;&gt;
+  &lt;id&gt;22&lt;/id&gt;
+  &lt;label&gt;_adj&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;19&lt;/source&gt;
+  &lt;target&gt;22&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;lazy&quot;&gt;
+  &lt;id&gt;23&lt;/id&gt;
+  &lt;label&gt;lazy&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;22&lt;/source&gt;
+  &lt;target&gt;23&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;boy&quot;&gt;
+  &lt;id&gt;24&lt;/id&gt;
+  &lt;label&gt;_noun&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;19&lt;/source&gt;
+  &lt;target&gt;24&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;boy&quot;&gt;
+  &lt;id&gt;25&lt;/id&gt;
+  &lt;label&gt;boy&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;24&lt;/source&gt;
+  &lt;target&gt;25&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;26&lt;/id&gt;
+  &lt;label&gt;_qEOS&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;1&lt;/source&gt;
+  &lt;target&gt;26&lt;/target&gt;
+&lt;/edge&gt;
+</Result_1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>-------------------------------------------------------------
+&quot;TAI has bought the American Medical Records Processing for
+more than $130 million dollars&quot;
+Action: buy
+   company1: (name) TAI
+   company2: (name) American Medical Records Processing
+   amount: (&gt;) 130000000
+</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><Result_3>&lt;?xml version=&quot;1.0&quot; encoding=&quot;ISO-8859-1&quot;?&gt;
+&lt;vertice&gt;
+  &lt;id&gt;1&lt;/id&gt;
+  &lt;label&gt;_TEXTZONE&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;vertice&gt;
+  &lt;id&gt;2&lt;/id&gt;
+  &lt;label&gt;_sent&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;1&lt;/source&gt;
+  &lt;target&gt;2&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice voice=&quot;active&quot;&gt;
+  &lt;id&gt;3&lt;/id&gt;
+  &lt;label&gt;_clause&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;2&lt;/source&gt;
+  &lt;target&gt;3&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;consolidation&quot;&gt;
+  &lt;id&gt;4&lt;/id&gt;
+  &lt;label&gt;_np&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;3&lt;/source&gt;
+  &lt;target&gt;4&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;right&quot;&gt;
+  &lt;id&gt;5&lt;/id&gt;
+  &lt;label&gt;_adj&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;5&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;right&quot;&gt;
+  &lt;id&gt;6&lt;/id&gt;
+  &lt;label&gt;Right&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;5&lt;/source&gt;
+  &lt;target&gt;6&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;middle&quot;&gt;
+  &lt;id&gt;7&lt;/id&gt;
+  &lt;label&gt;_noun&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;7&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;middle&quot;&gt;
+  &lt;id&gt;8&lt;/id&gt;
+  &lt;label&gt;middle&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;7&lt;/source&gt;
+  &lt;target&gt;8&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;lobe&quot;&gt;
+  &lt;id&gt;9&lt;/id&gt;
+  &lt;label&gt;_noun&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;9&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;lobe&quot;&gt;
+  &lt;id&gt;10&lt;/id&gt;
+  &lt;label&gt;lobe&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;9&lt;/source&gt;
+  &lt;target&gt;10&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;consolidation&quot;&gt;
+  &lt;id&gt;11&lt;/id&gt;
+  &lt;label&gt;_noun&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;4&lt;/source&gt;
+  &lt;target&gt;11&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;consolidation&quot;&gt;
+  &lt;id&gt;12&lt;/id&gt;
+  &lt;label&gt;consolidation&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;11&lt;/source&gt;
+  &lt;target&gt;12&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;compatible&quot;&gt;
+  &lt;id&gt;13&lt;/id&gt;
+  &lt;label&gt;_adjc&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;3&lt;/source&gt;
+  &lt;target&gt;13&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;compatible&quot;&gt;
+  &lt;id&gt;14&lt;/id&gt;
+  &lt;label&gt;_adj&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;13&lt;/source&gt;
+  &lt;target&gt;14&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;compatible&quot;&gt;
+  &lt;id&gt;15&lt;/id&gt;
+  &lt;label&gt;compatible&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;14&lt;/source&gt;
+  &lt;target&gt;15&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;16&lt;/id&gt;
+  &lt;label&gt;_clause&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;2&lt;/source&gt;
+  &lt;target&gt;16&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;pneumonitis&quot;&gt;
+  &lt;id&gt;17&lt;/id&gt;
+  &lt;label&gt;_advl&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;16&lt;/source&gt;
+  &lt;target&gt;17&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;with&quot;&gt;
+  &lt;id&gt;18&lt;/id&gt;
+  &lt;label&gt;_prep&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;17&lt;/source&gt;
+  &lt;target&gt;18&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;19&lt;/id&gt;
+  &lt;label&gt;with&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;18&lt;/source&gt;
+  &lt;target&gt;19&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;pneumonitis&quot;&gt;
+  &lt;id&gt;20&lt;/id&gt;
+  &lt;label&gt;_np&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;17&lt;/source&gt;
+  &lt;target&gt;20&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;acute&quot;&gt;
+  &lt;id&gt;21&lt;/id&gt;
+  &lt;label&gt;_adj&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;20&lt;/source&gt;
+  &lt;target&gt;21&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;acute&quot;&gt;
+  &lt;id&gt;22&lt;/id&gt;
+  &lt;label&gt;acute&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;21&lt;/source&gt;
+  &lt;target&gt;22&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;pneumonitis&quot;&gt;
+  &lt;id&gt;23&lt;/id&gt;
+  &lt;label&gt;_noun&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;20&lt;/source&gt;
+  &lt;target&gt;23&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice stem=&quot;pneumonitis&quot;&gt;
+  &lt;id&gt;24&lt;/id&gt;
+  &lt;label&gt;pneumonitis&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;23&lt;/source&gt;
+  &lt;target&gt;24&lt;/target&gt;
+&lt;/edge&gt;
+&lt;vertice&gt;
+  &lt;id&gt;25&lt;/id&gt;
+  &lt;label&gt;_qEOS&lt;/label&gt;
+&lt;/vertice&gt;
+&lt;edge&gt;
+  &lt;source&gt;1&lt;/source&gt;
+  &lt;target&gt;25&lt;/target&gt;
+&lt;/edge&gt;
+</Result_3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><Result_4>-------------------------------------------------------------
+&quot;TAI&apos;s stock is up 4% from $58.33 a share to $60.66&quot;
+Company: TAI
+   stock: (action) up
+   stock: (percent) 4%
+   stock: (from) 58
+   stock: (to) 60
+</Result_4></Row>
+</Dataset>

+ 18 - 0
testing/regress/ecl/nlppp.ecl

@@ -0,0 +1,18 @@
+// Regression test for the nlp plugin: runs two analyzers over four fixed
+// sentences. The outputs are compared, in order, with Result 1-4 of the
+// matching key file, so the order of the output() calls must not change.
+IMPORT lib_nlp;
+nlp := lib_nlp.nlp;
+
+// Result 1: 'parse_en-us' analyzer; the key file holds an XML parse tree.
+text01 := 'The quick brown fox jumped over the lazy boy.';
+parsedtext01 := nlp.AnalyzeText('parse_en-us',text01);
+output(parsedtext01);
+
+// Result 2: 'corporate' analyzer; the key file holds a plain-text summary.
+text02 := 'TAI has bought the American Medical Records Processing for more than $130 million dollars.';
+parsedtext02 := nlp.AnalyzeText('corporate',text02);
+output(parsedtext02);
+
+// Result 3: 'parse_en-us' again, on a second sentence.
+text03 := 'Right middle lobe consolidation compatible with acute pneumonitis.';
+parsedtext03 := nlp.AnalyzeText('parse_en-us',text03);
+output(parsedtext03);
+
+// Result 4: 'corporate' again; the input contains an escaped apostrophe.
+text04 := 'TAI\'s stock is up 4% from $58.33 a share to $60.66.';
+parsedtext04 := nlp.AnalyzeText('corporate',text04);
+output(parsedtext04);