瀏覽代碼

HPCC-8030 Java/Python/Javascript language support in ECL

Added plugin to support Python.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 12 年之前
父節點
當前提交
e3a271df43

+ 1 - 0
plugins/CMakeLists.txt

@@ -26,3 +26,4 @@ if ("${BUILD_LEVEL}" STREQUAL "COMMUNITY")
 endif ()
 
 add_subdirectory (v8embed)
+add_subdirectory (pyembed)

+ 60 - 0
plugins/pyembed/CMakeLists.txt

@@ -0,0 +1,60 @@
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+
+# Component: pyembed
+
+#####################################################
+# Description:
+# ------------
+#    Cmake Input File for pyembed
+#####################################################
+
+# toolsdir is set here but not referenced anywhere else in this file.
+set ( toolsdir "${HPCC_SOURCE_DIR}/tools" )
+# Switch controlling whether the plugin is built against the debug Python runtime:
+# when on, Py_DEBUG is defined and python2.7_d is linked instead of python2.7 (see below).
+set ( debug_python Off )   # A lot slower but can assist in debugging...
+
+project( pyembed )
+
+# The plugin is built from a single source file.
+set (    SRCS
+         pyembed.cpp
+    )
+
+# Header search paths: the Python 2.7 headers plus the platform's system, rtl and jlib headers.
+# NOTE(review): the Python include directory is a hard-coded absolute path, so this presumably
+# only works where Python 2.7 headers are installed under /usr/include - confirm whether the
+# location should be discovered instead.
+include_directories (
+         /usr/include/python2.7
+         ./../../system/include
+         ./../../rtl/eclrtl
+         ./../../rtl/include
+         ./../../system/jlib
+    )
+
+# Preprocessor definitions used when compiling the plugin; Py_DEBUG is added for debug builds only.
+ADD_DEFINITIONS( -D_USRDLL -DPYEMBED_EXPORTS )
+if (debug_python)
+  ADD_DEFINITIONS(-DPy_DEBUG)
+endif()
+
+# Build the plugin as a shared library and install it into the plugins directory.
+HPCC_ADD_LIBRARY( pyembed SHARED ${SRCS} )
+install ( TARGETS pyembed DESTINATION plugins )
+
+# Link against the Python library that matches the debug_python setting.
+if (debug_python)
+  target_link_libraries ( pyembed python2.7_d )
+else()
+  target_link_libraries ( pyembed python2.7 )
+endif()
+
+# Platform libraries the plugin links against.
+target_link_libraries ( pyembed
+    eclrtl
+    jlib
+    )

+ 331 - 0
plugins/pyembed/pyembed.cpp

@@ -0,0 +1,331 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+#include "platform.h"
+#include "Python.h"
+#include "eclrtl.hpp"
+#include "jexcept.hpp"
+#include "jthread.hpp"
+#include "hqlplugins.hpp"
+
+// EXPORT expands to __declspec(dllexport) when _WIN32 is defined and to nothing otherwise.
+#ifdef _WIN32
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+// NULL-terminated list of version strings; getECLPluginDefinition stores it in the
+// compatibleVersions field when the caller supplies the extended definition block.
+static const char * compatibleVersions[] = {
+    "Python2.7 Embed Helper 1.0.0",
+    NULL };
+
+// Version string that getECLPluginDefinition reports in the definition block's version field.
+static const char *version = "Python2.7 Embed Helper 1.0.0";
+
+// ECL source that getECLPluginDefinition reports in the definition block's ECL field.
+// It declares a Language service whose two entries (getEmbedContext and syntaxCheck) name
+// entry points in the 'pyembed' namespace, and re-exports both at the top level.
+static const char * EclDefinition =
+    "EXPORT Language := SERVICE\n"
+    "  boolean getEmbedContext():cpp,pure,namespace='pyembed',entrypoint='getEmbedContext',prototype='IEmbedContext* getEmbedContext()';\n"
+    "  boolean syntaxCheck(const varstring src):cpp,pure,namespace='pyembed',entrypoint='syntaxCheck';\n"
+    "END;"
+    "export getEmbedContext := Language.getEmbedContext;"
+    "export syntaxCheck := Language.syntaxCheck;";
+
+// Plugin entry point: fills in the definition block that describes this plugin.
+//
+// The caller indicates which form of the block it is passing via pb->size. Both the plain
+// ECLPluginDefinitionBlock and the extended ECLPluginDefinitionBlockEx are accepted; the
+// list of compatible versions is only stored when the extended form was supplied.
+//
+// Returns true if the block was filled in, or false (leaving the block untouched) if
+// pb->size matches neither form.
+extern "C" EXPORT bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
+{
+    const bool isExtendedBlock = (pb->size == sizeof(ECLPluginDefinitionBlockEx));
+    if (!isExtendedBlock && pb->size != sizeof(ECLPluginDefinitionBlock))
+        return false;
+
+    if (isExtendedBlock)
+        ((ECLPluginDefinitionBlockEx *) pb)->compatibleVersions = compatibleVersions;
+
+    pb->magicVersion = PLUGIN_VERSION;
+    pb->version = version;
+    pb->moduleName = "python";
+    pb->ECL = EclDefinition;
+    pb->flags = PLUGIN_DLL_MODULE | PLUGIN_MULTIPLE_VERSIONS;
+    pb->description = "Python2.7 Embed Helper";
+    return true;
+}
+
+namespace pyembed {
+
+// Use class OwnedPyObject for any objects that are not 'borrowed references'
+// so that the reference is released when the OwnedPyObject goes out of scope,
+// even if the function returns prematurely (such as via an exception).
+// In particular, checkPythonError is a lot easier to call safely if this is used.
+//
+// Ownership rules:
+//  - constructing from, or calling setown() with, a raw PyObject * takes over the
+//    caller's reference (no extra reference is taken);
+//  - copying or assigning from another OwnedPyObject takes an additional reference,
+//    so each instance releases exactly one reference when it is cleared or destroyed.
+// A NULL pointer is allowed everywhere and simply means 'no object'.
+
+class OwnedPyObject
+{
+    PyObject *ptr;
+public:
+    inline OwnedPyObject() : ptr(NULL)     {}
+    inline OwnedPyObject(PyObject *_ptr) : ptr(_ptr) {}
+    // Copies share the object, so take a reference of our own. Without this the
+    // compiler-generated copy would lead to the same reference being released twice.
+    inline OwnedPyObject(const OwnedPyObject &other) : ptr(other.ptr) { Py_XINCREF(ptr); }
+    inline ~OwnedPyObject()                { Py_XDECREF(ptr); }
+    inline OwnedPyObject & operator = (const OwnedPyObject &other)
+    {
+        // Take the new reference first so that self-assignment is harmless
+        Py_XINCREF(other.ptr);
+        setown(other.ptr);
+        return *this;
+    }
+    inline PyObject * get() const           { return ptr; }
+    inline PyObject * operator -> () const { return ptr; }
+    inline operator PyObject *() const    { return ptr; }
+    // Release the held reference (if any) and become empty
+    inline void clear()
+    {
+        // Detach before releasing: dropping the last reference can run arbitrary Python
+        // code, which should never see this object still pointing at a dying PyObject.
+        PyObject *old = ptr;
+        ptr = NULL;
+        Py_XDECREF(old);
+    }
+    // Release the held reference (if any) and take over the caller's reference to _ptr
+    inline void setown(PyObject *_ptr)
+    {
+        PyObject *old = ptr;
+        ptr = _ptr;
+        Py_XDECREF(old);
+    }
+};
+
+// call checkPythonError to throw an exception if Python error state is set
+
+static void checkPythonError()
+{
+    PyObject* err = PyErr_Occurred();
+    if (err)
+    {
+        OwnedPyObject errStr = PyObject_Str(err);
+        PyErr_Clear();
+        rtlFail(0, PyString_AsString(errStr));
+    }
+}
+
+// The Python Global Interpreter Lock (GIL) won't know about C++-created threads, so
+// PyGILState_Ensure() and PyGILState_Release() have to bracket our use of Python.
+// Tying the pair to an object's lifetime guarantees that the release always happens,
+// even if the owning scope exits prematurely.
+
+class GILstateWrapper
+{
+public:
+    // Calls PyGILState_Ensure and remembers the state it returns
+    GILstateWrapper() : gstate(PyGILState_Ensure())
+    {
+    }
+    // Hands the remembered state back to PyGILState_Release
+    ~GILstateWrapper()
+    {
+        PyGILState_Release(gstate);
+    }
+private:
+    PyGILState_STATE gstate;
+};
+
+// There is a singleton PythonThreadContext per thread. This allows us to
+// ensure that we can make repeated calls to a Python function efficiently.
+//
+// GIL handling: the GILState member is constructed before the constructor body runs, so
+// the GIL is held on entry to the constructor. The constructor finishes by releasing it
+// with PyEval_SaveThread, recording the resulting thread state in threadState so that
+// Python27EmbedFunctionContext can re-acquire it around each call. The bind/get/compile/call
+// methods all use the Python API and so must only be called while the GIL is held.
+
+class PythonThreadContext
+{
+public:
+    PyThreadState *threadState;   // State saved by the most recent PyEval_SaveThread
+public:
+    PythonThreadContext()
+    {
+        // Create the dictionaries while the GIL is still held: the Python API must not be
+        // used once PyEval_SaveThread has released it.
+        locals.setown(PyDict_New());
+        globals.setown(PyDict_New());
+        threadState = PyEval_SaveThread();
+    }
+    ~PythonThreadContext()
+    {
+        // Re-acquire the GIL before dropping any references to Python objects
+        PyEval_RestoreThread(threadState);
+        locals.clear();
+        globals.clear();
+        script.clear();
+        result.clear();
+    }
+
+    // The bindXXXParam methods store a parameter value under the given name in the
+    // 'locals' dictionary, for use by a subsequent callFunction().
+
+    inline void bindRealParam(const char *name, double val)
+    {
+        OwnedPyObject vval = PyFloat_FromDouble(val);
+        PyDict_SetItemString(locals, name, vval);
+    }
+    inline void bindSignedParam(const char *name, __int64 val)
+    {
+        OwnedPyObject vval = PyLong_FromLongLong(val);
+        PyDict_SetItemString(locals, name, vval);
+    }
+    inline void bindUnsignedParam(const char *name, unsigned __int64 val)
+    {
+        OwnedPyObject vval = PyLong_FromUnsignedLongLong(val);
+        PyDict_SetItemString(locals, name, vval);
+    }
+    // val need not be null-terminated; exactly len bytes are used
+    inline void bindStringParam(const char *name, size32_t len, const char *val)
+    {
+        OwnedPyObject vval = PyString_FromStringAndSize(val, len);
+        PyDict_SetItemString(locals, name, vval);
+    }
+    // val must be null-terminated
+    inline void bindVStringParam(const char *name, const char *val)
+    {
+        OwnedPyObject vval = PyString_FromString(val);
+        PyDict_SetItemString(locals, name, vval);
+    }
+
+    // The getXXXResult methods convert the value produced by the most recent callFunction().
+    // They fail (via checkPythonError) if Python reports an error during the conversion.
+
+    inline double getRealResult()
+    {
+        assertex(result);
+        // Return the value as a double - do not truncate it to an integer
+        double ret = PyFloat_AsDouble(result);
+        checkPythonError();
+        return ret;
+    }
+    inline __int64 getSignedResult()
+    {
+        assertex(result);
+        __int64 ret = (__int64) PyLong_AsLongLong(result);
+        checkPythonError();
+        return ret;
+    }
+    inline unsigned __int64 getUnsignedResult()
+    {
+        assertex(result);
+        unsigned __int64 ret = (unsigned __int64) PyLong_AsUnsignedLongLong(result);
+        checkPythonError();
+        return ret;
+    }
+    // Returns a copy of the result's bytes in a buffer allocated with rtlMalloc; the
+    // buffer is not null-terminated, its length is returned in __len.
+    inline void getStringResult(size32_t &__len, char * &__result)
+    {
+        assertex(result);
+        __len = PyString_Size(result);
+        const char * chars =  PyString_AsString(result);
+        checkPythonError();
+        __result = (char *)rtlMalloc(__len);
+        memcpy(__result, chars, __len);
+    }
+
+    // Compile text as a Python expression (Py_eval_input), ready for callFunction().
+    // The compiled code is kept, so compiling the same text again is a no-op.
+    inline void compileEmbeddedScript(const char *text)
+    {
+        if (!prevtext || strcmp(text, prevtext) != 0)
+        {
+            // Forget the old text first, so a failed compile is not mistaken for a cached one
+            prevtext.clear();
+            script.setown(Py_CompileString(text, "", Py_eval_input));
+            checkPythonError();
+            prevtext.set(text);
+        }
+
+    }
+    // Evaluate the compiled script, keeping its value for the getXXXResult methods.
+    // Fails (via checkPythonError) if an error was already pending or evaluation raises one.
+    inline void callFunction()
+    {
+        checkPythonError();
+        // NOTE(review): PyEval_EvalCode is documented as taking (code, globals, locals), so the
+        // dictionary named 'locals' is passed as the globals argument here - confirm intended.
+        result.setown(PyEval_EvalCode((PyCodeObject *) script.get(), locals, globals));
+        checkPythonError();
+    }
+private:
+    GILstateWrapper GILState;
+    OwnedPyObject locals;
+    OwnedPyObject globals;
+    OwnedPyObject script;
+    OwnedPyObject result;
+    StringAttr prevtext;      // Text that 'script' was compiled from
+};
+
+// A new Python27EmbedFunctionContext object is used for each call to a Python function.
+// Its lifetime brackets the call: the constructor re-acquires the GIL using the thread state
+// saved in the shared per-thread context, and the destructor releases it again, saving the
+// state back. Everything else is forwarded unchanged to the shared per-thread context.
+
+class Python27EmbedFunctionContext : public CInterfaceOf<IEmbedFunctionContext>
+{
+    PythonThreadContext *sharedCtx;   // Per-thread context; not owned by this object
+public:
+    Python27EmbedFunctionContext(PythonThreadContext *_sharedCtx)
+    : sharedCtx(_sharedCtx)
+    {
+        PyEval_RestoreThread(sharedCtx->threadState);
+    }
+    ~Python27EmbedFunctionContext()
+    {
+        sharedCtx->threadState = PyEval_SaveThread();
+    }
+
+    // Parameter binding
+
+    virtual void bindRealParam(const char *name, double val)
+    {
+        sharedCtx->bindRealParam(name, val);
+    }
+    virtual void bindSignedParam(const char *name, __int64 val)
+    {
+        sharedCtx->bindSignedParam(name, val);
+    }
+    virtual void bindUnsignedParam(const char *name, unsigned __int64 val)
+    {
+        sharedCtx->bindUnsignedParam(name, val);
+    }
+    virtual void bindStringParam(const char *name, size32_t len, const char *val)
+    {
+        sharedCtx->bindStringParam(name, len, val);
+    }
+    virtual void bindVStringParam(const char *name, const char *val)
+    {
+        sharedCtx->bindVStringParam(name, val);
+    }
+
+    // Result retrieval
+
+    virtual double getRealResult()
+    {
+        return sharedCtx->getRealResult();
+    }
+    virtual __int64 getSignedResult()
+    {
+        return sharedCtx->getSignedResult();
+    }
+    virtual unsigned __int64 getUnsignedResult()
+    {
+        return sharedCtx->getUnsignedResult();
+    }
+    virtual void getStringResult(size32_t &__len, char * &__result)
+    {
+        sharedCtx->getStringResult(__len, __result);
+    }
+
+    // Compilation and execution
+
+    virtual void compileEmbeddedScript(const char *text)
+    {
+        sharedCtx->compileEmbeddedScript(text);
+    }
+    virtual void callFunction()
+    {
+        sharedCtx->callFunction();
+    }
+};
+
+__thread PythonThreadContext* threadContext;  // We reuse per thread, for speed
+__thread ThreadTermFunc threadHookChain;      // Value returned by addThreadTermFunc when releaseContext was registered
+
+// Thread termination hook: destroys this thread's PythonThreadContext (if any) and then
+// calls the hook recorded in threadHookChain (if any).
+// Both thread-local pointers are reset before they are used, so that running the hook again
+// on the same thread can neither delete the context twice nor leave createFunctionContext
+// with a dangling pointer.
+static void releaseContext()
+{
+    PythonThreadContext *ctx = threadContext;
+    threadContext = NULL;
+    delete ctx;   // deleting NULL is a no-op
+
+    ThreadTermFunc next = threadHookChain;
+    threadHookChain = NULL;
+    if (next)
+        (*next)();
+}
+
+// Owner of the embedded Python interpreter. A single static instance (theEmbedContext) is
+// defined along with the class: constructing it initializes the interpreter, destroying it
+// finalizes the interpreter.
+static class Python27EmbedContext : public CInterfaceOf<IEmbedContext>
+{
+public:
+    Python27EmbedContext()
+    {
+        // Initialize the Python Interpreter
+        Py_Initialize();
+        PyEval_InitThreads();
+        // Release the GIL again, remembering the thread state so that the destructor can restore it
+        tstate = PyEval_SaveThread();
+        Link();  // Deliberately 'leak' in order to avoid freeing this global object prematurely
+    }
+    ~Python27EmbedContext()
+    {
+        // Re-acquire the GIL using the state saved by the constructor
+        PyEval_RestoreThread(tstate);
+        // Finish the Python Interpreter
+        Py_Finalize();
+    }
+    // Returns a new function context bound to the calling thread's PythonThreadContext.
+    // The first call on a thread creates that thread's context and registers releaseContext
+    // via addThreadTermFunc, keeping the value it returns in threadHookChain.
+    virtual IEmbedFunctionContext *createFunctionContext()
+    {
+        if (!threadContext)
+        {
+            threadContext = new PythonThreadContext;
+            threadHookChain = addThreadTermFunc(releaseContext);
+        }
+        return new Python27EmbedFunctionContext(threadContext);
+    }
+protected:
+    PyThreadState *tstate;   // Thread state saved by the constructor's PyEval_SaveThread
+} theEmbedContext;
+
+// Entry point named by the plugin's ECL definition: returns the plugin's single embed
+// context (theEmbedContext), linked via LINK before it is returned.
+extern IEmbedContext* getEmbedContext()
+{
+    IEmbedContext *ctx = &theEmbedContext;
+    return LINK(ctx);
+}
+
+// Entry point named by the plugin's ECL definition for checking the syntax of a script.
+// Not implemented yet: the script argument is ignored and every script is reported as valid.
+extern bool syntaxCheck(const char *script)
+{
+    return true; // MORE
+}
+
+} // namespace

+ 3 - 3
plugins/v8embed/v8embed.cpp

@@ -29,10 +29,10 @@
 #endif
 
 static const char * compatibleVersions[] = {
-    "JAVASCRIPT 1.0.0",
+    "V8 JavaScript Embed Helper 1.0.0",
     NULL };
 
-static const char *version = "V8JavaScriptHelper 1.1.14";
+static const char *version = "V8 JavaScript Embed Helper 1.0.0";
 static const char * EclDefinition =
     "EXPORT Language := SERVICE\n"
     "  boolean getEmbedContext():cpp,pure,namespace='javascriptLanguageHelper',entrypoint='getEmbedContext',prototype='IEmbedContext* getEmbedContext()';\n"
@@ -55,7 +55,7 @@ extern "C" EXPORT bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
     pb->moduleName = "javascript";
     pb->ECL = EclDefinition;
     pb->flags = PLUGIN_DLL_MODULE | PLUGIN_MULTIPLE_VERSIONS;
-    pb->description = "V8 Javascript language helper";
+    pb->description = "V8 JavaScript Embed Helper";
     return true;
 }
 

+ 4 - 4
testing/ecl/embedjs.ecl

@@ -12,16 +12,16 @@ add3('World');
 
 s1 :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := add1(COUNTER)));
 s2 :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := add1(COUNTER/2)));
-SUM(NOFOLD(s1 & s2), a);
+SUM(NOFOLD(s1 + s2), a);
 
 s1a :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) add2((STRING)COUNTER)));
 s2a :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) add3((STRING)(COUNTER/2))));
-SUM(NOFOLD(s1a & s2a), a);
+SUM(NOFOLD(s1a + s2a), a);
 
 s1b :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := COUNTER+1));
 s2b :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (COUNTER/2)+1));
-SUM(NOFOLD(s1b & s2b), a);
+SUM(NOFOLD(s1b + s2b), a);
 
 s1c :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) ((STRING) COUNTER + '1')));
 s2c :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) ((STRING)(COUNTER/2) + '1')));
-SUM(NOFOLD(s1c & s2c), a);
+SUM(NOFOLD(s1c + s2c), a);

+ 12 - 7
testing/ecl/embedp.ecl

@@ -1,12 +1,17 @@
-python := module
-  export a := 1;
-end;
+IMPORT python;
 
-integer a(integer val) := EMBED(python) return val+1; ENDEMBED;
-string a2(string val) := EMBED(python) return val+'1'; ENDEMBED;
-string a3(varstring val) := EMBED(python) return val+'1'; ENDEMBED;
+integer a(integer val) := EMBED(python)
+val+1
+ENDEMBED;
+string a2(string val) := EMBED(python)
+val+'1'
+ENDEMBED;
 
-integer b(integer val) := EMBED(python, 'return val-2;');
+string a3(varstring val) := EMBED(python)
+val+'1'
+ENDEMBED;
+
+integer b(integer val) := EMBED(python, 'val-2');
 
 a(10);
 a2('Hello');

+ 34 - 0
testing/ecl/embedpy.ecl

@@ -0,0 +1,34 @@
+// Test of embedded Python expressions, using the Python plugin module.
+IMPORT Python;
+
+// NOTE(review): if this statement produces an output of its own, the outputs that follow
+// would be numbered from 2 - confirm against the expected results in the key file.
+Python.Language.syntaxcheck('1+2');
+
+// Integer parameter and integer result
+integer add1(integer val) := EMBED(Python)
+val+1
+ENDEMBED;
+// String parameter and string result
+string add2(string val) := EMBED(Python)
+val+'1'
+ENDEMBED;
+
+// Varstring parameter and string result
+string add3(varstring val) := EMBED(Python)
+val+'1'
+ENDEMBED;
+
+// Single calls to each function
+add1(10);
+add2('Hello');
+add3('World');
+
+// Repeated calls: two 250000-row datasets computed through the embedded integer function
+s1 :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := add1(COUNTER)));
+s2 :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := add1(COUNTER/2)));
+ SUM(NOFOLD(s1 + s2), a);
+
+// Repeated calls through the embedded string functions, with the results cast back to integer
+s1a :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) add2((STRING)COUNTER)));
+s2a :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) add3((STRING)(COUNTER/2))));
+ SUM(NOFOLD(s1a + s2a), a);
+
+// The same integer calculation written directly in ECL, without embedded code
+s1b :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := COUNTER+1));
+s2b :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (COUNTER/2)+1));
+ SUM(NOFOLD(s1b + s2b), a);
+
+// The same string calculation written directly in ECL, without embedded code
+s1c :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) ((STRING) COUNTER + '1')));
+s2c :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) ((STRING)(COUNTER/2) + '1')));
+ SUM(NOFOLD(s1c + s2c), a);

+ 21 - 0
testing/ecl/key/embedpy.xml

@@ -0,0 +1,21 @@
+<Dataset name='Result 1'>
+ <Row><Result_1>11</Result_1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>Hello1</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><Result_3>World1</Result_3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><Result_4>46875625000</Result_4></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><Result_5>328126500000</Result_5></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><Result_6>46875625000</Result_6></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><Result_7>328126500000</Result_7></Row>
+</Dataset>