Explorar o código

HPCC-8541 - Add examples illustrating calling Python and Java from ECL

We will want to produce a cleaner / more integrated solution at some point, but
examples of how to call Java and Python from ECL using embedded C++ may help
as an interim solution.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman hai 12 anos
pai
achega
fdfecc92f4

+ 147 - 0
initfiles/examples/jni/java_from_ecl.ecl

@@ -0,0 +1,147 @@
+/* Example of calling java from ECL code via embedded C++
+*
+* This example uses the following code in JavaCat.java:
+*
+* public class JavaCat
+* {
+*   public static String cat(String a, String b)
+*   {
+*     System.out.println("In java");
+*     System.out.println(a+b);
+*     return a + b;
+*   }
+* }
+*
+* Compile it using
+*
+*   javac JavaCat.java
+*
+* You can also generate the signature for the function to be called using
+*
+*   javap -s -p JavaCat
+*
+* To compile this ECL example, you need to link the JNI libraries:
+*
+* eclcc java_from_ecl.ecl -Wc,-I/usr/lib/jvm/java-6-openjdk/include/ -Wl,-L/usr/lib/jvm/java-6-openjdk/jre/lib/amd64/server/ \
+*    -Wl,-L/usr/lib/jvm/java-6-openjdk/jre/lib/amd64/ -Wl,-ljawt -Wl,-ljvm -target=roxie
+*
+*/
+
+// Embedded C++ that makes a JNI call
+
+string cat(varstring a, varstring b) := BEGINC++
+
+// This section of the code should probably move to a plugin, or somesuch
+
+#include <jni.h>
+#include <assert.h>
+
+static JavaVM *javaVM;       /* the Java VM created by initJNI(); NULL until then (or if creation failed) */
+static pthread_once_t jni_init_flag = PTHREAD_ONCE_INIT;  /* Ensures initialized just once */
+
+// Creates the Java VM and stores it in javaVM.
+// Written to be run via pthread_once, so it must not be called when a VM already exists.
+// If the VM cannot be created a message is printed and javaVM is left NULL.
+static void initJNI()
+{
+    assert (!javaVM);
+
+    // The option array is only read during JNI_CreateJavaVM, so it can live on the stack.
+    // (The previous heap array was released with scalar delete, which mismatches new[].)
+    JavaVMOption options[2];
+    // optionString is a non-const char *, so the literals need an explicit cast
+    options[0].optionString = const_cast<char *>("-Djava.class.path=.");
+    options[0].extraInfo = NULL;
+    options[1].optionString = const_cast<char *>("-verbose:jni");
+    options[1].extraInfo = NULL;
+
+    JavaVMInitArgs vm_args; /* JDK/JRE 6 VM initialization arguments */
+    vm_args.version = JNI_VERSION_1_6;
+    vm_args.nOptions = 1;  // set to 2 if you want the verbose...
+    vm_args.options = options;
+    vm_args.ignoreUnrecognized = false;
+
+    /* load and initialize a Java VM, return a JNI interface pointer in env */
+    JNIEnv *env;       /* receives pointer to native method interface */
+    jint res = JNI_CreateJavaVM(&javaVM, (void**)&env, &vm_args);
+    if (res != JNI_OK)
+    {
+        printf("Java VM creation failed (error %d)\n", (int) res);
+        javaVM = NULL;
+    }
+}
+
+// Returns the JNI environment for the calling thread, creating the Java VM
+// (via initJNI) the first time any thread calls it.
+// Returns NULL, after printing a message, if the thread could not be attached.
+static JNIEnv *getJNIEnvironment()
+{
+    pthread_once(&jni_init_flag, initJNI);
+    // Make sure we attach the current thread to the JVM env...
+    // MORE - not sure how efficient it is to call AttachCurrentThread every time
+    //      - We could probably avoid doing so if we add a hook into jthread
+    //      - we are also never calling DetachCurrentThread
+    JNIEnv *JNIenv = NULL;       /* receives pointer to native method interface */
+    jint res = javaVM->AttachCurrentThread((void **) &JNIenv, NULL);
+    // JNI reports failure as a negative value. The earlier test, (!res < 0),
+    // compared a boolean with zero and so could never be true.
+    if (res < 0)
+    {
+        printf("Thread attach failed");
+        return NULL;
+    }
+    return JNIenv;
+}
+
+// MORE - as coded, the JavaVM is never released.
+// This is not too big a deal for a standalone ECL test program for POC,
+// and isn't that much of a problem in a hthor program, as the process terminates
+// at the end of the workunit execution, but might be an issue if using from thor or roxie.
+//
+// Also should consider whether the java VM should be shared between queries.
+//
+// There is a known bug in DestroyJavaVM, which means you can't actually create more than one Java VM per process.
+// This bug has been around forever, and is not going to get fixed. So the options here are limited...
+
+//--------------------------------------------------------
+
+// This section of code should ideally be generated by the ECL compiler in response to a method being marked as ,jni
+// You would need similar code for each java method you want to call
+
+static jclass JavaCat;          /* global reference to class JavaCat; NULL if it could not be found */
+static jmethodID JavaCat_cat;   /* JavaCat.cat(String, String); NULL if it could not be found */
+static pthread_once_t jni_resolve_flag = PTHREAD_ONCE_INIT;  /* Ensures called just once */
+
+// Looks up the Java class and static method used by this example and caches them
+// in JavaCat / JavaCat_cat. Written to be run via pthread_once.
+// On failure the pending Java exception is printed and cleared, and the
+// corresponding static is left NULL.
+static void resolveJNIMethods()
+{
+    /* Do all the function resolution just the once... */
+    /* MORE - this bit should be generated too */
+    JNIEnv *env = getJNIEnvironment();
+    jclass localClass = env->FindClass("JavaCat");
+    if (!localClass)
+    {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+        return;
+    }
+    // FindClass returns a local reference, which is only valid in the thread that
+    // created it. The cached class is used from whichever thread makes the call,
+    // so it has to be promoted to a global reference.
+    JavaCat = (jclass) env->NewGlobalRef(localClass);
+    env->DeleteLocalRef(localClass);
+    if (!JavaCat)
+        return;
+    JavaCat_cat = env->GetStaticMethodID(JavaCat, "cat", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;");
+    if (!JavaCat_cat)
+    {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+    }
+}
+
+//--------------------------------------------------------
+
+// This section of code should ideally be generated by the ECL compiler inline when calling a ,jni method
+
+#body
+
+// extern  void user1(size32_t & __lenResult,char * & __result,const char * a,const char * b) {
+    // Body of the ECL function cat(a, b): passes both strings to the static Java
+    // method JavaCat.cat and hands the returned string back to ECL through
+    // __lenResult / __result.
+    pthread_once(&jni_resolve_flag, resolveJNIMethods);
+    if (!JavaCat || !JavaCat_cat)
+        rtlFail(0, "Could not resolve java functions");
+
+    JNIEnv *env = getJNIEnvironment();
+    jstring jstrA = env->NewStringUTF(a);
+    jstring jstrB = env->NewStringUTF(b);
+    jstring result = (jstring) env->CallStaticObjectMethod(JavaCat, JavaCat_cat, jstrA, jstrB);
+    // The calling thread is never detached from the VM, so local references are
+    // not reclaimed automatically - release each one as soon as it is finished with.
+    env->DeleteLocalRef(jstrA);
+    env->DeleteLocalRef(jstrB);
+    // MORE - should consider marshalling java exceptions into c++ ones here...
+    if (env->ExceptionCheck() || !result)
+    {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+        rtlFail(0, "Java call failed");
+    }
+
+    __lenResult = env->GetStringUTFLength(result);
+    const char * chars =  env->GetStringUTFChars(result, NULL);
+    if (!chars)
+    {
+        env->ExceptionClear();
+        env->DeleteLocalRef(result);
+        rtlFail(0, "Java call failed");
+    }
+    // new char(n) allocates ONE char initialised to n, so the memcpy below would
+    // overrun it. Allocate n bytes, using rtlMalloc as the ECL BEGINC++
+    // documentation does for returned strings.
+    __result = (char *) rtlMalloc(__lenResult);
+    memcpy(__result, chars, __lenResult);
+    env->ReleaseStringUTFChars(result, chars);
+    env->DeleteLocalRef(result);
+// }
+ENDC++;
+
+//--------------------------------------------------------
+
+// ECL code - an input dataset with 2 records, each containing 2 strings
+
+// Layout of an input row: the two strings to be joined
+PairLayout := RECORD
+  STRING f1;
+  STRING f2;
+END;
+
+// Two inline datasets of two rows each
+firstPairs  := DATASET([{'a', 'b'}, {'c', 'd'}], PairLayout);
+secondPairs := DATASET([{'e', 'f'}, {'g', 'h'}], PairLayout);
+
+// Layout of an output row: a single string holding whatever the Java function returned
+ResultLayout := RECORD
+  STRING c;
+END;
+
+// Builds one output row from one input row
+ResultLayout joinPair(PairLayout L) := TRANSFORM
+  SELF.c := cat(L.f1, L.f2)  // Calls Java function
+END;
+
+// Each input is projected separately and the two results appended
+allResults := PROJECT(firstPairs, joinPair(LEFT)) + PROJECT(secondPairs, joinPair(LEFT));  // threaded concat operation
+
+allResults;

+ 7 - 0
initfiles/examples/python/python_cat.py

@@ -0,0 +1,7 @@
+#!/usr/bin/python
+
+def cat(a, b):
+  return a + b
+
+if __name__=="__main__":
+   print cat("Hello", " world")

+ 162 - 0
initfiles/examples/python/python_from_ecl.ecl

@@ -0,0 +1,162 @@
+/* Example of calling Python from ECL code via embedded C++
+*
+* This example uses the following code in python_cat.py:
+*
+* def cat(a, b):
+*   return a + b
+*
+* To compile this ECL example, you need to link the Python libraries:
+*
+* eclcc python_from_ecl.ecl -Wc,-I/usr/include/python2.7/ -Wl,-lpython2.7 -target=hthor
+*
+* To run it, ensure that PYTHONPATH is set such that python_cat.py can be located
+*/
+
+// Embedded C++ that makes a Python C API call
+
+string cat(varstring a, varstring b) := BEGINC++
+
+// This section of the code should probably move to a plugin, or somesuch
+
+#include <Python.h>
+#include <assert.h>
+
+static pthread_once_t python_resolve_flag = PTHREAD_ONCE_INIT;  /* Ensures called just once */
+static PyObject *pName, *pModule, *pFunc_cat;   /* pFunc_cat stays NULL if the function could not be resolved */
+static bool pythonInitialized = false;
+
+// Starts the Python interpreter, imports module python_cat and looks up its
+// function cat, leaving the results in pName / pModule / pFunc_cat.
+// Written to be run via pthread_once. On failure the Python error (if any) is
+// printed and pFunc_cat is left NULL.
+static void resolvePythonFunctions()
+{
+    /* Do all the function resolution just the once... */
+
+    // Initialize the Python Interpreter
+    Py_Initialize();
+    PyEval_InitThreads();
+    pythonInitialized = true;
+
+    // Build the name object
+    pName = PyString_FromString("python_cat");
+
+    // Load the module object
+    pModule = PyImport_Import(pName);
+    if (pModule == NULL)
+    {
+        PyErr_Print();
+    }
+    else
+    {
+        // pDict is a borrowed reference
+        PyObject *pDict = PyModule_GetDict(pModule);
+        // pFunc_cat is also a borrowed reference
+        pFunc_cat = PyDict_GetItemString(pDict, "cat");
+        if (!pFunc_cat || !PyCallable_Check(pFunc_cat))
+        {
+            PyErr_Print();
+            pFunc_cat = NULL;
+        }
+    }
+    // Give up the GIL AND clear the current thread state. PyEval_ReleaseLock()
+    // only releases the lock and leaves this thread's state current, so a later
+    // PyGILState_Ensure() on this thread would conclude that it already holds the
+    // GIL and run Python code without it.
+    PyEval_SaveThread();
+}
+
+// Drops the references held in pModule and pName, then shuts the interpreter
+// down if resolvePythonFunctions() got as far as starting it.
+static void finishPython()
+{
+    // Py_XDECREF is the NULL-tolerant form of Py_DECREF
+    Py_XDECREF(pModule);
+    Py_XDECREF(pName);
+
+    if (!pythonInitialized)
+        return;
+    Py_Finalize();
+}
+
+static void checkPythonError()
+{
+    PyObject* err = PyErr_Occurred();
+    if (err)
+    {
+        PyErr_Print();
+        rtlFail(0, "Unexpected failure"); // MORE - should probably get some info out of PyError rather than just printing it
+    }
+}
+
+// Threads created by C++ are unknown to the Python Global Interpreter Lock (GIL), so
+// every function that touches Python has to bracket its work with PyGILState_Ensure()
+// and PyGILState_Release(). Doing that from a constructor/destructor pair means the
+// release still happens when the function exits prematurely.
+
+class GILstateWrapper
+{
+    PyGILState_STATE savedState;   // value handed back to PyGILState_Release
+public:
+    GILstateWrapper() : savedState(PyGILState_Ensure())
+    {
+    }
+    ~GILstateWrapper()
+    {
+        PyGILState_Release(savedState);
+    }
+};
+
+// OwnedPyObject holds a PyObject reference that this code owns (i.e. anything that is
+// not a 'borrowed reference') and issues the matching Py_DECREF when it goes out of
+// scope - including when the function is left early, for example by an exception.
+// That is what makes it safe to call checkPythonError part-way through a function.
+
+class OwnedPyObject
+{
+    PyObject *owned;   // may be NULL, in which case the destructor does nothing
+public:
+    inline OwnedPyObject(PyObject *_ptr) : owned(_ptr) {}
+    inline ~OwnedPyObject()                { Py_XDECREF(owned); }
+    inline PyObject * operator -> () const { return owned; }
+    inline operator PyObject *() const     { return owned; }
+};
+
+//--------------------------------------------------------
+
+#body
+
+// extern  void user1(size32_t & __lenResult,char * & __result,const char * a,const char * b) {
+    // Body of the ECL function cat(a, b): calls the Python function cat with both
+    // strings and hands the returned string back to ECL through __lenResult / __result.
+    pthread_once(&python_resolve_flag, resolvePythonFunctions);
+    if (!pFunc_cat)
+       rtlFail(0, "Could not resolve python functions");
+    GILstateWrapper gstate; // Ensure that we play nice with Python threads
+
+    // "s,s" builds a 2-tuple of Python strings from the two C strings
+    OwnedPyObject pArgs = Py_BuildValue("s,s", a, b);
+    checkPythonError();
+    OwnedPyObject pResult = PyObject_CallObject(pFunc_cat, pArgs);
+    checkPythonError();
+
+    // Both calls set a Python error if the result is not a string, so check
+    // before trusting either value.
+    Py_ssize_t resultLen = PyString_Size(pResult);
+    const char * chars =  PyString_AsString(pResult);
+    checkPythonError();
+
+    __lenResult = (size32_t) resultLen;
+    // new char(n) allocates ONE char initialised to n, so the memcpy below would
+    // overrun it. Allocate n bytes, using rtlMalloc as the ECL BEGINC++
+    // documentation does for returned strings.
+    __result = (char *) rtlMalloc(__lenResult);
+    // chars points into pResult's own buffer, so copy before pResult is released
+    memcpy(__result, chars, __lenResult);
+// }
+ENDC++;
+
+//--------------------------------------------------------
+
+// ECL code - an input dataset with 2 records, each containing 2 strings
+
+// Layout of an input row: the two strings to be joined
+PairLayout := RECORD
+  STRING f1;
+  STRING f2;
+END;
+
+// Two inline datasets of two rows each
+firstPairs  := DATASET([{'a', 'b'}, {'c', 'd'}], PairLayout);
+secondPairs := DATASET([{'e', 'f'}, {'g', 'h'}], PairLayout);
+
+// Layout of an output row: a single string holding whatever the Python function returned
+ResultLayout := RECORD
+  STRING c;
+END;
+
+// Builds one output row from one input row
+ResultLayout joinPair(PairLayout L) := TRANSFORM
+  SELF.c := cat(L.f1, L.f2)  // Calls Python function
+END;
+
+// Each input is projected separately and the two results appended
+allResults := PROJECT(firstPairs, joinPair(LEFT)) + PROJECT(secondPairs, joinPair(LEFT));  // threaded concat operation
+
+allResults;