Browse Source

HPCC-16906 Add a new plugin for embedded Python3 code

Copied from the Python2 plugin, with modifications to reflect the changed C
API.

Note that libpython3.x exports many of the same symbols as libpython2.x, which
can lead to issues if both are loaded. In particular, I had to change the
visibility rules when loading plugins, and I also had to refine the code for
working around a CentOS distro issue with improperly linked Python extensions.
We now only apply the workaround if py3embed is not in use - people using
CentOS and buggy Python distros will need to pick one or the other version of
pyembed, but not both.

Also discovered that one of the existing tests was working by luck (since
python sets are unordered), and stopped working on (at least some) python 3
installations.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 8 years ago
parent
commit
5258c33b2c

+ 1 - 0
CMakeLists.txt

@@ -151,6 +151,7 @@ if ( PLUGIN )
     HPCC_ADD_SUBDIRECTORY (plugins/v8embed "V8EMBED")
     HPCC_ADD_SUBDIRECTORY (plugins/memcached "MEMCACHED")
     HPCC_ADD_SUBDIRECTORY (plugins/pyembed "PYEMBED")
+    HPCC_ADD_SUBDIRECTORY (plugins/py3embed "PY3EMBED")
     HPCC_ADD_SUBDIRECTORY (plugins/redis "REDIS")
     HPCC_ADD_SUBDIRECTORY (plugins/javaembed "JAVAEMBED")
     HPCC_ADD_SUBDIRECTORY (plugins/kafka "KAFKA")

+ 1 - 0
cmake_modules/commonSetup.cmake

@@ -142,6 +142,7 @@ IF ("${COMMONSETUP_DONE}" STREQUAL "")
     V8EMBED
     MEMCACHED
     PYEMBED
+    PY3EMBED
     REDIS
     MYSQLEMBED
     JAVAEMBED

+ 4 - 1
common/dllserver/thorplugin.cpp

@@ -634,7 +634,10 @@ bool SafePluginMap::addPlugin(const char *path, const char *dllname)
         if (!dll)
         {
             Owned<PluginDll> n = new PluginDll(path, NULL);
-            if (!n->load(true, false) || !n->init(pluginCtx))
+            // Note - we used to load plugins with global=true, but that caused issues when loading
+            // Python3 and Python2 plugins at the same time as they export similar symbols
+            // Loading with global=false should not cause any adverse issues
+            if (!n->load(false, false) || !n->init(pluginCtx))
                 throw MakeStringException(0, "Failed to load plugin %s", path);
             if (trace)
                 n->logLoaded();

+ 1 - 1
ecl/hql/hqlfold.cpp

@@ -698,7 +698,7 @@ void *loadExternalEntryPoint(IHqlExpression* expr, unsigned foldOptions, ITempla
         }
     }
 #ifdef _DEBUG
-    if (streq(library, "libpyembed.dylib") || streq(library, "libv8embed.dylib") || streq(library, "libjavaembed.dylib"))
+    if (streq(library, "libpy3embed.dylib") || streq(library, "libpyembed.dylib") || streq(library, "libv8embed.dylib") || streq(library, "libjavaembed.dylib"))
     {
         Dl_info info;
         if (dladdr((const void *) rtlStrToUInt4, &info))  // Any function in eclrtl would do...

+ 2 - 1
initfiles/examples/embed/python_cat.py

@@ -1,7 +1,8 @@
 #!/usr/bin/python
+from __future__ import print_function
 
 def cat(a, b):
   return a + b
 
 if __name__=="__main__":
-   print cat("Hello", " world")
+   print (cat("Hello", " world"))

+ 1 - 0
plugins/CMakeLists.txt

@@ -28,6 +28,7 @@ add_subdirectory (proxies)
 add_subdirectory (sqlite3)
 add_subdirectory (mysql)
 add_subdirectory (v8embed)
+add_subdirectory (py3embed)
 add_subdirectory (pyembed)
 add_subdirectory (javaembed)
 add_subdirectory (Rembed)

+ 90 - 0
plugins/py3embed/CMakeLists.txt

@@ -0,0 +1,90 @@
+################################################################################
+#    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+################################################################################
+
+
+# Component: py3embed
+
+#####################################################
+# Description:
+# ------------
+#    Cmake Input File for py3embed
+#####################################################
+
+project(py3embed)
+if(PY3EMBED)
+    unset(PYTHONLIBS_FOUND CACHE)
+    unset(PYTHON_LIBRARY CACHE)
+    unset(PYTHON_LIBRARIES CACHE)
+    unset(PYTHON_INCLUDE_PATH CACHE)
+    unset(PYTHON_INCLUDE_DIR CACHE)
+    unset(PYTHON_DEBUG_LIBRARIES CACHE)
+    unset(PYTHONLIBS_VERSION_STRING CACHE)
+    ADD_PLUGIN(py3embed PACKAGES PythonLibs MINVERSION 3.3)
+    set(PYTHON3LIBS_FOUND ${PYTHONLIBS_FOUND})
+    set(PYTHON3_LIBRARY ${PYTHON_LIBRARY})
+    set(PYTHON3_LIBRARIES ${PYTHON_LIBRARIES})
+    set(PYTHON3_INCLUDE_PATH ${PYTHON_INCLUDE_PATH})
+    set(PYTHON3_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
+    set(PYTHON3_DEBUG_LIBRARIES ${PYTHON_DEBUG_LIBRARIES})
+    set(PYTHON3LIBS_VERSION_STRING ${PYTHONLIBS_VERSION_STRING})
+    message("Python3 library is ${PYTHON3_LIBRARY}")
+
+    if(MAKE_PY3EMBED)
+        set(
+            SRCS
+            py3embed.cpp)
+
+        include_directories(
+            "${PYTHON3_INCLUDE_DIR}"
+            ./../../system/include
+            ./../../rtl/eclrtl
+            ./../../rtl/include
+            ./../../rtl/nbcd
+            ./../../common/deftype
+            ./../../common/thorhelper
+            ./../../dali/base
+            ./../../system/mp
+            ./../../roxie/roxiemem
+            ./../../system/jlib)
+
+        add_definitions(-D_USRDLL -DPY3EMBED_EXPORTS)
+
+        HPCC_ADD_LIBRARY(py3embed SHARED ${SRCS})
+        if(${CMAKE_VERSION} VERSION_LESS "2.8.9")
+            message(WARNING "Cannot set NO_SONAME. shlibdeps will give warnings when package is installed")
+        elseif(NOT APPLE)
+            set_target_properties(py3embed PROPERTIES NO_SONAME 1)
+        endif()
+
+        install(
+            TARGETS py3embed
+            DESTINATION plugins)
+
+        target_link_libraries(py3embed ${PYTHON3_LIBRARY})
+
+        target_link_libraries(
+            py3embed
+            eclrtl
+            roxiemem
+            jlib)
+    endif()
+endif()
+if(PLATFORM OR CLIENTTOOLS_ONLY)
+    install(
+        FILES ${CMAKE_CURRENT_SOURCE_DIR}/python3.ecllib
+        DESTINATION plugins
+        COMPONENT Runtime)
+endif()

File diff suppressed because it is too large
+ 1719 - 0
plugins/py3embed/py3embed.cpp


+ 26 - 0
plugins/py3embed/python3.ecllib

@@ -0,0 +1,26 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2013 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+EXPORT Language := SERVICE : plugin('py3embed')
+  integer getEmbedContext():cpp,pure,fold,namespace='py3embed',entrypoint='getEmbedContext',prototype='IEmbedContext* getEmbedContext()';
+  boolean syntaxCheck(const varstring src):cpp,pure,namespace='py3embed',entrypoint='syntaxCheck';
+END;
+EXPORT getEmbedContext := Language.getEmbedContext;
+EXPORT syntaxCheck := Language.syntaxCheck;
+EXPORT boolean supportsImport := true;
+EXPORT boolean supportsScript := true;
+EXPORT boolean prebind := true;

+ 18 - 3
plugins/pyembed/CMakeLists.txt

@@ -29,14 +29,29 @@ set(DEBUG_PYTHON_LIBRARY "/usr/lib/libpython2.7_d.so")
 project(pyembed)
 
 if(PYEMBED)
+    unset(PYTHONLIBS_FOUND CACHE)
+    unset(PYTHON_LIBRARY CACHE)
+    unset(PYTHON_LIBRARIES CACHE)
+    unset(PYTHON_INCLUDE_PATH CACHE)
+    unset(PYTHON_INCLUDE_DIR CACHE)
+    unset(PYTHON_DEBUG_LIBRARIES CACHE)
+    unset(PYTHONLIBS_VERSION_STRING CACHE)
     ADD_PLUGIN(pyembed PACKAGES PythonLibs MINVERSION 2.6 MAXVERSION 2.7)
+    set(PYTHON2LIBS_FOUND ${PYTHONLIBS_FOUND})
+    set(PYTHON2_LIBRARY ${PYTHON_LIBRARY})
+    set(PYTHON2_LIBRARIES ${PYTHON_LIBRARIES})
+    set(PYTHON2_INCLUDE_PATH ${PYTHON_INCLUDE_PATH})
+    set(PYTHON2_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
+    set(PYTHON2_DEBUG_LIBRARIES ${PYTHON_DEBUG_LIBRARIES})
+    set(PYTHON2LIBS_VERSION_STRING ${PYTHONLIBS_VERSION_STRING})
+    message("Python2 library is ${PYTHON2_LIBRARY}")
     if(MAKE_PYEMBED)
         set(
             SRCS
             pyembed.cpp)
 
         include_directories(
-            "${PYTHON_INCLUDE_DIR}"
+            "${PYTHON2_INCLUDE_DIR}"
             ./../../system/include
             ./../../rtl/eclrtl
             ./../../rtl/include
@@ -64,9 +79,9 @@ if(PYEMBED)
             TARGETS pyembed
             DESTINATION plugins)
         if(debug_python)
-            target_link_libraries(pyembed ${DEBUG_PYTHON_LIBRARY})
+            target_link_libraries(pyembed ${DEBUG_PYTHON2_LIBRARY})
         else()
-            target_link_libraries(pyembed ${PYTHON_LIBRARY})
+            target_link_libraries(pyembed ${PYTHON2_LIBRARY})
         endif()
 
         target_link_libraries(

+ 44 - 46
plugins/pyembed/pyembed.cpp

@@ -234,6 +234,38 @@ static void releaseContext()
     }
 }
 
+#ifndef _WIN32
+static bool findLoadedModule(StringBuffer &ret,  const char *match)
+{
+    bool found = false;
+    FILE *diskfp = fopen("/proc/self/maps", "r");
+    if (diskfp)
+    {
+        char ln[_MAX_PATH];
+        while (fgets(ln, sizeof(ln), diskfp))
+        {
+            if (strstr(ln, match))
+            {
+                const char *fullName = strchr(ln, '/');
+                if (fullName)
+                {
+                    char * lf = (char *) strchr(fullName, '\n');
+                    if (lf)
+                    {
+                        *lf = 0;
+                        ret.set(fullName);
+                        found = true;
+                        break;
+                    }
+                }
+            }
+        }
+        fclose(diskfp);
+    }
+    return found;
+}
+#endif
+
 // Use a global object to ensure that the Python interpreter is initialized on main thread
 
 static class Python27GlobalState
@@ -253,31 +285,14 @@ public:
         }
 #endif
 #ifndef _WIN32
-        // We need to ensure all symbols in the python2.6 so are loaded - due to bugs in some distro's python installations
-        FILE *diskfp = fopen("/proc/self/maps", "r");
-        if (diskfp)
-        {
-            char ln[_MAX_PATH];
-            while (fgets(ln, sizeof(ln), diskfp))
-            {
-                if (strstr(ln, "libpython2"))
-                {
-                    const char *fullName = strchr(ln, '/');
-                    if (fullName)
-                    {
-                        char * lf = (char *) strchr(fullName, '\n');
-                        if (lf)
-                        {
-                            *lf = 0;
-                            pythonLibrary = dlopen((char *)fullName, RTLD_NOW|RTLD_GLOBAL);
-//                            DBGLOG("dlopen %s returns %" I64F "x", fullName, (__uint64) pythonLibrary);
-                            break;
-                        }
-                    }
-                }
-            }
-            fclose(diskfp);
-        }
+        // We need to ensure all symbols in the python2.x so are loaded - due to bugs in some distro's python installations
+        // However this will likely break python3.
+        // Therefore on systems where both are present, do NOT do this - people using centos systems that suffer from issue
+        // https://bugs.centos.org/view.php?id=6063 will need to choose which version of python plugin to install but not both
+
+        StringBuffer modname, py3modname;
+        if  (findLoadedModule(modname, "libpython2.") && !findLoadedModule(py3modname, "libpython3."))
+            pythonLibrary = dlopen(modname.str(), RTLD_NOW|RTLD_GLOBAL);
 #endif
         // Initialize the Python Interpreter
         Py_Initialize();
@@ -476,28 +491,11 @@ MODULE_INIT(INIT_PRIORITY_STANDARD)
         DBGLOG("LoadSharedObject returned %p", h);
     }
 #else
-    FILE *diskfp = fopen("/proc/self/maps", "r");
-    if (diskfp)
+    StringBuffer modname;
+    if (findLoadedModule(modname, "libpyembed"))
     {
-        char ln[_MAX_PATH];
-        while (fgets(ln, sizeof(ln), diskfp))
-        {
-            if (strstr(ln, "libpyembed"))
-            {
-                const char *fullName = strchr(ln, '/');
-                if (fullName)
-                {
-                    char *tail = (char *) strstr(fullName, SharedObjectExtension);
-                    if (tail)
-                    {
-                        tail[strlen(SharedObjectExtension)] = 0;
-                        HINSTANCE h = LoadSharedObject(fullName, false, false);
-                        break;
-                    }
-                }
-            }
-        }
-        fclose(diskfp);
+        HINSTANCE h = LoadSharedObject(modname, false, false);
+        // Deliberately leak this handle
     }
 #endif
     return true;

+ 2 - 1
testing/ecl/python_cat.py

@@ -1,7 +1,8 @@
 #!/usr/bin/python
+from __future__ import print_function
 
 def cat(a, b):
   return a + b
 
 if __name__=="__main__":
-   print cat("Hello", " world")
+   print (cat("Hello", " world"))

+ 43 - 0
testing/regress/ecl/embedpy3-catch.ecl

@@ -0,0 +1,43 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2014 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+//nothor
+
+//Thor doesn't handle CATCH properly, see HPCC-9059
+//skip type==thorlcr TBD
+
+IMPORT Python3;
+
+integer testThrow(integer val) := EMBED(Python3)
+raise Exception('Error from Python')
+ENDEMBED;
+
+// Can't catch an expression(only a dataset)
+d := dataset([{ 1, '' }], { integer a, string m} ) : stored('nofold');
+
+d t := transform
+  self.a := FAILCODE;
+  self.m := FAILMESSAGE;
+  self := [];
+end;
+
+catch(d(testThrow(a) = a), onfail(t));
+
+
+

+ 126 - 0
testing/regress/ecl/embedpy3-fold.ecl

@@ -0,0 +1,126 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2014 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+//nothor
+
+IMPORT Python3;
+
+integer add1(integer val) := EMBED(Python3:FOLD)
+val+1
+ENDEMBED;
+
+string add2(string val) := EMBED(Python3:FOLD)
+val+'1'
+ENDEMBED;
+
+string add3(varstring val) := EMBED(Python3:FOLD)
+val+'1'
+ENDEMBED;
+
+utf8 add4(utf8 val) := EMBED(Python3:FOLD)
+val+'1'
+ENDEMBED;
+
+unicode add5(unicode val) := EMBED(Python3:FOLD)
+val+'1'
+ENDEMBED;
+
+utf8 add6(utf8 val) := EMBED(Python3:FOLD)
+return val+'1'
+ENDEMBED;
+
+unicode add7(unicode val) := EMBED(Python3:FOLD)
+return val+'1'
+ENDEMBED;
+
+data testData(data val) := EMBED(Python3:FOLD)
+val[0] = val[0] + 1
+return val
+ENDEMBED;
+
+set of integer testSet(set of integer val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of string testSet2(set of string val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of string testSet3(set of string8 val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of utf8 testSet4(set of utf8 val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of varstring testSet5(set of varstring val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of varstring8 testSet6(set of varstring8 val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of unicode testSet7(set of unicode val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of unicode8 testSet8(set of unicode8 val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+set of data testSet9(set of data val) := EMBED(Python3:FOLD)
+return val[0:2]
+ENDEMBED;
+
+// And a test that is NOT foldable:
+
+string myvalue := 'test' : STORED('myvalue');
+
+integer add1a(integer val) := EMBED(Python3:FOLD)
+val+1
+ENDEMBED;
+
+
+ASSERT(add1(10)=11, CONST);
+ASSERT(add1a(10)=11);
+ASSERT(add2('Hello')='Hello1', CONST);
+ASSERT(add3('World')='World1', CONST);
+ASSERT(add4(U'Oh là là Straße')=U'Oh là là Straße1', CONST);
+ASSERT(add5(U'Стоял')=U'Стоял1', CONST);
+ASSERT(add6(U'Oh là là Straße')=U'Oh là là Straße1', CONST);
+ASSERT(add7(U'Стоял')=U'Стоял1', CONST);
+
+ASSERT(add2('Oh là là Straße')='Oh là là Straße1', CONST);  // Passing latin chars - should be untranslated
+
+ASSERT(testData(D'ax')=D'bx', CONST);
+ASSERT(testSet([1,3,2])=[1,3], CONST);
+ASSERT(testSet2(['red','green','yellow'])=['red','green'],CONST);
+ASSERT(testSet3(['one','two','three'])=['one','two'],CONST);
+
+ASSERT(testSet4([U'Oh', U'là', U'Straße'])=[U'Oh', U'là'], CONST);
+ASSERT(testSet5(['Un','Deux','Trois'])=['Un','Deux'], CONST);
+ASSERT(testSet6(['Uno','Dos','Tre'])=['Uno','Dos'], CONST);
+
+ASSERT(testSet7([U'On', U'der', U'Straße'])=[U'On', U'der'], CONST);
+ASSERT(testSet8([U'Aus', U'zum', U'Straße'])=[U'Aus', U'zum'], CONST);
+ASSERT(testSet9([D'Aus', D'zum', D'Strade'])=[D'Aus', D'zum'], CONST);
+
+OUTPUT('ok');

+ 159 - 0
testing/regress/ecl/embedpy3.ecl

@@ -0,0 +1,159 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2014 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+//nothor
+
+IMPORT Python3;
+
+Python3.Language.syntaxcheck('1+2');
+
+integer add1(integer val) := EMBED(Python3)
+val+1
+ENDEMBED;
+
+string add2(string val) := EMBED(Python3)
+val+'1'
+ENDEMBED;
+
+string add3(varstring val) := EMBED(Python3)
+val+'1'
+ENDEMBED;
+
+utf8 add4(utf8 val) := EMBED(Python3)
+val+'1'
+ENDEMBED;
+
+unicode add5(unicode val) := EMBED(Python3)
+val+'1'
+ENDEMBED;
+
+utf8 add6(utf8 val) := EMBED(Python3)
+return val+'1'
+ENDEMBED;
+
+unicode add7(unicode val) := EMBED(Python3)
+return val+'1'
+ENDEMBED;
+
+data testData(data val) := EMBED(Python3)
+val[0] = val[0] + 1
+return val
+ENDEMBED;
+
+set of integer testSet(set of integer val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of string testSet2(set of string val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of string testSet3(set of string8 val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of utf8 testSet4(set of utf8 val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of varstring testSet5(set of varstring val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of varstring8 testSet6(set of varstring8 val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of unicode testSet7(set of unicode val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of unicode8 testSet8(set of unicode8 val) := EMBED(Python3)
+return sorted(val)
+ENDEMBED;
+
+set of data testSet9(set of data val) := EMBED(Python3)
+return val
+ENDEMBED;
+
+add1(10);
+add2('Hello');
+add3('World');
+add4(U'Oh là là Straße');
+add5(U'Стоял');
+add6(U'Oh là là Straße');
+add7(U'Стоял');
+
+add2('Oh là là Straße');  // Passing latin chars - should be untranslated
+
+testData(D'aa');
+testSet([1,3,2]);
+testSet2(['red','green','yellow']);
+testSet3(['one','two','three']);
+testSet4([U'Oh', U'là', U'Straße']);
+testSet5(['Un','Deux','Trois']);
+testSet6(['Uno','Dos','Tre']);
+testSet7([U'On', U'der', U'Straße']);
+testSet8([U'Aus', U'zum', U'Straße']);
+testSet9([D'Aus', D'zum', D'Strade']);
+
+s1 :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := add1(COUNTER)));
+s2 :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := add1(COUNTER/2)));
+ SUM(NOFOLD(s1 + s2), a);
+
+s1a :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) add2((STRING)COUNTER)));
+s2a :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) add3((STRING)(COUNTER/2))));
+ SUM(NOFOLD(s1a + s2a), a);
+
+s1b :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := COUNTER+1));
+s2b :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (COUNTER/2)+1));
+ SUM(NOFOLD(s1b + s2b), a);
+
+s1c :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) ((STRING) COUNTER + '1')));
+s2c :=DATASET(250000, TRANSFORM({ integer a }, SELF.a := (integer) ((STRING)(COUNTER/2) + '1')));
+ SUM(NOFOLD(s1c + s2c), a);
+
+unsigned persistscope1(unsigned a) := EMBED(Python3: globalscope('yo'),persist('workunit'))
+  global b
+  b = a + 1
+  return a
+ENDEMBED;
+
+unsigned usepersistscope1(unsigned a) := EMBED(Python3: globalscope('yo'),persist('workunit'))
+  global b
+  return a + b
+ENDEMBED;
+
+unsigned persistscope2(unsigned a) := EMBED(Python3: globalscope('yi'),persist('workunit'))
+  global b
+  b = a + 11
+  return a
+ENDEMBED;
+
+unsigned usepersistscope2(unsigned a) := EMBED(Python3: globalscope('yi'),persist('workunit'))
+  global b
+  return a + b
+ENDEMBED;
+
+sequential(
+  persistscope1(1),
+  persistscope2(1),
+  usepersistscope1(1),
+  usepersistscope2(1)
+);

+ 36 - 0
testing/regress/ecl/embedpy3a.ecl

@@ -0,0 +1,36 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2014 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+import python3;
+
+string anagram(string word) := EMBED(Python3)
+  def anagram(w):
+    if word == 'cat':
+      return 'act'
+    else:
+      return w
+
+  return anagram(word)
+ENDEMBED;
+
+anagram('dog');
+anagram('cat');
+
+
+

+ 3 - 0
testing/regress/ecl/key/embedpy3-catch.xml

@@ -0,0 +1,3 @@
+<Dataset name='Result 1'>
+ <Row><a>0</a><m>pyembed: Error from Python</m></Row>
+</Dataset>

+ 3 - 0
testing/regress/ecl/key/embedpy3-fold.xml

@@ -0,0 +1,3 @@
+<Dataset name='Result 1'>
+ <Row><Result_1>ok</Result_1></Row>
+</Dataset>

+ 81 - 0
testing/regress/ecl/key/embedpy3.xml

@@ -0,0 +1,81 @@
+<Dataset name='Result 1'>
+ <Row><Result_1>true</Result_1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>11</Result_2></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><Result_3>Hello1</Result_3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><Result_4>World1</Result_4></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><Result_5>Oh là là Straße1</Result_5></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><Result_6>Стоял1</Result_6></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><Result_7>Oh là là Straße1</Result_7></Row>
+</Dataset>
+<Dataset name='Result 8'>
+ <Row><Result_8>Стоял1</Result_8></Row>
+</Dataset>
+<Dataset name='Result 9'>
+ <Row><Result_9>Oh l&#224; l&#224; Stra&#223;e1</Result_9></Row>
+</Dataset>
+<Dataset name='Result 10'>
+ <Row><Result_10>6261</Result_10></Row>
+</Dataset>
+<Dataset name='Result 11'>
+ <Row><Result_11><Item>1</Item><Item>2</Item><Item>3</Item></Result_11></Row>
+</Dataset>
+<Dataset name='Result 12'>
+ <Row><Result_12><Item>green</Item><Item>red</Item><Item>yellow</Item></Result_12></Row>
+</Dataset>
+<Dataset name='Result 13'>
+ <Row><Result_13><Item>one     </Item><Item>three   </Item><Item>two     </Item></Result_13></Row>
+</Dataset>
+<Dataset name='Result 14'>
+ <Row><Result_14><Item>Oh</Item><Item>Straße</Item><Item>là</Item></Result_14></Row>
+</Dataset>
+<Dataset name='Result 15'>
+ <Row><Result_15><Item>Deux</Item><Item>Trois</Item><Item>Un</Item></Result_15></Row>
+</Dataset>
+<Dataset name='Result 16'>
+ <Row><Result_16><Item>Dos</Item><Item>Tre</Item><Item>Uno</Item></Result_16></Row>
+</Dataset>
+<Dataset name='Result 17'>
+ <Row><Result_17><Item>On</Item><Item>Straße</Item><Item>der</Item></Result_17></Row>
+</Dataset>
+<Dataset name='Result 18'>
+ <Row><Result_18><Item>Aus     </Item><Item>Straße  </Item><Item>zum     </Item></Result_18></Row>
+</Dataset>
+<Dataset name='Result 19'>
+ <Row><Result_19><Item>417573</Item><Item>7A756D</Item><Item>537472616465</Item></Result_19></Row>
+</Dataset>
+<Dataset name='Result 20'>
+ <Row><Result_20>46875625000</Result_20></Row>
+</Dataset>
+<Dataset name='Result 21'>
+ <Row><Result_21>328126500000</Result_21></Row>
+</Dataset>
+<Dataset name='Result 22'>
+ <Row><Result_22>46875625000</Result_22></Row>
+</Dataset>
+<Dataset name='Result 23'>
+ <Row><Result_23>328126500000</Result_23></Row>
+</Dataset>
+<Dataset name='Result 24'>
+ <Row><Result_24>1</Result_24></Row>
+</Dataset>
+<Dataset name='Result 25'>
+ <Row><Result_25>1</Result_25></Row>
+</Dataset>
+<Dataset name='Result 26'>
+ <Row><Result_26>3</Result_26></Row>
+</Dataset>
+<Dataset name='Result 27'>
+ <Row><Result_27>13</Result_27></Row>
+</Dataset>

+ 6 - 0
testing/regress/ecl/key/embedpy3a.xml

@@ -0,0 +1,6 @@
+<Dataset name='Result 1'>
+ <Row><Result_1>dog</Result_1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>act</Result_2></Row>
+</Dataset>

+ 6 - 0
testing/regress/ecl/key/py3import.xml

@@ -0,0 +1,6 @@
+<Dataset name='Result 1'>
+ <Row><Result_1>Hello world!</Result_1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><Result_2>15</Result_2></Row>
+</Dataset>

+ 28 - 0
testing/regress/ecl/key/py3streame.xml

@@ -0,0 +1,28 @@
+<Dataset name='Result 1'>
+ <Row><name1>Gavin</name1><name2>Halliday  </name2><childnames><Row><name>a</name><value>1</value></Row><Row><name>b</name><value>2</value></Row><Row><name>c</name><value>3</value></Row></childnames><childdict><Row><name>aa</name><value>11</value></Row></childdict><r><name>aaa</name><value>111</value></r><val1>250</val1><val2>-1</val2><u1>là</u1><u2>là</u2><u3>là      </u3><val3>1</val3><d>4141</d><b>false</b><ss1><Item>1</Item><Item>2</Item></ss1></Row>
+ <Row><name1>John</name1><name2>Smith     </name2><childnames></childnames><childdict></childdict><r><name>c</name><value>3</value></r><val1>250</val1><val2>-1</val2><u1>là</u1><u2>là</u2><u3>là      </u3><val3>2</val3><d>4141</d><b>true</b><ss1><Item>3</Item></ss1></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><name>Generate:</name><value>0</value></Row>
+ <Row><name>Generate:</name><value>1</value></Row>
+ <Row><name>Generate:</name><value>2</value></Row>
+ <Row><name>Generate:</name><value>3</value></Row>
+ <Row><name>Generate:</name><value>4</value></Row>
+ <Row><name>Generate:</name><value>5</value></Row>
+ <Row><name>Generate:</name><value>6</value></Row>
+ <Row><name>Generate:</name><value>7</value></Row>
+ <Row><name>Generate:</name><value>8</value></Row>
+ <Row><name>Generate:</name><value>9</value></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><Result_3>500</Result_3></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><Result_4>499</Result_4></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><Result_5>Yo</Result_5></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><name1>Gavin</name1><name2>Halliday  </name2><childnames><Row><name>a</name><value>1</value></Row><Row><name>b</name><value>2</value></Row><Row><name>c</name><value>3</value></Row></childnames><childdict><Row><name>aa</name><value>11</value></Row></childdict><r><name>aaa</name><value>111</value></r><val1>250</val1><val2>-1</val2><u1>là</u1><u2>là</u2><u3>là      </u3><val3>1</val3><d>4141</d><b>false</b><ss1><Item>1</Item><Item>2</Item></ss1></Row>
+</Dataset>

+ 36 - 0
testing/regress/ecl/key/py3streame2.xml

@@ -0,0 +1,36 @@
+<Dataset name='Result 1'>
+ <Row><name>Generate:</name><value>0</value></Row>
+ <Row><name>Generate:</name><value>1</value></Row>
+ <Row><name>Generate:</name><value>2</value></Row>
+ <Row><name>Generate:</name><value>3</value></Row>
+ <Row><name>Generate:</name><value>4</value></Row>
+ <Row><name>Generate:</name><value>5</value></Row>
+ <Row><name>Generate:</name><value>6</value></Row>
+ <Row><name>Generate:</name><value>7</value></Row>
+ <Row><name>Generate:</name><value>8</value></Row>
+ <Row><name>Generate:</name><value>9</value></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><name>name1</name><value>1</value></Row>
+ <Row><name>name2</name><value>2</value></Row>
+</Dataset>
+<Dataset name='Result 3'>
+ <Row><name>1</name></Row>
+ <Row><name>2</name></Row>
+ <Row><name>3</name></Row>
+</Dataset>
+<Dataset name='Result 4'>
+ <Row><name>1</name></Row>
+ <Row><name>2</name></Row>
+ <Row><name>3</name></Row>
+</Dataset>
+<Dataset name='Result 5'>
+ <Row><name>Hello</name><value>10</value></Row>
+</Dataset>
+<Dataset name='Result 6'>
+ <Row><name>Hello</name><value>10</value></Row>
+</Dataset>
+<Dataset name='Result 7'>
+ <Row><name>Richard</name><value>10</value></Row>
+ <Row><name>dsfg</name><value>10</value></Row>
+</Dataset>

+ 10 - 0
testing/regress/ecl/key/py3streame3.xml

@@ -0,0 +1,10 @@
+<Dataset name='Result 1'>
+ <Row><name></name><value>10</value></Row>
+ <Row><name>Mr. </name><value>10</value></Row>
+ <Row><name>Rev. </name><value>10</value></Row>
+</Dataset>
+<Dataset name='Result 2'>
+ <Row><name></name><value>10</value></Row>
+ <Row><name>Mr. </name><value>10</value></Row>
+ <Row><name>Rev. </name><value>10</value></Row>
+</Dataset>

+ 1 - 1
testing/regress/ecl/key/streame.xml

@@ -1,6 +1,6 @@
 <Dataset name='Result 1'>
  <Row><name1>Gavin</name1><name2>Halliday  </name2><childnames><Row><name>a</name><value>1</value></Row><Row><name>b</name><value>2</value></Row><Row><name>c</name><value>3</value></Row></childnames><childdict><Row><name>aa</name><value>11</value></Row></childdict><r><name>aaa</name><value>111</value></r><val1>250</val1><val2>-1</val2><u1>là</u1><u2>là</u2><u3>là      </u3><val3>1</val3><d>4141</d><b>false</b><ss1><Item>1</Item><Item>2</Item></ss1></Row>
- <Row><name1>John</name1><name2>Smith     </name2><childnames></childnames><childdict></childdict><r><name>c</name><value>3</value></r><val1>250</val1><val2>-1</val2><u1>là</u1><u2>là</u2><u3>là      </u3><val3>2</val3><d>4141</d><b>true</b><ss1></ss1></Row>
+ <Row><name1>John</name1><name2>Smith     </name2><childnames></childnames><childdict></childdict><r><name>c</name><value>3</value></r><val1>250</val1><val2>-1</val2><u1>là</u1><u2>là</u2><u3>là      </u3><val3>2</val3><d>4141</d><b>true</b><ss1><Item>3</Item></ss1></Row>
 </Dataset>
 <Dataset name='Result 2'>
  <Row><name>Generate:</name><value>0</value></Row>

+ 27 - 0
testing/regress/ecl/py3import.ecl

@@ -0,0 +1,27 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2014 HPCC Systems.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+import python3;
+string pcat(string a, string b) := IMPORT(Python3, '/opt/HPCCSystems/examples/embed/python_cat.cat':time);
+pcat('Hello ', 'world!');
+
+integer padd(integer a, integer b) := EMBED(Python3 :time)
+   return a + b
+ENDEMBED;
+padd(1, 2)*5;

+ 92 - 0
testing/regress/ecl/py3streame.ecl

@@ -0,0 +1,92 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+IMPORT Python3;
+
+childrec := RECORD
+   string name => unsigned value;
+END;
+
+namesRecord := RECORD
+    STRING name1;
+    STRING10 name2;
+    LINKCOUNTED DATASET(childrec) childnames;
+    LINKCOUNTED DICTIONARY(childrec) childdict{linkcounted};
+    childrec r;
+    unsigned1 val1;
+    integer1   val2;
+    UTF8 u1;
+    UNICODE u2;
+    UNICODE8 u3;
+    BIG_ENDIAN unsigned4 val3;
+    DATA d;
+    BOOLEAN b;
+    SET OF STRING ss1;
+END;
+
+dataset(namesRecord) blockedNames(string prefix) := EMBED(Python3)
+  return ["Gavin","John","Bart"]
+ENDEMBED;
+
+_linkcounted_ dataset(namesRecord) linkedNames(string prefix) := EMBED(Python3)
+  return ["Gavin","John","Bart"]
+ENDEMBED;
+
+dataset(namesRecord) streamedNames(data d, utf8 u) := EMBED(Python3)
+  return [  \
+     ("Gavin", "Halliday", [("a", 1),("b", 2),("c", 3)], [("aa", 11)], ("aaa", 111), 250, -1,  U'là',  U'là',  U'là', 1, d, False, ["1","2"]), \
+     ("John", "Smith", [], [], ("c", 3), 250, -1,  U'là',  U'là',  u, 2, d, True, set(["3"])) \
+     ]
+ENDEMBED;
+
+// Test use of Python generator object for lazy evaluation...
+
+dataset(childrec) testGenerator(unsigned lim) := EMBED(Python3:time)
+  num = 0
+  while num < lim:
+    yield ("Generate:", num)
+    num += 1
+ENDEMBED;
+
+output(streamedNames(d'AA', u'là'));
+output (testGenerator(10));
+
+// Test what happens when two threads pull from a generator
+c := testGenerator(1000);
+count(c(value < 500));
+count(c(value > 500));
+
+// Test Python code returning named tuples
+childrec tnamed(string s) := EMBED(Python3)
+  import collections;
+  childrec = collections.namedtuple("childrec", "value,name")
+  return childrec(1,s)
+ENDEMBED;
+
+output(tnamed('Yo').name);
+
+// Test passing records into Python
+
+dataset(namesRecord) streamInOut(dataset(namesRecord) recs) := EMBED(Python3)
+  for rec in recs:
+    if rec.name1 == 'Gavin':
+       yield rec
+ENDEMBED;
+
+output(streamInOut(streamedNames(d'AA', u'là')));

+ 87 - 0
testing/regress/ecl/py3streame2.ecl

@@ -0,0 +1,87 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+IMPORT Python3;
+
+childrec := RECORD
+   string name => unsigned value;
+END;
+
+namerec := RECORD
+   string name;
+END;
+
+namerec2 := RECORD
+   string name;
+   string name2;
+END;
+
+// Test use of Python generator object for lazy evaluation...
+
+dataset(childrec) testGenerator(unsigned lim) := EMBED(Python3)
+  num = 0
+  while num < lim:
+    yield ("Generate:", num)
+    num += 1
+ENDEMBED;
+
+// Test use of Python named tuple...
+
+dataset(childrec) testNamedTuple(unsigned lim) := EMBED(Python3)
+  import collections
+  ChildRec = collections.namedtuple("childrec", "value, name") # Note - order is reverse of childrec - but works as we get fields by name
+  c1 = ChildRec(1, "name1")
+  c2 = ChildRec(name="name2", value=2)
+  return [ c1, c2 ]
+ENDEMBED;
+
+// Test 'missing tuple' case...
+
+dataset(namerec) testMissingTuple1(unsigned lim) := EMBED(Python3)
+  return [ '1', '2', '3' ]
+ENDEMBED;
+
+dataset(namerec) testMissingTuple2(unsigned lim) := EMBED(Python3)
+  return [ ('1'), ('2'), ('3') ]
+ENDEMBED;
+
+// Test returning a row
+childrec testRowReturn(unsigned lim) := EMBED(Python3)
+  return ("Hello", lim)
+ENDEMBED;
+
+// Test defining a transform
+transform(childrec) testTransform(unsigned lim) := EMBED(Python3)
+  return ("Hello", lim)
+ENDEMBED;
+
+// Test a transform with input and output rows
+transform(childrec) testTransform2(namerec inrec, unsigned p) := EMBED(Python3)
+  return (inrec.name, p)
+ENDEMBED;
+
+output (testGenerator(10));
+output (testNamedTuple(10));
+output (testMissingTuple1(10));
+output (testMissingTuple2(10));
+
+output(testRowReturn(10));
+output(row(testTransform(10)));
+d := dataset([{'Richard'},{'dsfg'}], namerec);
+output(project(d, testTransform2(LEFT, 10)));

+ 43 - 0
testing/regress/ecl/py3streame3.ecl

@@ -0,0 +1,43 @@
+/*##############################################################################
+
+    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+############################################################################## */
+
+//class=embedded
+
+IMPORT Python3;
+
+childrec := RECORD
+   string name => unsigned value;
+END;
+
+titleRec := { string title };
+titles := dataset(['', 'Mr. ', 'Rev. '], titleRec);
+
+// Test defining a transform
+transform(childrec) testTransformTitle(titleRec inrec, unsigned lim) := EMBED(Python3)
+  return (inrec.title, lim)
+ENDEMBED;
+
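+// Test defining a transform whose input row is declared as a _linkcounted_ row
+//MORE: The embed function should... (note was left unfinished - TODO: complete or remove)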
+transform(childrec) testTransformTitle2(_linkcounted_ row(titleRec) inrec, unsigned lim) := EMBED(Python3)
+  return (inrec.title, lim)
+ENDEMBED;
+
+sequential(
+output(project(titles, testTransformTitle(LEFT, 10)));
+output(project(titles, testTransformTitle2(LEFT, 10)));
+);

+ 2 - 2
testing/regress/ecl/streame.ecl

@@ -50,8 +50,8 @@ ENDEMBED;
 
 dataset(namesRecord) streamedNames(data d, utf8 u) := EMBED(Python)
   return [  \
-     ("Gavin", "Halliday", [("a", 1),("b", 2),("c", 3)], [("aa", 11)], ("aaa", 111), 250, -1,  U'là',  U'là',  U'là', 1, d, False, set(["1","2"])), \
-     ("John", "Smith", [], [], ("c", 3), 250, -1,  U'là',  U'là',  u, 2, d, True, []) \
+     ("Gavin", "Halliday", [("a", 1),("b", 2),("c", 3)], [("aa", 11)], ("aaa", 111), 250, -1,  U'là',  U'là',  U'là', 1, d, False, ["1","2"]), \
+     ("John", "Smith", [], [], ("c", 3), 250, -1,  U'là',  U'là',  u, 2, d, True, set(["3"])) \
      ]
 ENDEMBED;