Browse Source

HPCC-18840 Support __future__ usage in embedded Python

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 7 years ago
parent
commit
898d2d74b2

+ 23 - 7
plugins/py3embed/py3embed.cpp

@@ -30,6 +30,7 @@
 #include "jexcept.hpp"
 #include "jutil.hpp"
 #include "jthread.hpp"
+#include "jregexp.hpp"
 #include "hqlplugins.hpp"
 #include "deftype.hpp"
 #include "eclhelper.hpp"
@@ -431,15 +432,30 @@ public:
 protected:
     static StringBuffer &wrapPythonText(StringBuffer &out, const char *in, const char *params)
     {
-        out.appendf("def __user__(%s):\n  ", params);
-        char c;
-        while ((c = *in++) != '\0')
+        // Complicated by needing to keep future import lines outside defined function
+        // Per python spec, a future statement must appear near the top of the module. The only lines that can appear before a future statement are:
+        //   the module docstring (if any),
+        //   comments,
+        //   blank lines, and
+        //   other future statements.
+        // We don't attempt to parse the python to spot these - instead, we pull all lines up to and including the last future statement out to the global scope.
+        // Because this approach is deliberately unsophisticated, it will be fooled by code containing text that looks like a future statement inside a multiline string.
+        // That limitation is accepted as the trade-off for not parsing the Python source.
+        StringArray lines;
+        lines.appendList(in, "\n", false);
+        RegExpr expr("^ *from +__future__ +import ");
+        unsigned leadingLines = 0;
+        ForEachItemIn(idx, lines)
         {
-            out.append(c);
-            if (c=='\n')
-                out.append("  ");
+            if (expr.find(lines.item(idx)))
+                leadingLines = idx+1;
         }
-        out.appendf("\n__result__ = __user__(%s)\n", params);
+        for (unsigned leadingLine = 0; leadingLine < leadingLines; leadingLine++)
+            out.append(lines.item(leadingLine)).append('\n');
+        out.appendf("def __user__(%s):\n", params);
+        for (unsigned line = leadingLines; line < lines.length(); line++)
+            out.append("  ").append(lines.item(line)).append('\n');
+        out.appendf("__result__ = __user__(%s)\n", params);
         return out;
     }
     PyThreadState *tstate;

+ 23 - 7
plugins/pyembed/pyembed.cpp

@@ -29,6 +29,7 @@
 #include "frameobject.h"
 #include "jexcept.hpp"
 #include "jthread.hpp"
+#include "jregexp.hpp"
 #include "hqlplugins.hpp"
 #include "deftype.hpp"
 #include "eclhelper.hpp"
@@ -436,15 +437,30 @@ public:
 protected:
     static StringBuffer &wrapPythonText(StringBuffer &out, const char *in, const char *params)
     {
-        out.appendf("def __user__(%s):\n  ", params);
-        char c;
-        while ((c = *in++) != '\0')
+        // Complicated by needing to keep future import lines outside defined function
+        // Per python spec, a future statement must appear near the top of the module. The only lines that can appear before a future statement are:
+        //   the module docstring (if any),
+        //   comments,
+        //   blank lines, and
+        //   other future statements.
+        // We don't attempt to parse the python to spot these - instead, we pull all lines up to and including the last future statement out to the global scope.
+        // Because this approach is deliberately unsophisticated, it will be fooled by code containing text that looks like a future statement inside a multiline string.
+        // That limitation is accepted as the trade-off for not parsing the Python source.
+        StringArray lines;
+        lines.appendList(in, "\n", false);
+        RegExpr expr("^ *from +__future__ +import ");
+        unsigned leadingLines = 0;
+        ForEachItemIn(idx, lines)
         {
-            out.append(c);
-            if (c=='\n')
-                out.append("  ");
+            if (expr.find(lines.item(idx)))
+                leadingLines = idx+1;
         }
-        out.appendf("\n__result__ = __user__(%s)\n", params);
+        for (unsigned leadingLine = 0; leadingLine < leadingLines; leadingLine++)
+            out.append(lines.item(leadingLine)).append('\n');
+        out.appendf("def __user__(%s):\n", params);
+        for (unsigned line = leadingLines; line < lines.length(); line++)
+            out.append("  ").append(lines.item(line)).append('\n');
+        out.appendf("__result__ = __user__(%s)\n", params);
         return out;
     }
     PyThreadState *tstate = nullptr;

+ 13 - 9
system/jlib/jutil.cpp

@@ -1435,7 +1435,7 @@ void JBASE32_Decode(const char *bi,StringBuffer &out)
 }
 
 
-static void DelimToStringArray(const char *csl, StringArray &dst, const char *delim, bool deldup)
+static void DelimToStringArray(const char *csl, StringArray &dst, const char *delim, bool deldup, bool trimSpaces)
 {
     if (!csl)
         return;
@@ -1449,9 +1449,11 @@ static void DelimToStringArray(const char *csl, StringArray &dst, const char *de
         c = 0;
     StringBuffer str;
     unsigned dstlen=dst.ordinality();
-    for (;;) {
-        while (isspace(*s))
-            s++;
+    for (;;)
+    {
+        if (trimSpaces)
+            while (isspace(*s))
+                s++;
         if (!*s&&(dst.ordinality()==dstlen)) // this check is to allow trailing separators (e.g. ",," is 3 (NULL) entries) but not generate an entry for ""
             break;
         const char *e = s;
@@ -1464,7 +1466,9 @@ static void DelimToStringArray(const char *csl, StringArray &dst, const char *de
                 break;
             e++;
         }
-        str.clear().append((size32_t)(e-s),s).clip();
+        str.clear().append((size32_t)(e-s),s);
+        if (trimSpaces)
+            str.clip();
         if (deldup) {
             const char *s1 = str.str();
             unsigned i;
@@ -1482,14 +1486,14 @@ static void DelimToStringArray(const char *csl, StringArray &dst, const char *de
     }
 }
 
-void StringArray::appendList(const char *list, const char *delim)
+void StringArray::appendList(const char *list, const char *delim, bool trimSpaces)
 {
-    DelimToStringArray(list, *this, delim, false);
+    DelimToStringArray(list, *this, delim, false, trimSpaces);
 }
 
-void StringArray::appendListUniq(const char *list, const char *delim)
+void StringArray::appendListUniq(const char *list, const char *delim, bool trimSpaces)
 {
-    DelimToStringArray(list, *this, delim, true);
+    DelimToStringArray(list, *this, delim, true, trimSpaces);
 }
 
 StringBuffer &StringArray::getString(StringBuffer &ret, const char *delim)

+ 2 - 2
system/jlib/jutil.hpp

@@ -226,9 +226,9 @@ class jlib_decl StringArray : public ArrayOf<const char *, const char *, StringP
     typedef ArrayOf<const char *, const char *, StringPointerArrayMapper> PARENT;
 public:
     // Appends a list in a string delimited by 'delim'
-    void appendList(const char *list, const char *delim);
+    void appendList(const char *list, const char *delim, bool trimSpaces = true);
     // Appends a list in a string delimited by 'delim' without duplicates
-    void appendListUniq(const char *list, const char *delim);
+    void appendListUniq(const char *list, const char *delim, bool trimSpaces = true);
     StringBuffer &getString(StringBuffer &ret, const char *delim); // get CSV string of array contents
     void sortAscii(bool nocase=false);
     void sortAsciiReverse(bool nocase=false);

+ 12 - 0
testing/regress/ecl/embedpy2.ecl

@@ -93,6 +93,16 @@ set of data testSet9(set of data val) := EMBED(Python)
 return val
 ENDEMBED;
 
+real8 realdivide(integer v1, integer v2) := EMBED(Python)
+from __future__ import division
+return v1/v2
+ENDEMBED;
+
+unsigned8 truncdivide(integer v1, integer v2) := EMBED(Python)
+return v1/v2
+ENDEMBED;
+
+
 add1(10);
 add2('Hello');
 add3('World');
@@ -102,6 +112,8 @@ add6(U'Oh là là Straße');
 add7(U'Стоял');
 
 add2('Oh là là Straße');  // Passing latin chars - should be untranslated
+realdivide(3,2);
+truncdivide(3,2);
 
 testData(D'aa');
 testSet([1,3,2]);

+ 16 - 0
testing/regress/ecl/embedpy3.ecl

@@ -92,6 +92,19 @@ set of data testSet9(set of data val) := EMBED(Python3)
 return val
 ENDEMBED;
 
+real8 realdivide1(integer v1, integer v2) := EMBED(Python3)
+from __future__ import division
+return v1/v2
+ENDEMBED;
+
+real8 realdivide2(integer v1, integer v2) := EMBED(Python3)
+return v1/v2
+ENDEMBED;
+
+unsigned8 truncdivide(integer v1, integer v2) := EMBED(Python3)
+return v1//v2
+ENDEMBED;
+
 add1(10);
 add2('Hello');
 add3('World');
@@ -101,6 +114,9 @@ add6(U'Oh là là Straße');
 add7(U'Стоял');
 
 add2('Oh là là Straße');  // Passing latin chars - should be untranslated
+realdivide1(3,2);
+realdivide2(3,2);
+truncdivide(3,2);
 
 testData(D'aa');
 testSet([1,3,2]);

+ 23 - 17
testing/regress/ecl/key/embedpy2.xml

@@ -26,40 +26,40 @@
  <Row><Result_9>Oh l&#224; l&#224; Stra&#223;e1</Result_9></Row>
 </Dataset>
 <Dataset name='Result 10'>
- <Row><Result_10>6261</Result_10></Row>
+ <Row><Result_10>1.5</Result_10></Row>
 </Dataset>
 <Dataset name='Result 11'>
- <Row><Result_11><Item>1</Item><Item>2</Item><Item>3</Item></Result_11></Row>
+ <Row><Result_11>1</Result_11></Row>
 </Dataset>
 <Dataset name='Result 12'>
- <Row><Result_12><Item>green</Item><Item>red</Item><Item>yellow</Item></Result_12></Row>
+ <Row><Result_12>6261</Result_12></Row>
 </Dataset>
 <Dataset name='Result 13'>
- <Row><Result_13><Item>one     </Item><Item>three   </Item><Item>two     </Item></Result_13></Row>
+ <Row><Result_13><Item>1</Item><Item>2</Item><Item>3</Item></Result_13></Row>
 </Dataset>
 <Dataset name='Result 14'>
- <Row><Result_14><Item>Oh</Item><Item>Straße</Item><Item>là</Item></Result_14></Row>
+ <Row><Result_14><Item>green</Item><Item>red</Item><Item>yellow</Item></Result_14></Row>
 </Dataset>
 <Dataset name='Result 15'>
- <Row><Result_15><Item>Deux</Item><Item>Trois</Item><Item>Un</Item></Result_15></Row>
+ <Row><Result_15><Item>one</Item><Item>three</Item><Item>two</Item></Result_15></Row>
 </Dataset>
 <Dataset name='Result 16'>
- <Row><Result_16><Item>Dos</Item><Item>Tre</Item><Item>Uno</Item></Result_16></Row>
+ <Row><Result_16><Item>Oh</Item><Item>Straße</Item><Item>là</Item></Result_16></Row>
 </Dataset>
 <Dataset name='Result 17'>
- <Row><Result_17><Item>On</Item><Item>Straße</Item><Item>der</Item></Result_17></Row>
+ <Row><Result_17><Item>Deux</Item><Item>Trois</Item><Item>Un</Item></Result_17></Row>
 </Dataset>
 <Dataset name='Result 18'>
- <Row><Result_18><Item>Aus     </Item><Item>Straße  </Item><Item>zum     </Item></Result_18></Row>
+ <Row><Result_18><Item>Dos</Item><Item>Tre</Item><Item>Uno</Item></Result_18></Row>
 </Dataset>
 <Dataset name='Result 19'>
- <Row><Result_19><Item>417573</Item><Item>7A756D</Item><Item>537472616465</Item></Result_19></Row>
+ <Row><Result_19><Item>On</Item><Item>Straße</Item><Item>der</Item></Result_19></Row>
 </Dataset>
 <Dataset name='Result 20'>
- <Row><Result_20>46875625000</Result_20></Row>
+ <Row><Result_20><Item>Aus</Item><Item>Straße</Item><Item>zum</Item></Result_20></Row>
 </Dataset>
 <Dataset name='Result 21'>
- <Row><Result_21>328126500000</Result_21></Row>
+ <Row><Result_21><Item>417573</Item><Item>7A756D</Item><Item>537472616465</Item></Result_21></Row>
 </Dataset>
 <Dataset name='Result 22'>
  <Row><Result_22>46875625000</Result_22></Row>
@@ -68,17 +68,23 @@
  <Row><Result_23>328126500000</Result_23></Row>
 </Dataset>
 <Dataset name='Result 24'>
- <Row><Result_24>1</Result_24></Row>
+ <Row><Result_24>46875625000</Result_24></Row>
 </Dataset>
 <Dataset name='Result 25'>
- <Row><Result_25>1</Result_25></Row>
+ <Row><Result_25>328126500000</Result_25></Row>
 </Dataset>
 <Dataset name='Result 26'>
- <Row><Result_26>3</Result_26></Row>
+ <Row><Result_26>1</Result_26></Row>
 </Dataset>
 <Dataset name='Result 27'>
- <Row><Result_27>13</Result_27></Row>
+ <Row><Result_27>1</Result_27></Row>
 </Dataset>
 <Dataset name='Result 28'>
- <Row><Result_28>12</Result_28></Row>
+ <Row><Result_28>3</Result_28></Row>
+</Dataset>
+<Dataset name='Result 29'>
+ <Row><Result_29>13</Result_29></Row>
+</Dataset>
+<Dataset name='Result 30'>
+ <Row><Result_30>12</Result_30></Row>
 </Dataset>

+ 27 - 18
testing/regress/ecl/key/embedpy3.xml

@@ -26,56 +26,65 @@
  <Row><Result_9>Oh l&#224; l&#224; Stra&#223;e1</Result_9></Row>
 </Dataset>
 <Dataset name='Result 10'>
- <Row><Result_10>6261</Result_10></Row>
+ <Row><Result_10>1.5</Result_10></Row>
 </Dataset>
 <Dataset name='Result 11'>
- <Row><Result_11><Item>1</Item><Item>2</Item><Item>3</Item></Result_11></Row>
+ <Row><Result_11>1.5</Result_11></Row>
 </Dataset>
 <Dataset name='Result 12'>
- <Row><Result_12><Item>green</Item><Item>red</Item><Item>yellow</Item></Result_12></Row>
+ <Row><Result_12>1</Result_12></Row>
 </Dataset>
 <Dataset name='Result 13'>
- <Row><Result_13><Item>one     </Item><Item>three   </Item><Item>two     </Item></Result_13></Row>
+ <Row><Result_13>6261</Result_13></Row>
 </Dataset>
 <Dataset name='Result 14'>
- <Row><Result_14><Item>Oh</Item><Item>Straße</Item><Item>là</Item></Result_14></Row>
+ <Row><Result_14><Item>1</Item><Item>2</Item><Item>3</Item></Result_14></Row>
 </Dataset>
 <Dataset name='Result 15'>
- <Row><Result_15><Item>Deux</Item><Item>Trois</Item><Item>Un</Item></Result_15></Row>
+ <Row><Result_15><Item>green</Item><Item>red</Item><Item>yellow</Item></Result_15></Row>
 </Dataset>
 <Dataset name='Result 16'>
- <Row><Result_16><Item>Dos</Item><Item>Tre</Item><Item>Uno</Item></Result_16></Row>
+ <Row><Result_16><Item>one</Item><Item>three</Item><Item>two</Item></Result_16></Row>
 </Dataset>
 <Dataset name='Result 17'>
- <Row><Result_17><Item>On</Item><Item>Straße</Item><Item>der</Item></Result_17></Row>
+ <Row><Result_17><Item>Oh</Item><Item>Straße</Item><Item>là</Item></Result_17></Row>
 </Dataset>
 <Dataset name='Result 18'>
- <Row><Result_18><Item>Aus     </Item><Item>Straße  </Item><Item>zum     </Item></Result_18></Row>
+ <Row><Result_18><Item>Deux</Item><Item>Trois</Item><Item>Un</Item></Result_18></Row>
 </Dataset>
 <Dataset name='Result 19'>
- <Row><Result_19><Item>417573</Item><Item>7A756D</Item><Item>537472616465</Item></Result_19></Row>
+ <Row><Result_19><Item>Dos</Item><Item>Tre</Item><Item>Uno</Item></Result_19></Row>
 </Dataset>
 <Dataset name='Result 20'>
- <Row><Result_20>46875625000</Result_20></Row>
+ <Row><Result_20><Item>On</Item><Item>Straße</Item><Item>der</Item></Result_20></Row>
 </Dataset>
 <Dataset name='Result 21'>
- <Row><Result_21>328126500000</Result_21></Row>
+ <Row><Result_21><Item>Aus</Item><Item>Straße</Item><Item>zum</Item></Result_21></Row>
 </Dataset>
 <Dataset name='Result 22'>
- <Row><Result_22>46875625000</Result_22></Row>
+ <Row><Result_22><Item>417573</Item><Item>7A756D</Item><Item>537472616465</Item></Result_22></Row>
 </Dataset>
 <Dataset name='Result 23'>
- <Row><Result_23>328126500000</Result_23></Row>
+ <Row><Result_23>46875625000</Result_23></Row>
 </Dataset>
 <Dataset name='Result 24'>
- <Row><Result_24>1</Result_24></Row>
+ <Row><Result_24>328126500000</Result_24></Row>
 </Dataset>
 <Dataset name='Result 25'>
- <Row><Result_25>1</Result_25></Row>
+ <Row><Result_25>46875625000</Result_25></Row>
 </Dataset>
 <Dataset name='Result 26'>
- <Row><Result_26>3</Result_26></Row>
+ <Row><Result_26>328126500000</Result_26></Row>
 </Dataset>
 <Dataset name='Result 27'>
- <Row><Result_27>13</Result_27></Row>
+ <Row><Result_27>1</Result_27></Row>
+</Dataset>
+<Dataset name='Result 28'>
+ <Row><Result_28>1</Result_28></Row>
+</Dataset>
+<Dataset name='Result 29'>
+ <Row><Result_29>3</Result_29></Row>
+</Dataset>
+<Dataset name='Result 30'>
+ <Row><Result_30>13</Result_30></Row>
 </Dataset>