Преглед изворни кода

HPCC-8030 R/Java/JavaScript/Python language support in ECL

Use utf8 for the script body (including when supplied as an expression).

Other changes as suggested by code review.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman пре 12 година
родитељ
комит
d00c1d7fda

+ 2 - 2
ecl/hql/hqlgram.y

@@ -1016,14 +1016,14 @@ embedBody
                         }
                         }
     | EMBED '(' abstractModule ',' expression ')'
     | EMBED '(' abstractModule ',' expression ')'
                         {
                         {
-                            parser->normalizeExpression($5, type_string, true);
+                            parser->normalizeExpression($5, type_stringorunicode, true);
                             OwnedHqlExpr language = $3.getExpr();
                             OwnedHqlExpr language = $3.getExpr();
                             OwnedHqlExpr embedText = $5.getExpr();
                             OwnedHqlExpr embedText = $5.getExpr();
                             $$.setExpr(parser->processEmbedBody($5, embedText, language, NULL), $1);
                             $$.setExpr(parser->processEmbedBody($5, embedText, language, NULL), $1);
                         }
                         }
     | IMPORT '(' abstractModule ',' expression attribs ')'
     | IMPORT '(' abstractModule ',' expression attribs ')'
                         {
                         {
-                            parser->normalizeExpression($5, type_string, true);
+                            parser->normalizeExpression($5, type_stringorunicode, true);
                             OwnedHqlExpr language = $3.getExpr();
                             OwnedHqlExpr language = $3.getExpr();
                             OwnedHqlExpr funcname = $5.getExpr();
                             OwnedHqlExpr funcname = $5.getExpr();
                             OwnedHqlExpr attribs = createComma(createAttribute(importAtom), $6.getExpr());
                             OwnedHqlExpr attribs = createComma(createAttribute(importAtom), $6.getExpr());

+ 1 - 1
ecl/hql/hqllex.l

@@ -1488,7 +1488,7 @@ FUNCTIONMACRO|MACRO {
                         yyless(CUR_TOKEN_LENGTH - delta);
                         yyless(CUR_TOKEN_LENGTH - delta);
                         lexer->yyPosition -= delta;
                         lexer->yyPosition -= delta;
                         lexer->yyColumn -= delta;
                         lexer->yyColumn -= delta;
-                        OwnedHqlExpr cppText = createConstant(createStringValue(lexer->yyBuffer+startpos, len));
+                        OwnedHqlExpr cppText = createConstant(createUnicodeValue(lexer->yyBuffer+startpos, len, "", true, false));
                         OwnedHqlExpr annotated = createLocationAnnotation(cppText.getClear(), returnToken.pos);
                         OwnedHqlExpr annotated = createLocationAnnotation(cppText.getClear(), returnToken.pos);
                         OwnedHqlExpr options = extractCppBodyAttrs(len, lexer->yyBuffer+startpos);
                         OwnedHqlExpr options = extractCppBodyAttrs(len, lexer->yyBuffer+startpos);
                         returnToken.setExpr(createComma(annotated.getClear(), options.getClear()));
                         returnToken.setExpr(createComma(annotated.getClear(), options.getClear()));

+ 2 - 1
ecl/hqlcpp/hqlcerrors.hpp

@@ -206,6 +206,7 @@
 #define HQLERR_SkipInsideCreateRow              4184
 #define HQLERR_SkipInsideCreateRow              4184
 #define HQLERR_KeyedJoinNoRightIndex_X          4185
 #define HQLERR_KeyedJoinNoRightIndex_X          4185
 #define HQLERR_ScalarOutputWithinApply          4186
 #define HQLERR_ScalarOutputWithinApply          4186
+#define HQLERR_EmbeddedTypeNotSupported_X       4187
 
 
 //Warnings....
 //Warnings....
 #define HQLWRN_PersistDataNotLikely             4500
 #define HQLWRN_PersistDataNotLikely             4500
@@ -484,7 +485,7 @@
 #define HQLERR_SkipInsideCreateRow_Text         "SKIP inside a ROW(<transform>) not supported.  It is only allowed in a DATASET transform."
 #define HQLERR_SkipInsideCreateRow_Text         "SKIP inside a ROW(<transform>) not supported.  It is only allowed in a DATASET transform."
 #define HQLERR_ScalarOutputWithinApply_Text     "A scalar output within an APPLY is undefined and may fail.  Use OUTPUT(dataset,EXTEND) instead."
 #define HQLERR_ScalarOutputWithinApply_Text     "A scalar output within an APPLY is undefined and may fail.  Use OUTPUT(dataset,EXTEND) instead."
 #define HQLERR_KeyedJoinNoRightIndex_X_Text     "Right dataset (%s) for a keyed join isn't a key"
 #define HQLERR_KeyedJoinNoRightIndex_X_Text     "Right dataset (%s) for a keyed join isn't a key"
-
+#define HQLERR_EmbeddedTypeNotSupported_X_Text  "Type %s not supported for embedded/external scripts"
 //Warnings.
 //Warnings.
 #define HQLWRN_CannotRecreateDistribution_Text  "Cannot recreate the distribution for a persistent dataset"
 #define HQLWRN_CannotRecreateDistribution_Text  "Cannot recreate the distribution for a persistent dataset"
 #define HQLWRN_RecursiveDependendencies_Text    "Recursive filename dependency"
 #define HQLWRN_RecursiveDependendencies_Text    "Recursive filename dependency"

+ 10 - 5
ecl/hqlcpp/hqlcpp.cpp

@@ -7247,7 +7247,7 @@ void HqlCppTranslator::doBuildExprEmbedBody(BuildCtx & ctx, IHqlExpression * exp
         UNIMPLEMENTED;  // It's not clear if this can ever happen - perhaps a parameterless function that used EMBED ?
         UNIMPLEMENTED;  // It's not clear if this can ever happen - perhaps a parameterless function that used EMBED ?
     }
     }
     StringBuffer text;
     StringBuffer text;
-    expr->queryChild(0)->queryValue()->getStringValue(text);
+    expr->queryChild(0)->queryValue()->getUTF8Value(text);
     text.setLength(cleanupEmbeddedCpp(text.length(), (char*)text.str()));
     text.setLength(cleanupEmbeddedCpp(text.length(), (char*)text.str()));
     OwnedHqlExpr quoted = createQuoted(text.str(), expr->getType());
     OwnedHqlExpr quoted = createQuoted(text.str(), expr->getType());
 
 
@@ -11319,7 +11319,8 @@ static IHqlExpression * replaceInlineParameters(IHqlExpression * funcdef, IHqlEx
     ForEachChild(i, formals)
     ForEachChild(i, formals)
     {
     {
         IHqlExpression * param = formals->queryChild(i);
         IHqlExpression * param = formals->queryChild(i);
-        simpleTransformer.setMapping(param, createActualFromFormal(param));
+        OwnedHqlExpr formal = createActualFromFormal(param);
+        simpleTransformer.setMapping(param, formal);
     }
     }
 
 
     return simpleTransformer.transformRoot(expr);
     return simpleTransformer.transformRoot(expr);
@@ -11372,7 +11373,7 @@ void HqlCppTranslator::buildCppFunctionDefinition(BuildCtx &funcctx, IHqlExpress
         cppBody = bodyCode->queryChild(1);
         cppBody = bodyCode->queryChild(1);
 
 
     StringBuffer text;
     StringBuffer text;
-    cppBody->queryValue()->getStringValue(text);
+    cppBody->queryValue()->getUTF8Value(text);
     //remove #option, and remove \r so we don't end up with mixed format end of lines.
     //remove #option, and remove \r so we don't end up with mixed format end of lines.
     text.setLength(cleanupEmbeddedCpp(text.length(), (char*)text.str()));
     text.setLength(cleanupEmbeddedCpp(text.length(), (char*)text.str()));
 
 
@@ -11494,7 +11495,9 @@ void HqlCppTranslator::buildScriptFunctionDefinition(BuildCtx &funcctx, IHqlExpr
             bindFunc = bindUnicodeParamAtom;
             bindFunc = bindUnicodeParamAtom;
             break;
             break;
         default:
         default:
-            UNIMPLEMENTED;
+            StringBuffer typeText;
+            getFriendlyTypeStr(paramType, typeText);
+            throwError1(HQLERR_EmbeddedTypeNotSupported_X, typeText.str());
         }
         }
         args.append(*createActualFromFormal(param));
         args.append(*createActualFromFormal(param));
         buildFunctionCall(funcctx, bindFunc, args);
         buildFunctionCall(funcctx, bindFunc, args);
@@ -11523,7 +11526,9 @@ void HqlCppTranslator::buildScriptFunctionDefinition(BuildCtx &funcctx, IHqlExpr
         returnFunc = getUTF8ResultAtom;
         returnFunc = getUTF8ResultAtom;
         break;
         break;
     default:
     default:
-        UNIMPLEMENTED;
+        StringBuffer typeText;
+        getFriendlyTypeStr(returnType, typeText);
+        throwError1(HQLERR_EmbeddedTypeNotSupported_X, typeText.str());
     }
     }
     noargs.append(*LINK(ctxVar));
     noargs.append(*LINK(ctxVar));
     OwnedHqlExpr call = bindFunctionCall(returnFunc, noargs);
     OwnedHqlExpr call = bindFunctionCall(returnFunc, noargs);

+ 2 - 2
ecl/hqlcpp/hqlcppsys.ecl

@@ -822,8 +822,8 @@ const char * cppSystemText[]  = {
     "   utf8 getUTF8Result() : method,entrypoint='getUTF8Result';",
     "   utf8 getUTF8Result() : method,entrypoint='getUTF8Result';",
     "   unicode getUnicodeResult() : method,entrypoint='getUnicodeResult';",
     "   unicode getUnicodeResult() : method,entrypoint='getUnicodeResult';",
 
 
-    "   compileEmbeddedScript(const varstring script) : method,entrypoint='compileEmbeddedScript';",
-    "   import(const varstring script) : method,entrypoint='importFunction';",
+    "   compileEmbeddedScript(const utf8 script) : method,entrypoint='compileEmbeddedScript';",
+    "   import(const utf8 script) : method,entrypoint='importFunction';",
     "   END;",
     "   END;",
     NULL };
     NULL };
 
 

+ 7 - 7
plugins/Rembed/Rembed.cpp

@@ -135,11 +135,11 @@ public:
     }
     }
     virtual __int64 getSignedResult()
     virtual __int64 getSignedResult()
     {
     {
-        return ::Rcpp::as<long int>(result); // MORE - is this the best R can do ?
+        return ::Rcpp::as<long int>(result); // Should really be long long, but RInside does not support that
     }
     }
     virtual unsigned __int64 getUnsignedResult()
     virtual unsigned __int64 getUnsignedResult()
     {
     {
-        return ::Rcpp::as<unsigned long int>(result); // MORE - is this the best R can do ?
+        return ::Rcpp::as<unsigned long int>(result); // Should really be long long, but RInside does not support that
     }
     }
     virtual void getStringResult(size32_t &__len, char * &__result)
     virtual void getStringResult(size32_t &__len, char * &__result)
     {
     {
@@ -148,11 +148,11 @@ public:
     }
     }
     virtual void getUTF8Result(size32_t &chars, char * &result)
     virtual void getUTF8Result(size32_t &chars, char * &result)
     {
     {
-        UNIMPLEMENTED;
+        throw MakeStringException(MSGAUD_user, 0, "Rembed: %s: Unicode/UTF8 results not supported", func.c_str());
     }
     }
     virtual void getUnicodeResult(size32_t &chars, UChar * &result)
     virtual void getUnicodeResult(size32_t &chars, UChar * &result)
     {
     {
-        UNIMPLEMENTED;
+        throw MakeStringException(MSGAUD_user, 0, "Rembed: %s: Unicode/UTF8 results not supported", func.c_str());
     }
     }
 
 
     virtual void bindBooleanParam(const char *name, bool val)
     virtual void bindBooleanParam(const char *name, bool val)
@@ -189,13 +189,13 @@ public:
         UNIMPLEMENTED;
         UNIMPLEMENTED;
     }
     }
 
 
-    virtual void importFunction(const char *text)
+    virtual void importFunction(size32_t lenChars, const char *utf)
     {
     {
         throwUnexpected();
         throwUnexpected();
     }
     }
-    virtual void compileEmbeddedScript(const char *text)
+    virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
     {
     {
-        func = text;
+        func.assign(utf, rtlUtf8Size(lenChars, utf));
     }
     }
 
 
     virtual void callFunction()
     virtual void callFunction()

+ 9 - 7
plugins/javaembed/javaembed.cpp

@@ -154,18 +154,20 @@ public:
         }
         }
     }
     }
 
 
-    inline void importFunction(const char *text)
+    inline void importFunction(size32_t lenChars, const char *utf)
     {
     {
+        size32_t bytes = rtlUtf8Size(lenChars, utf);
+        StringBuffer text(bytes, utf);
         if (!prevtext || strcmp(text, prevtext) != 0)
         if (!prevtext || strcmp(text, prevtext) != 0)
         {
         {
             prevtext.clear();
             prevtext.clear();
             // Name should be in the form class.method:signature
             // Name should be in the form class.method:signature
             const char *funcname = strchr(text, '.');
             const char *funcname = strchr(text, '.');
             if (!funcname)
             if (!funcname)
-                throw MakeStringException(MSGAUD_user, 0, "javaembed: Invalid import name %s - Expected classname.methodname:signature", text);
+                throw MakeStringException(MSGAUD_user, 0, "javaembed: Invalid import name %s - Expected classname.methodname:signature", text.str());
             const char *signature = strchr(funcname, ':');
             const char *signature = strchr(funcname, ':');
             if (!signature)
             if (!signature)
-                throw MakeStringException(MSGAUD_user, 0, "javaembed: Invalid import name %s - Expected classname.methodname:signature", text);
+                throw MakeStringException(MSGAUD_user, 0, "javaembed: Invalid import name %s - Expected classname.methodname:signature", text.str());
             StringBuffer classname(funcname-text, text);
             StringBuffer classname(funcname-text, text);
             funcname++;  // skip the '.'
             funcname++;  // skip the '.'
             StringBuffer methodname(signature-funcname, funcname);
             StringBuffer methodname(signature-funcname, funcname);
@@ -179,7 +181,7 @@ public:
             if (!javaMethodID)
             if (!javaMethodID)
                 throw MakeStringException(MSGAUD_user, 0, "javaembed: Failed to resolve method name %s with signature %s", methodname.str(), signature);
                 throw MakeStringException(MSGAUD_user, 0, "javaembed: Failed to resolve method name %s with signature %s", methodname.str(), signature);
             const char *returnSig = strrchr(signature, ')');
             const char *returnSig = strrchr(signature, ')');
-            assertex(returnSig);
+            assertex(returnSig);  // Otherwise how did Java accept it??
             returnSig++;
             returnSig++;
             returnType.set(returnSig);
             returnType.set(returnSig);
             argsig.set(signature);
             argsig.set(signature);
@@ -514,9 +516,9 @@ public:
         addArg(v);
         addArg(v);
     }
     }
 
 
-    virtual void importFunction(const char *text)
+    virtual void importFunction(size32_t lenChars, const char *utf)
     {
     {
-        sharedCtx->importFunction(text);
+        sharedCtx->importFunction(lenChars, utf);
         argsig = sharedCtx->querySignature();
         argsig = sharedCtx->querySignature();
         assertex(*argsig == '(');
         assertex(*argsig == '(');
         argsig++;
         argsig++;
@@ -526,7 +528,7 @@ public:
         sharedCtx->callFunction(result, args);
         sharedCtx->callFunction(result, args);
     }
     }
 
 
-    virtual void compileEmbeddedScript(const char *script)
+    virtual void compileEmbeddedScript(size32_t lenChars, const char *script)
     {
     {
         throwUnexpected();  // The java language helper supports only imported functions, not embedding java code in ECL.
         throwUnexpected();  // The java language helper supports only imported functions, not embedding java code in ECL.
     }
     }

+ 12 - 8
plugins/pyembed/pyembed.cpp

@@ -139,8 +139,10 @@ public:
         script.clear();
         script.clear();
     }
     }
 
 
-    inline PyObject * importFunction(const char *text)
+    inline PyObject * importFunction(size32_t lenChars, const char *utf)
     {
     {
+        size32_t bytes = rtlUtf8Size(lenChars, utf);
+        StringBuffer text(bytes, utf);
         if (!prevtext || strcmp(text, prevtext) != 0)
         if (!prevtext || strcmp(text, prevtext) != 0)
         {
         {
             prevtext.clear();
             prevtext.clear();
@@ -176,8 +178,10 @@ public:
         return script.getLink();
         return script.getLink();
     }
     }
 
 
-    inline PyObject *compileEmbeddedScript(const char *text)
+    inline PyObject *compileEmbeddedScript(size32_t lenChars, const char *utf)
     {
     {
+        size32_t bytes = rtlUtf8Size(lenChars, utf);
+        StringBuffer text(bytes, utf);
         if (!prevtext || strcmp(text, prevtext) != 0)
         if (!prevtext || strcmp(text, prevtext) != 0)
         {
         {
             prevtext.clear();
             prevtext.clear();
@@ -412,13 +416,13 @@ public:
         rtlFree(unicode);
         rtlFree(unicode);
     }
     }
 
 
-    virtual void importFunction(const char *text)
+    virtual void importFunction(size32_t lenChars, const char *text)
     {
     {
         throwUnexpected();
         throwUnexpected();
     }
     }
-    virtual void compileEmbeddedScript(const char *text)
+    virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
     {
     {
-        script.setown(sharedCtx->compileEmbeddedScript(text));
+        script.setown(sharedCtx->compileEmbeddedScript(lenChars, utf));
     }
     }
 
 
     virtual void callFunction()
     virtual void callFunction()
@@ -485,11 +489,11 @@ public:
         rtlFree(unicode);
         rtlFree(unicode);
     }
     }
 
 
-    virtual void importFunction(const char *text)
+    virtual void importFunction(size32_t lenChars, const char *utf)
     {
     {
-        script.setown(sharedCtx->importFunction(text));
+        script.setown(sharedCtx->importFunction(lenChars, utf));
     }
     }
-    virtual void compileEmbeddedScript(const char *text)
+    virtual void compileEmbeddedScript(size32_t len, const char *text)
     {
     {
         throwUnexpected();
         throwUnexpected();
     }
     }

+ 9 - 7
plugins/v8embed/v8embed.cpp

@@ -107,13 +107,15 @@ public:
     }
     }
     virtual void bindStringParam(const char *name, size32_t len, const char *val)
     virtual void bindStringParam(const char *name, size32_t len, const char *val)
     {
     {
-        v8::HandleScope handle_scope;
-        context->Global()->Set(v8::String::New(name), v8::String::New(val, len));
+        size32_t utfCharCount;
+        char *utfText;
+        rtlStrToUtf8X(utfCharCount, utfText, len, val);
+        bindUTF8Param(name, utfCharCount, utfText);
+        rtlFree(utfText);
     }
     }
     virtual void bindVStringParam(const char *name, const char *val)
     virtual void bindVStringParam(const char *name, const char *val)
     {
     {
-        v8::HandleScope handle_scope;
-        context->Global()->Set(v8::String::New(name), v8::String::New(val));
+        bindStringParam(name, strlen(val), val);
     }
     }
     virtual void bindUTF8Param(const char *name, size32_t chars, const char *val)
     virtual void bindUTF8Param(const char *name, size32_t chars, const char *val)
     {
     {
@@ -174,14 +176,14 @@ public:
         rtlUtf8ToUnicodeX(__chars, __result, numchars, *utf8);
         rtlUtf8ToUnicodeX(__chars, __result, numchars, *utf8);
     }
     }
 
 
-    virtual void compileEmbeddedScript(const char *text)
+    virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
     {
     {
         v8::HandleScope handle_scope;
         v8::HandleScope handle_scope;
-        v8::Handle<v8::String> source = v8::String::New(text);
+        v8::Handle<v8::String> source = v8::String::New(utf, rtlUtf8Size(lenChars, utf));
         v8::Handle<v8::Script> lscript = v8::Script::Compile(source);
         v8::Handle<v8::Script> lscript = v8::Script::Compile(source);
         script = v8::Persistent<v8::Script>::New(lscript);
         script = v8::Persistent<v8::Script>::New(lscript);
     }
     }
-    virtual void importFunction(const char *text)
+    virtual void importFunction(size32_t lenChars, const char *utf)
     {
     {
         UNIMPLEMENTED; // Not sure if meaningful for js
         UNIMPLEMENTED; // Not sure if meaningful for js
     }
     }

+ 2 - 2
rtl/eclrtl/eclrtl.hpp

@@ -750,8 +750,8 @@ interface IEmbedFunctionContext : extends IInterface
     virtual void getUTF8Result(size32_t &chars, char * &result) = 0;
     virtual void getUTF8Result(size32_t &chars, char * &result) = 0;
     virtual void getUnicodeResult(size32_t &chars, UChar * &result) = 0;
     virtual void getUnicodeResult(size32_t &chars, UChar * &result) = 0;
 
 
-    virtual void importFunction(const char *function) = 0;
-    virtual void compileEmbeddedScript(const char *script) = 0;
+    virtual void importFunction(size32_t len, const char *function) = 0;
+    virtual void compileEmbeddedScript(size32_t len, const char *script) = 0;
     virtual void callFunction() = 0;
     virtual void callFunction() = 0;
 };
 };
 
 

+ 3 - 0
system/jlib/jthread.hpp

@@ -56,6 +56,9 @@ extern jlib_decl unsigned threadLogID();  // for use in logging
 // terminates. Such a function should call on to the previously registered function (if any) - generally you
 // terminates. Such a function should call on to the previously registered function (if any) - generally you
 // would expect to store that value in thread-local storage.
 // would expect to store that value in thread-local storage.
 // This can be used to ensure that thread-specific objects can be properly destructed.
 // This can be used to ensure that thread-specific objects can be properly destructed.
+// Note that threadpools also call the thread termination hook when each thread's main function terminates,
+// so the hook function should clear any variables if necessary rather than assuming that they will be cleared
+// at thread startup time.
 
 
 typedef void (*ThreadTermFunc)();
 typedef void (*ThreadTermFunc)();
 extern jlib_decl ThreadTermFunc addThreadTermFunc(ThreadTermFunc onTerm);
 extern jlib_decl ThreadTermFunc addThreadTermFunc(ThreadTermFunc onTerm);

+ 1 - 1
testing/ecl/embedjs.ecl

@@ -6,7 +6,7 @@ integer add1(integer val) := EMBED(javascript) val+1; ENDEMBED;
 string add2(string val) := EMBED(javascript) val+'1'; ENDEMBED;
 string add2(string val) := EMBED(javascript) val+'1'; ENDEMBED;
 string add3(varstring val) := EMBED(javascript) val+'1'; ENDEMBED;
 string add3(varstring val) := EMBED(javascript) val+'1'; ENDEMBED;
 utf8 add4(utf8 val) := EMBED(javascript) val+'1'; ENDEMBED;
 utf8 add4(utf8 val) := EMBED(javascript) val+'1'; ENDEMBED;
-unicode add5(unicode val) := EMBED(javascript) val+'1'; ENDEMBED;
+unicode add5(unicode val) := EMBED(javascript, U' val+\' at Oh là là Straße\';');
 
 
 integer testThrow(integer val) := EMBED(javascript) throw new Error("Error from JavaScript"); ENDEMBED;
 integer testThrow(integer val) := EMBED(javascript) throw new Error("Error from JavaScript"); ENDEMBED;
 
 

+ 2 - 2
testing/ecl/key/embedjs.xml

@@ -14,10 +14,10 @@
  <Row><Result_5>Oh là là Straße1</Result_5></Row>
  <Row><Result_5>Oh là là Straße1</Result_5></Row>
 </Dataset>
 </Dataset>
 <Dataset name='Result 6'>
 <Dataset name='Result 6'>
- <Row><Result_6>Стоял1</Result_6></Row>
+ <Row><Result_6>Стоял at Oh là là Straße</Result_6></Row>
 </Dataset>
 </Dataset>
 <Dataset name='Result 7'>
 <Dataset name='Result 7'>
- <Row><Result_7>Oh l&#253; l&#253; Stra&#253;e1</Result_7></Row>
+ <Row><Result_7>Oh l&#224; l&#224; Stra&#223;e1</Result_7></Row>
 </Dataset>
 </Dataset>
 <Dataset name='Result 8'>
 <Dataset name='Result 8'>
  <Row><a>0</a><m>v8embed: Error: Error from JavaScript</m></Row>
  <Row><a>0</a><m>v8embed: Error: Error from JavaScript</m></Row>