Rembed.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "RInside.h"
  15. #include "jexcept.hpp"
  16. #include "jthread.hpp"
  17. #include "hqlplugins.hpp"
  18. #include "deftype.hpp"
  19. #include "eclrtl.hpp"
  20. #include "eclrtl_imp.hpp"
  21. #ifdef _WIN32
  22. #define EXPORT __declspec(dllexport)
  23. #else
  24. #define EXPORT
  25. #endif
  // Version string reported for this plugin.
  static const char *version = "R Embed Helper 1.0.0";

  // NULL-terminated list of the version strings this build is compatible with.
  static const char * compatibleVersions[] =
  {
      "R Embed Helper 1.0.0",
      NULL
  };

  // ECL-side declaration of the plugin: a SERVICE exposing the C++ entry points
  // in namespace Rembed, followed by the exported aliases and capability flags.
  // The adjacent literals are concatenated by the compiler into one string.
  static const char * EclDefinition =
      "EXPORT Language := SERVICE\n"
      " boolean getEmbedContext():cpp,pure,namespace='Rembed',entrypoint='getEmbedContext',prototype='IEmbedContext* getEmbedContext()';\n"
      " boolean syntaxCheck(const varstring src):cpp,pure,namespace='Rembed',entrypoint='syntaxCheck';\n"
      " unload():cpp,pure,namespace='Rembed',entrypoint='unload';\n"
      "END;"
      "EXPORT getEmbedContext := Language.getEmbedContext;"
      "EXPORT syntaxCheck := Language.syntaxCheck;"
      "EXPORT boolean supportsImport := false;"
      "EXPORT boolean supportsScript := true;";
  39. extern "C" EXPORT bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
  40. {
  41. if (pb->size == sizeof(ECLPluginDefinitionBlockEx))
  42. {
  43. ECLPluginDefinitionBlockEx * pbx = (ECLPluginDefinitionBlockEx *) pb;
  44. pbx->compatibleVersions = compatibleVersions;
  45. }
  46. else if (pb->size != sizeof(ECLPluginDefinitionBlock))
  47. return false;
  48. pb->magicVersion = PLUGIN_VERSION;
  49. pb->version = version;
  50. pb->moduleName = "R";
  51. pb->ECL = EclDefinition;
  52. pb->flags = PLUGIN_DLL_MODULE | PLUGIN_MULTIPLE_VERSIONS;
  53. pb->description = "R Embed Helper";
  54. return true;
  55. }
  56. namespace Rembed
  57. {
  58. // Use a global object to ensure that the R instance is initialized only once
  59. static class RGlobalState
  60. {
  61. public:
  62. RGlobalState()
  63. {
  64. const char *args[] = {"R", "--slave" };
  65. R = new RInside(2, args, true, false, false);
  66. }
  67. ~RGlobalState()
  68. {
  69. delete R;
  70. }
  71. RInside *R;
  72. }* globalState = NULL;
  73. static CriticalSection RCrit; // R is single threaded - need to own this before making any call to R
  74. static RGlobalState *queryGlobalState()
  75. {
  76. CriticalBlock b(RCrit);
  77. if (!globalState)
  78. globalState = new RGlobalState;
  79. return globalState;
  80. }
  81. extern void unload()
  82. {
  83. CriticalBlock b(RCrit);
  84. if (globalState)
  85. delete globalState;
  86. globalState = NULL;
  87. }
  88. MODULE_INIT(INIT_PRIORITY_STANDARD)
  89. {
  90. return true;
  91. }
  92. MODULE_EXIT()
  93. {
  94. unload();
  95. }
  96. // Each call to a R function will use a new REmbedFunctionContext object
  97. // This takes care of ensuring that the critsec is locked while we are executing R code,
  98. // and released when we are not
  99. class REmbedFunctionContext: public CInterfaceOf<IEmbedFunctionContext>
  100. {
  101. public:
  102. REmbedFunctionContext(RInside &_R, const char *options)
  103. : R(_R), block(RCrit), result(R_NilValue)
  104. {
  105. }
  106. ~REmbedFunctionContext()
  107. {
  108. }
  109. virtual bool getBooleanResult()
  110. {
  111. return ::Rcpp::as<bool>(result);
  112. }
  113. virtual void getDataResult(size32_t &__len, void * &__result)
  114. {
  115. std::vector<byte> vval = ::Rcpp::as<std::vector<byte> >(result);
  116. rtlStrToDataX(__len, __result, vval.size(), vval.data());
  117. }
  118. virtual double getRealResult()
  119. {
  120. return ::Rcpp::as<double>(result);
  121. }
  122. virtual __int64 getSignedResult()
  123. {
  124. return ::Rcpp::as<long int>(result); // Should really be long long, but RInside does not support that
  125. }
  126. virtual unsigned __int64 getUnsignedResult()
  127. {
  128. return ::Rcpp::as<unsigned long int>(result); // Should really be long long, but RInside does not support that
  129. }
  130. virtual void getStringResult(size32_t &__len, char * &__result)
  131. {
  132. std::string str = ::Rcpp::as<std::string>(result);
  133. rtlStrToStrX(__len, __result, str.length(), str.data());
  134. }
  135. virtual void getUTF8Result(size32_t &chars, char * &result)
  136. {
  137. throw MakeStringException(MSGAUD_user, 0, "Rembed: %s: Unicode/UTF8 results not supported", func.c_str());
  138. }
  139. virtual void getUnicodeResult(size32_t &chars, UChar * &result)
  140. {
  141. throw MakeStringException(MSGAUD_user, 0, "Rembed: %s: Unicode/UTF8 results not supported", func.c_str());
  142. }
  143. virtual void getSetResult(bool & __isAllResult, size32_t & __resultBytes, void * & __result, int _elemType, size32_t elemSize)
  144. {
  145. type_t elemType = (type_t) _elemType;
  146. __isAllResult = false;
  147. switch(elemType)
  148. {
  149. #define FETCH_ARRAY(type) \
  150. { \
  151. std::vector<type> vval = ::Rcpp::as< std::vector<type> >(result); \
  152. rtlStrToDataX(__resultBytes, __result, vval.size()*elemSize, (const void *) vval.data()); \
  153. }
  154. case type_boolean:
  155. {
  156. std::vector<bool> vval = ::Rcpp::as< std::vector<bool> >(result);
  157. size32_t size = vval.size();
  158. // Vector of bool is odd, and can't be retrieved via data()
  159. // Instead we need to iterate, I guess
  160. rtlDataAttr out(size);
  161. bool *outData = (bool *) out.getdata();
  162. for (std::vector<bool>::iterator iter = vval.begin(); iter < vval.end(); iter++)
  163. {
  164. *outData++ = *iter;
  165. }
  166. __resultBytes = size;
  167. __result = out.detachdata();
  168. break;
  169. }
  170. case type_int:
  171. if (elemSize == sizeof(byte))
  172. FETCH_ARRAY(byte)
  173. else if (elemSize == sizeof(short))
  174. FETCH_ARRAY(short)
  175. else if (elemSize == sizeof(int))
  176. FETCH_ARRAY(int)
  177. else if (elemSize == sizeof(long)) // __int64 / long long does not work...
  178. FETCH_ARRAY(long)
  179. else
  180. rtlFail(0, "Rembed: Unsupported result type");
  181. break;
  182. case type_real:
  183. if (elemSize == sizeof(float))
  184. FETCH_ARRAY(float)
  185. else if (elemSize == sizeof(double))
  186. FETCH_ARRAY(double)
  187. else
  188. rtlFail(0, "Rembed: Unsupported result type");
  189. break;
  190. case type_string:
  191. case type_varstring:
  192. {
  193. std::vector<std::string> vval = ::Rcpp::as< std::vector<std::string> >(result);
  194. size32_t numResults = vval.size();
  195. rtlRowBuilder out;
  196. byte *outData = NULL;
  197. size32_t outBytes = 0;
  198. if (elemSize != UNKNOWN_LENGTH)
  199. {
  200. outBytes = numResults * elemSize; // MORE - check for overflow?
  201. out.ensureAvailable(outBytes);
  202. outData = out.getbytes();
  203. }
  204. for (std::vector<std::string>::iterator iter = vval.begin(); iter < vval.end(); iter++)
  205. {
  206. size32_t lenBytes = (*iter).size();
  207. const char *text = (*iter).data();
  208. if (elemType == type_string)
  209. {
  210. if (elemSize == UNKNOWN_LENGTH)
  211. {
  212. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  213. outData = out.getbytes() + outBytes;
  214. * (size32_t *) outData = lenBytes;
  215. rtlStrToStr(lenBytes, outData+sizeof(size32_t), lenBytes, text);
  216. outBytes += lenBytes + sizeof(size32_t);
  217. }
  218. else
  219. {
  220. rtlStrToStr(elemSize, outData, lenBytes, text);
  221. outData += elemSize;
  222. }
  223. }
  224. else
  225. {
  226. if (elemSize == UNKNOWN_LENGTH)
  227. {
  228. out.ensureAvailable(outBytes + lenBytes + 1);
  229. outData = out.getbytes() + outBytes;
  230. rtlStrToVStr(0, outData, lenBytes, text);
  231. outBytes += lenBytes + 1;
  232. }
  233. else
  234. {
  235. rtlStrToVStr(elemSize, outData, lenBytes, text); // Fixed size null terminated strings... weird.
  236. outData += elemSize;
  237. }
  238. }
  239. }
  240. __resultBytes = outBytes;
  241. __result = out.detachdata();
  242. break;
  243. }
  244. default:
  245. rtlFail(0, "REmbed: Unsupported result type");
  246. }
  247. }
  248. virtual void bindBooleanParam(const char *name, bool val)
  249. {
  250. R[name] = val;
  251. }
  252. virtual void bindDataParam(const char *name, size32_t len, const void *val)
  253. {
  254. std::vector<byte> vval;
  255. const byte *cval = (const byte *) val;
  256. vval.assign(cval, cval+len);
  257. R[name] = vval;
  258. }
  259. virtual void bindRealParam(const char *name, double val)
  260. {
  261. R[name] = val;
  262. }
  263. virtual void bindSignedParam(const char *name, __int64 val)
  264. {
  265. R[name] = (long int) val;
  266. }
  267. virtual void bindUnsignedParam(const char *name, unsigned __int64 val)
  268. {
  269. R[name] = (unsigned long int) val;
  270. }
  271. virtual void bindStringParam(const char *name, size32_t len, const char *val)
  272. {
  273. std::string s(val, len);
  274. R[name] = s;
  275. }
  276. virtual void bindVStringParam(const char *name, const char *val)
  277. {
  278. R[name] = val;
  279. }
  280. virtual void bindUTF8Param(const char *name, size32_t chars, const char *val)
  281. {
  282. rtlFail(0, "Rembed: Unsupported parameter type UTF8");
  283. }
  284. virtual void bindUnicodeParam(const char *name, size32_t chars, const UChar *val)
  285. {
  286. rtlFail(0, "Rembed: Unsupported parameter type UNICODE");
  287. }
  288. virtual void bindSetParam(const char *name, int _elemType, size32_t elemSize, bool isAll, size32_t totalBytes, void *setData)
  289. {
  290. if (isAll)
  291. rtlFail(0, "Rembed: Unsupported parameter type ALL");
  292. type_t elemType = (type_t) _elemType;
  293. int numElems = totalBytes / elemSize;
  294. switch(elemType)
  295. {
  296. #define BIND_ARRAY(type) \
  297. { \
  298. std::vector<type> vval; \
  299. const type *start = (const type *) setData; \
  300. vval.assign(start, start+numElems); \
  301. R[name] = vval; \
  302. }
  303. case type_boolean:
  304. BIND_ARRAY(bool)
  305. break;
  306. case type_int:
  307. /* if (elemSize == sizeof(signed char)) // No binding exists in rcpp
  308. BIND_ARRAY(signed char)
  309. else */ if (elemSize == sizeof(short))
  310. BIND_ARRAY(short)
  311. else if (elemSize == sizeof(int))
  312. BIND_ARRAY(int)
  313. else if (elemSize == sizeof(long)) // __int64 / long long does not work...
  314. BIND_ARRAY(long)
  315. else
  316. rtlFail(0, "Rembed: Unsupported parameter type");
  317. break;
  318. case type_unsigned:
  319. if (elemSize == sizeof(unsigned char))
  320. BIND_ARRAY(unsigned char)
  321. else if (elemSize == sizeof(unsigned short))
  322. BIND_ARRAY(unsigned short)
  323. else if (elemSize == sizeof(unsigned int))
  324. BIND_ARRAY(unsigned int)
  325. else if (elemSize == sizeof(unsigned long)) // __int64 / long long does not work...
  326. BIND_ARRAY(unsigned long)
  327. else
  328. rtlFail(0, "Rembed: Unsupported parameter type");
  329. break;
  330. case type_real:
  331. if (elemSize == sizeof(float))
  332. BIND_ARRAY(float)
  333. else if (elemSize == sizeof(double))
  334. BIND_ARRAY(double)
  335. else
  336. rtlFail(0, "Rembed: Unsupported parameter type");
  337. break;
  338. case type_string:
  339. case type_varstring:
  340. {
  341. std::vector<std::string> vval;
  342. const byte *inData = (const byte *) setData;
  343. const byte *endData = inData + totalBytes;
  344. while (inData < endData)
  345. {
  346. int thisSize;
  347. if (elemSize == UNKNOWN_LENGTH)
  348. {
  349. if (elemType==type_varstring)
  350. thisSize = strlen((const char *) inData) + 1;
  351. else
  352. {
  353. thisSize = * (size32_t *) inData;
  354. inData += sizeof(size32_t);
  355. }
  356. }
  357. else
  358. thisSize = elemSize;
  359. std::string s((const char *) inData, thisSize);
  360. vval.push_back(s);
  361. inData += thisSize;
  362. numElems++;
  363. }
  364. R[name] = vval;
  365. break;
  366. }
  367. default:
  368. rtlFail(0, "REmbed: Unsupported parameter type");
  369. }
  370. }
  371. virtual void importFunction(size32_t lenChars, const char *utf)
  372. {
  373. throwUnexpected();
  374. }
  375. virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
  376. {
  377. func.assign(utf, rtlUtf8Size(lenChars, utf));
  378. }
  379. virtual void callFunction()
  380. {
  381. result = R.parseEval(func);
  382. }
  383. private:
  384. RInside &R;
  385. RInside::Proxy result;
  386. std::string func;
  387. CriticalBlock block;
  388. };
  389. class REmbedContext: public CInterfaceOf<IEmbedContext>
  390. {
  391. public:
  392. virtual IEmbedFunctionContext *createFunctionContext(bool isImport, const char *options)
  393. {
  394. return new REmbedFunctionContext(*queryGlobalState()->R, options);
  395. }
  396. };
  397. extern IEmbedContext* getEmbedContext()
  398. {
  399. return new REmbedContext;
  400. }
  // Entry point named in the ECL service definition. No checking is implemented
  // yet (MORE): every script is reported as valid, and `script` is not examined.
  extern bool syntaxCheck(const char *script)
  {
      const bool accepted = true;
      return accepted;
  }
  405. } // namespace