pyembed.cpp 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include "Python.h"
  15. #include "jexcept.hpp"
  16. #include "jthread.hpp"
  17. #include "hqlplugins.hpp"
  18. #include "deftype.hpp"
  19. #include "eclhelper.hpp"
  20. #include "eclrtl.hpp"
  21. #include "eclrtl_imp.hpp"
  22. #include "rtlds_imp.hpp"
  23. #include "rtlfield_imp.hpp"
  24. #include "nbcd.hpp"
  25. #include "roxiemem.hpp"
  26. #ifdef _WIN32
  27. #define EXPORT __declspec(dllexport)
  28. #else
  29. #define EXPORT
  30. #endif
  31. static const char * compatibleVersions[] = {
  32. "Python2.7 Embed Helper 1.0.0",
  33. NULL };
  34. static const char *version = "Python2.7 Embed Helper 1.0.0";
  35. extern "C" EXPORT bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
  36. {
  37. if (pb->size == sizeof(ECLPluginDefinitionBlockEx))
  38. {
  39. ECLPluginDefinitionBlockEx * pbx = (ECLPluginDefinitionBlockEx *) pb;
  40. pbx->compatibleVersions = compatibleVersions;
  41. }
  42. else if (pb->size != sizeof(ECLPluginDefinitionBlock))
  43. return false;
  44. pb->magicVersion = PLUGIN_VERSION;
  45. pb->version = version;
  46. pb->moduleName = "python";
  47. pb->ECL = NULL;
  48. pb->flags = PLUGIN_MULTIPLE_VERSIONS;
  49. pb->description = "Python2.7 Embed Helper";
  50. return true;
  51. }
  52. namespace pyembed {
  53. // Use class OwnedPyObject for any objects that are not 'borrowed references'
  54. // so that the appropriate Py_DECREF call is made when the OwnedPyObject goes
  55. // out of scope, even if the function returns prematurely (such as via an exception).
  56. // In particular, checkPythonError is a lot easier to call safely if this is used.
  57. class OwnedPyObject
  58. {
  59. PyObject *ptr;
  60. public:
  61. inline OwnedPyObject() : ptr(NULL) {}
  62. inline OwnedPyObject(PyObject *_ptr) : ptr(_ptr) {}
  63. inline ~OwnedPyObject() { if (ptr) Py_DECREF(ptr); }
  64. inline PyObject * get() const { return ptr; }
  65. inline PyObject * getClear() { PyObject *ret = ptr; ptr = NULL; return ret; }
  66. inline PyObject * operator -> () const { return ptr; }
  67. inline operator PyObject *() const { return ptr; }
  68. inline void clear() { if (ptr) Py_DECREF(ptr); ptr = NULL; }
  69. inline void setown(PyObject *_ptr) { clear(); ptr = _ptr; }
  70. inline void set(PyObject *_ptr) { clear(); ptr = _ptr; if (ptr) Py_INCREF(ptr);}
  71. inline PyObject *getLink() { if (ptr) Py_INCREF(ptr); return ptr;}
  72. inline PyObject **ref() { return &ptr; }
  73. };
  74. // call checkPythonError to throw an exception if Python error state is set
  75. static void checkPythonError()
  76. {
  77. PyObject* err = PyErr_Occurred();
  78. if (err)
  79. {
  80. OwnedPyObject pType, pValue, pTraceBack;
  81. PyErr_Fetch(pType.ref(), pValue.ref(), pTraceBack.ref());
  82. OwnedPyObject valStr = PyObject_Str(pValue);
  83. PyErr_Clear();
  84. VStringBuffer errMessage("pyembed: %s", PyString_AsString(valStr));
  85. rtlFail(0, errMessage.str());
  86. }
  87. }
  88. // The Python Global Interpreter Lock (GIL) won't know about C++-created threads, so we need to
  89. // call PyGILState_Ensure() and PyGILState_Release at the start and end of every function.
  90. // Wrapping them in a class like this ensures that the release always happens even if
  91. // the function exits prematurely
  92. class GILstateWrapper
  93. {
  94. PyGILState_STATE gstate;
  95. public:
  96. GILstateWrapper()
  97. {
  98. gstate = PyGILState_Ensure();
  99. }
  100. ~GILstateWrapper()
  101. {
  102. PyGILState_Release(gstate);
  103. }
  104. };
  105. // There is a singleton PythonThreadContext per thread. This allows us to
  106. // ensure that we can make repeated calls to a Python function efficiently.
  107. class PythonThreadContext
  108. {
  109. public:
  110. PyThreadState *threadState;
  111. public:
  112. PythonThreadContext()
  113. {
  114. threadState = PyEval_SaveThread();
  115. lrutype = NULL;
  116. }
  117. ~PythonThreadContext()
  118. {
  119. PyEval_RestoreThread(threadState);
  120. script.clear();
  121. }
  122. inline PyObject * importFunction(size32_t lenChars, const char *utf)
  123. {
  124. size32_t bytes = rtlUtf8Size(lenChars, utf);
  125. StringBuffer text(bytes, utf);
  126. if (!prevtext || strcmp(text, prevtext) != 0)
  127. {
  128. prevtext.clear();
  129. // Name should be in the form module.function
  130. const char *funcname = strrchr(text, '.');
  131. if (!funcname)
  132. rtlFail(0, "pyembed: Expected module.function");
  133. StringBuffer modname(funcname-text, text);
  134. funcname++; // skip the '.'
  135. // If the modname is preceded by a path, add it to the python path before importing
  136. const char *pathsep = strrchr(modname, PATHSEPCHAR);
  137. if (pathsep)
  138. {
  139. StringBuffer path(pathsep-modname, modname);
  140. modname.remove(0, 1+pathsep-modname);
  141. PyObject *sys_path = PySys_GetObject((char *) "path");
  142. OwnedPyObject new_path = PyString_FromString(path);
  143. if (sys_path)
  144. {
  145. PyList_Insert(sys_path, 0, new_path);
  146. checkPythonError();
  147. }
  148. }
  149. module.setown(PyImport_ImportModule(modname));
  150. checkPythonError();
  151. PyObject *dict = PyModule_GetDict(module); // this is a borrowed reference and does not need to be released
  152. script.set(PyDict_GetItemString(dict, funcname));
  153. checkPythonError();
  154. if (!script || !PyCallable_Check(script))
  155. rtlFail(0, "pyembed: Object is not callable");
  156. prevtext.set(text);
  157. }
  158. return script.getLink();
  159. }
  160. PyObject *compileEmbeddedScript(size32_t lenChars, const char *utf);
  161. PyObject *getNamedTupleType(const RtlTypeInfo *type);
  162. private:
  163. GILstateWrapper GILState;
  164. OwnedPyObject module;
  165. OwnedPyObject script;
  166. OwnedPyObject lru;
  167. const RtlTypeInfo *lrutype;
  168. StringAttr prevtext;
  169. };
  170. static __thread PythonThreadContext* threadContext; // We reuse per thread, for speed
  171. static __thread ThreadTermFunc threadHookChain;
  172. static void releaseContext()
  173. {
  174. if (threadContext)
  175. {
  176. delete threadContext;
  177. threadContext = NULL;
  178. }
  179. if (threadHookChain)
  180. {
  181. (*threadHookChain)();
  182. threadHookChain = NULL;
  183. }
  184. }
  185. // Use a global object to ensure that the Python interpreter is initialized on main thread
  186. static class Python27GlobalState
  187. {
  188. public:
  189. Python27GlobalState()
  190. {
  191. pythonLibrary = (HINSTANCE) 0;
  192. #ifndef _WIN32
  193. // If Py_Initialize is called when stdin is set to a directory, it calls exit()
  194. // We don't want that to happen - just disable Python support in such situations
  195. struct stat sb;
  196. if (fstat(fileno(stdin), &sb) == 0 && S_ISDIR(sb.st_mode))
  197. {
  198. initialized = false;
  199. return;
  200. }
  201. #endif
  202. #ifndef _WIN32
  203. // We need to ensure all symbols in the python2.6 so are loaded - due to bugs in some distro's python installations
  204. FILE *diskfp = fopen("/proc/self/maps", "r");
  205. if (diskfp)
  206. {
  207. char ln[_MAX_PATH];
  208. while (fgets(ln, sizeof(ln), diskfp))
  209. {
  210. if (strstr(ln, "libpython2"))
  211. {
  212. const char *fullName = strchr(ln, '/');
  213. if (fullName)
  214. {
  215. char * lf = (char *) strchr(fullName, '\n');
  216. if (lf)
  217. {
  218. *lf = 0;
  219. pythonLibrary = dlopen((char *)fullName, RTLD_NOW|RTLD_GLOBAL);
  220. // DBGLOG("dlopen %s returns %"I64F"x", fullName, (__uint64) pythonLibrary);
  221. break;
  222. }
  223. }
  224. }
  225. }
  226. fclose(diskfp);
  227. }
  228. #endif
  229. // Initialize the Python Interpreter
  230. Py_Initialize();
  231. PyEval_InitThreads();
  232. tstate = PyEval_SaveThread();
  233. initialized = true;
  234. }
  235. ~Python27GlobalState()
  236. {
  237. if (threadContext)
  238. delete threadContext; // The one on the main thread won't get picked up by the thread hook mechanism
  239. threadContext = NULL;
  240. if (initialized)
  241. {
  242. PyEval_RestoreThread(tstate);
  243. // Finish the Python Interpreter
  244. namedtuple.clear();
  245. namedtupleTypes.clear();
  246. compiledScripts.clear();
  247. Py_Finalize();
  248. }
  249. if (pythonLibrary)
  250. FreeSharedObject(pythonLibrary);
  251. }
  252. bool isInitialized()
  253. {
  254. return initialized;
  255. }
  256. PyObject *getNamedTupleType(const RtlTypeInfo *type)
  257. {
  258. // It seems the customized namedtuple types leak, and they are slow to create, so take care to reuse
  259. CriticalBlock b(lock); // Not sure if this is really needed, as we have effectively locked out other threads using the GIL
  260. if (!namedtuple)
  261. {
  262. namedtupleTypes.setown(PyDict_New());
  263. OwnedPyObject pName = PyString_FromString("collections");
  264. OwnedPyObject collections = PyImport_Import(pName);
  265. checkPythonError();
  266. namedtuple.setown(PyObject_GetAttrString(collections, "namedtuple"));
  267. checkPythonError();
  268. assertex(PyCallable_Check(namedtuple));
  269. }
  270. const RtlFieldInfo * const *fields = type->queryFields();
  271. StringBuffer names;
  272. while (*fields)
  273. {
  274. const RtlFieldInfo *field = *fields;
  275. if (names.length())
  276. names.append(',');
  277. names.append(field->name->str());
  278. fields++;
  279. }
  280. OwnedPyObject pnames = PyString_FromString(names.str());
  281. OwnedPyObject mynamedtupletype;
  282. mynamedtupletype.set(PyDict_GetItem(namedtupleTypes, pnames)); // NOTE - returns borrowed reference
  283. if (!mynamedtupletype)
  284. {
  285. OwnedPyObject recname = PyString_FromString("namerec"); // MORE - do we care what the name is?
  286. OwnedPyObject ntargs = PyTuple_Pack(2, recname.get(), pnames.get());
  287. mynamedtupletype.setown(PyObject_CallObject(namedtuple, ntargs));
  288. PyDict_SetItem(namedtupleTypes, pnames, mynamedtupletype);
  289. }
  290. checkPythonError();
  291. assertex(PyCallable_Check(mynamedtupletype));
  292. return mynamedtupletype.getClear();
  293. }
  294. PyObject *compileScript(const char *text)
  295. {
  296. CriticalBlock b(lock); // Not sure if this is really needed, as we have effectively locked out other threads using the GIL
  297. if (!compiledScripts)
  298. compiledScripts.setown(PyDict_New());
  299. OwnedPyObject code;
  300. code.set(PyDict_GetItemString(compiledScripts, text));
  301. if (!code)
  302. {
  303. code.setown(Py_CompileString(text, "", Py_eval_input));
  304. if (!code)
  305. {
  306. PyErr_Clear();
  307. StringBuffer wrapped;
  308. wrapPythonText(wrapped, text);
  309. PyCompilerFlags flags = { PyCF_SOURCE_IS_UTF8 };
  310. code.setown(Py_CompileStringFlags(wrapped, "<embed>", Py_file_input, &flags));
  311. }
  312. checkPythonError();
  313. if (code)
  314. PyDict_SetItemString(compiledScripts, text, code);
  315. }
  316. return code.getClear();
  317. }
  318. protected:
  319. static StringBuffer &wrapPythonText(StringBuffer &out, const char *in)
  320. {
  321. out.append("def __user__():\n ");
  322. char c;
  323. while ((c = *in++) != '\0')
  324. {
  325. out.append(c);
  326. if (c=='\n')
  327. out.append(" ");
  328. }
  329. out.append("\n__result__ = __user__()\n");
  330. return out;
  331. }
  332. PyThreadState *tstate;
  333. bool initialized;
  334. HINSTANCE pythonLibrary;
  335. OwnedPyObject namedtuple; // collections.namedtuple
  336. OwnedPyObject namedtupleTypes; // dictionary of return values from namedtuple()
  337. OwnedPyObject compiledScripts; // dictionary of previously compiled scripts
  338. CriticalSection lock;
  339. } globalState;
  340. PyObject *PythonThreadContext::getNamedTupleType(const RtlTypeInfo *type)
  341. {
  342. if (!lru || (type!=lrutype))
  343. {
  344. lru.setown(globalState.getNamedTupleType(type));
  345. lrutype = type;
  346. }
  347. return lru.getLink();
  348. }
  349. PyObject *PythonThreadContext::compileEmbeddedScript(size32_t lenChars, const char *utf)
  350. {
  351. size32_t bytes = rtlUtf8Size(lenChars, utf);
  352. StringBuffer text(bytes, utf);
  353. if (!prevtext || strcmp(text, prevtext) != 0)
  354. {
  355. prevtext.clear();
  356. script.setown(globalState.compileScript(text));
  357. prevtext.set(utf, bytes);
  358. }
  359. return script.getLink();
  360. }
  361. static int countFields(const RtlFieldInfo * const * fields)
  362. {
  363. unsigned count = 0;
  364. loop
  365. {
  366. if (!*fields)
  367. break;
  368. fields++;
  369. count++;
  370. }
  371. return count;
  372. }
  373. // Conversions from Python objects to ECL data
  374. static void typeError(const char *expected, const RtlFieldInfo *field) __attribute__((noreturn));
  375. static void typeError(const char *expected, const RtlFieldInfo *field)
  376. {
  377. VStringBuffer msg("pyembed: type mismatch - %s expected", expected);
  378. if (field)
  379. msg.appendf(" for field %s", field->name->str());
  380. rtlFail(0, msg.str());
  381. }
  382. static bool getBooleanResult(const RtlFieldInfo *field, PyObject *obj)
  383. {
  384. assertex(obj && obj != Py_None);
  385. if (!PyBool_Check(obj))
  386. typeError("boolean", field);
  387. return obj == Py_True;
  388. }
  389. static void getDataResult(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, void * &result)
  390. {
  391. assertex(obj && obj != Py_None);
  392. if (!PyByteArray_Check(obj))
  393. typeError("bytearray", field);
  394. rtlStrToDataX(chars, result, PyByteArray_Size(obj), PyByteArray_AsString(obj));
  395. }
  396. static double getRealResult(const RtlFieldInfo *field, PyObject *obj)
  397. {
  398. assertex(obj && obj != Py_None);
  399. if (!PyFloat_Check(obj))
  400. typeError("real", field);
  401. return PyFloat_AsDouble(obj);
  402. }
  403. static __int64 getSignedResult(const RtlFieldInfo *field, PyObject *obj)
  404. {
  405. assertex(obj && obj != Py_None);
  406. __int64 ret;
  407. if (PyInt_Check(obj))
  408. ret = PyInt_AsUnsignedLongLongMask(obj);
  409. else if (PyLong_Check(obj))
  410. ret = (__int64) PyLong_AsLongLong(obj);
  411. else
  412. typeError("integer", field);
  413. return ret;
  414. }
  415. static unsigned __int64 getUnsignedResult(const RtlFieldInfo *field, PyObject *obj)
  416. {
  417. assertex(obj && obj != Py_None);
  418. unsigned __int64 ret;
  419. if (PyInt_Check(obj))
  420. ret = PyInt_AsUnsignedLongLongMask(obj);
  421. else if (PyLong_Check(obj))
  422. ret = (unsigned __int64) PyLong_AsUnsignedLongLong(obj);
  423. else
  424. typeError("integer", field);
  425. return ret;
  426. }
  427. static void getStringResult(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, char * &result)
  428. {
  429. assertex(obj && obj != Py_None);
  430. if (PyString_Check(obj))
  431. {
  432. const char * text = PyString_AsString(obj);
  433. checkPythonError();
  434. size_t lenBytes = PyString_Size(obj);
  435. rtlStrToStrX(chars, result, lenBytes, text);
  436. }
  437. else
  438. typeError("string", field);
  439. }
  440. static void getUTF8Result(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, char * &result)
  441. {
  442. assertex(obj && obj != Py_None);
  443. if (PyUnicode_Check(obj))
  444. {
  445. OwnedPyObject utf8 = PyUnicode_AsUTF8String(obj);
  446. checkPythonError();
  447. size_t lenBytes = PyString_Size(utf8);
  448. const char * text = PyString_AsString(utf8);
  449. checkPythonError();
  450. size32_t numchars = rtlUtf8Length(lenBytes, text);
  451. rtlUtf8ToUtf8X(chars, result, numchars, text);
  452. }
  453. else
  454. typeError("unicode string", field);
  455. }
  456. static void getSetResult(PyObject *obj, bool & isAllResult, size32_t & resultBytes, void * & result, int elemType, size32_t elemSize)
  457. {
  458. // MORE - should probably recode to use the getResultDataset mechanism
  459. assertex(obj && obj != Py_None);
  460. if (!PyList_Check(obj) && !PySet_Check(obj))
  461. rtlFail(0, "pyembed: type mismatch - list or set expected");
  462. rtlRowBuilder out;
  463. size32_t outBytes = 0;
  464. byte *outData = NULL;
  465. OwnedPyObject iter = PyObject_GetIter(obj);
  466. OwnedPyObject elem;
  467. for (elem.setown(PyIter_Next(iter)); elem != NULL; elem.setown(PyIter_Next(iter)))
  468. {
  469. if (elemSize != UNKNOWN_LENGTH)
  470. {
  471. out.ensureAvailable(outBytes + elemSize);
  472. outData = out.getbytes() + outBytes;
  473. outBytes += elemSize;
  474. }
  475. switch ((type_t) elemType)
  476. {
  477. case type_int:
  478. rtlWriteInt(outData, pyembed::getSignedResult(NULL, elem), elemSize);
  479. break;
  480. case type_unsigned:
  481. rtlWriteInt(outData, pyembed::getUnsignedResult(NULL, elem), elemSize);
  482. break;
  483. case type_real:
  484. if (elemSize == sizeof(double))
  485. * (double *) outData = (double) pyembed::getRealResult(NULL, elem);
  486. else
  487. {
  488. assertex(elemSize == sizeof(float));
  489. * (float *) outData = (float) pyembed::getRealResult(NULL, elem);
  490. }
  491. break;
  492. case type_boolean:
  493. assertex(elemSize == sizeof(bool));
  494. * (bool *) outData = pyembed::getBooleanResult(NULL, elem);
  495. break;
  496. case type_string:
  497. case type_varstring:
  498. {
  499. if (!PyString_Check(elem))
  500. rtlFail(0, "pyembed: type mismatch - return value in list was not a STRING");
  501. const char * text = PyString_AsString(elem);
  502. checkPythonError();
  503. size_t lenBytes = PyString_Size(elem);
  504. if (elemSize == UNKNOWN_LENGTH)
  505. {
  506. if (elemType == type_string)
  507. {
  508. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  509. outData = out.getbytes() + outBytes;
  510. * (size32_t *) outData = lenBytes;
  511. rtlStrToStr(lenBytes, outData+sizeof(size32_t), lenBytes, text);
  512. outBytes += lenBytes + sizeof(size32_t);
  513. }
  514. else
  515. {
  516. out.ensureAvailable(outBytes + lenBytes + 1);
  517. outData = out.getbytes() + outBytes;
  518. rtlStrToVStr(0, outData, lenBytes, text);
  519. outBytes += lenBytes + 1;
  520. }
  521. }
  522. else
  523. {
  524. if (elemType == type_string)
  525. rtlStrToStr(elemSize, outData, lenBytes, text);
  526. else
  527. rtlStrToVStr(elemSize, outData, lenBytes, text); // Fixed size null terminated strings... weird.
  528. }
  529. break;
  530. }
  531. case type_unicode:
  532. case type_utf8:
  533. {
  534. if (!PyUnicode_Check(elem))
  535. rtlFail(0, "pyembed: type mismatch - return value in list was not a unicode STRING");
  536. OwnedPyObject utf8 = PyUnicode_AsUTF8String(elem);
  537. checkPythonError();
  538. size_t lenBytes = PyString_Size(utf8);
  539. const char * text = PyString_AsString(utf8);
  540. checkPythonError();
  541. size32_t numchars = rtlUtf8Length(lenBytes, text);
  542. if (elemType == type_utf8)
  543. {
  544. assertex (elemSize == UNKNOWN_LENGTH);
  545. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  546. outData = out.getbytes() + outBytes;
  547. * (size32_t *) outData = numchars;
  548. rtlStrToStr(lenBytes, outData+sizeof(size32_t), lenBytes, text);
  549. outBytes += lenBytes + sizeof(size32_t);
  550. }
  551. else
  552. {
  553. if (elemSize == UNKNOWN_LENGTH)
  554. {
  555. out.ensureAvailable(outBytes + numchars*sizeof(UChar) + sizeof(size32_t));
  556. outData = out.getbytes() + outBytes;
  557. // You can't assume that number of chars in utf8 matches number in unicode16 ...
  558. size32_t numchars16;
  559. rtlDataAttr unicode16;
  560. rtlUtf8ToUnicodeX(numchars16, unicode16.refustr(), numchars, text);
  561. * (size32_t *) outData = numchars16;
  562. rtlUnicodeToUnicode(numchars16, (UChar *) (outData+sizeof(size32_t)), numchars16, unicode16.getustr());
  563. outBytes += numchars16*sizeof(UChar) + sizeof(size32_t);
  564. }
  565. else
  566. rtlUtf8ToUnicode(elemSize / sizeof(UChar), (UChar *) outData, numchars, text);
  567. }
  568. break;
  569. }
  570. case type_data:
  571. {
  572. if (!PyByteArray_Check(elem))
  573. rtlFail(0, "pyembed: type mismatch - return value in list was not a bytearray");
  574. size_t lenBytes = PyByteArray_Size(elem); // Could check does not overflow size32_t
  575. const char *data = PyByteArray_AsString(elem);
  576. if (elemSize == UNKNOWN_LENGTH)
  577. {
  578. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  579. outData = out.getbytes() + outBytes;
  580. * (size32_t *) outData = lenBytes;
  581. rtlStrToData(lenBytes, outData+sizeof(size32_t), lenBytes, data);
  582. outBytes += lenBytes + sizeof(size32_t);
  583. }
  584. else
  585. rtlStrToData(elemSize, outData, lenBytes, data);
  586. break;
  587. }
  588. default:
  589. rtlFail(0, "pyembed: type mismatch - unsupported return type");
  590. break;
  591. }
  592. checkPythonError();
  593. }
  594. isAllResult = false;
  595. resultBytes = outBytes;
  596. result = out.detachdata();
  597. }
  598. static void getUnicodeResult(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, UChar * &result)
  599. {
  600. assertex(obj && obj != Py_None);
  601. if (PyUnicode_Check(obj))
  602. {
  603. OwnedPyObject utf8 = PyUnicode_AsUTF8String(obj);
  604. checkPythonError();
  605. size_t lenBytes = PyString_Size(utf8);
  606. const char * text = PyString_AsString(utf8);
  607. checkPythonError();
  608. size32_t numchars = rtlUtf8Length(lenBytes, text);
  609. rtlUtf8ToUnicodeX(chars, result, numchars, text);
  610. }
  611. else
  612. typeError("unicode string", field);
  613. }
  614. // A PythonRowBuilder object is used to construct an ECL row from a python object
  615. class PythonRowBuilder : public CInterfaceOf<IFieldSource>
  616. {
  617. public:
  618. PythonRowBuilder(PyObject *_row)
  619. : iter(NULL), elem(NULL), named(false)
  620. {
  621. pushback.set(_row);
  622. }
  623. virtual bool getBooleanResult(const RtlFieldInfo *field)
  624. {
  625. nextField(field);
  626. return pyembed::getBooleanResult(field, elem);
  627. }
  628. virtual void getDataResult(const RtlFieldInfo *field, size32_t &len, void * &result)
  629. {
  630. nextField(field);
  631. pyembed::getDataResult(field, elem, len, result);
  632. }
  633. virtual double getRealResult(const RtlFieldInfo *field)
  634. {
  635. nextField(field);
  636. return pyembed::getRealResult(field, elem);
  637. }
  638. virtual __int64 getSignedResult(const RtlFieldInfo *field)
  639. {
  640. nextField(field);
  641. return pyembed::getSignedResult(field, elem);
  642. }
  643. virtual unsigned __int64 getUnsignedResult(const RtlFieldInfo *field)
  644. {
  645. nextField(field);
  646. return pyembed::getUnsignedResult(field, elem);
  647. }
  648. virtual void getStringResult(const RtlFieldInfo *field, size32_t &chars, char * &result)
  649. {
  650. nextField(field);
  651. pyembed::getStringResult(field, elem, chars, result);
  652. }
  653. virtual void getUTF8Result(const RtlFieldInfo *field, size32_t &chars, char * &result)
  654. {
  655. nextField(field);
  656. pyembed::getUTF8Result(field, elem, chars, result);
  657. }
  658. virtual void getUnicodeResult(const RtlFieldInfo *field, size32_t &chars, UChar * &result)
  659. {
  660. nextField(field);
  661. pyembed::getUnicodeResult(field, elem, chars, result);
  662. }
  663. virtual void getDecimalResult(const RtlFieldInfo *field, Decimal &value)
  664. {
  665. nextField(field);
  666. double ret = pyembed::getRealResult(field, elem);
  667. value.setReal(ret);
  668. }
  669. virtual void processBeginSet(const RtlFieldInfo * field, bool &isAll)
  670. {
  671. nextField(field);
  672. isAll = false; // No concept of an 'all' set in Python
  673. assertex(elem && elem != Py_None);
  674. if (!PyList_Check(elem) && !PySet_Check(elem))
  675. typeError("list or set", field);
  676. push();
  677. }
  678. virtual bool processNextSet(const RtlFieldInfo * field)
  679. {
  680. nextField(NULL);
  681. pushback.setown(elem.getClear());
  682. return pushback != NULL;
  683. }
  684. virtual void processBeginDataset(const RtlFieldInfo * field)
  685. {
  686. nextField(field);
  687. if (!PyList_Check(elem))
  688. typeError("list", field);
  689. push();
  690. }
  691. virtual void processBeginRow(const RtlFieldInfo * field)
  692. {
  693. // Expect to see a tuple here, or possibly (if the ECL record has a single field), an arbitrary scalar object
  694. // If it's a tuple, we push it onto our stack as the active object
  695. nextField(NULL); // MORE - should it be passing field?
  696. if (!PyTuple_Check(elem))
  697. {
  698. if (countFields(field->type->queryFields())==1)
  699. {
  700. // Python doesn't seem to support the concept of a tuple containing a single element.
  701. // If we are expecting a single field in our row, then the 'tuple' layer will be missing
  702. elem.setown(PyTuple_Pack(1, elem.get()));
  703. }
  704. else
  705. typeError("tuple", field);
  706. }
  707. push();
  708. }
  709. virtual bool processNextRow(const RtlFieldInfo * field)
  710. {
  711. nextField(NULL);
  712. pushback.setown(elem.getClear());
  713. return pushback != NULL;
  714. }
  715. virtual void processEndSet(const RtlFieldInfo * field)
  716. {
  717. pop();
  718. }
  719. virtual void processEndDataset(const RtlFieldInfo * field)
  720. {
  721. pop();
  722. }
  723. virtual void processEndRow(const RtlFieldInfo * field)
  724. {
  725. pop();
  726. }
  727. protected:
  728. void pop()
  729. {
  730. iter.setown((PyObject *) iterStack.popGet());
  731. parent.setown((PyObject *) parentStack.popGet());
  732. named = namedStack.popGet();
  733. elem.clear();
  734. }
  735. void push()
  736. {
  737. iterStack.append(iter.getClear());
  738. parentStack.append(parent.getClear());
  739. namedStack.append(named);
  740. parent.set(elem);
  741. iter.setown(PyObject_GetIter(elem));
  742. named = isNamedTuple(elem);
  743. elem.clear();
  744. }
  745. bool isNamedTuple(PyObject *obj)
  746. {
  747. return PyObject_HasAttrString((PyObject *) obj->ob_type, "_fields");
  748. }
  749. void nextField(const RtlFieldInfo * field)
  750. {
  751. if (pushback)
  752. elem.setown(pushback.getClear());
  753. else if (field && named) // If it's named tuple, expect to always resolve fields by name, not position
  754. {
  755. elem.setown(PyObject_GetAttrString(parent, field->name->str()));
  756. }
  757. else if (iter)
  758. elem.setown(PyIter_Next(iter));
  759. else
  760. elem = NULL;
  761. checkPythonError();
  762. }
  763. OwnedPyObject iter;
  764. OwnedPyObject pushback;
  765. OwnedPyObject elem;
  766. OwnedPyObject parent;
  767. bool named;
  768. PointerArray iterStack;
  769. PointerArray parentStack;
  770. BoolArray namedStack;
  771. };
  772. static size32_t getRowResult(PyObject *result, ARowBuilder &builder)
  773. {
  774. PythonRowBuilder pyRowBuilder(result);
  775. const RtlTypeInfo *typeInfo = builder.queryAllocator()->queryOutputMeta()->queryTypeInfo();
  776. assertex(typeInfo);
  777. RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
  778. return typeInfo->build(builder, 0, &dummyField, pyRowBuilder);
  779. }
  780. // A PythonNamedTupleBuilder object is used to construct a Python named tuple from an ECL row
  781. class PythonNamedTupleBuilder : public CInterfaceOf<IFieldProcessor>
  782. {
  783. public:
  784. PythonNamedTupleBuilder(PythonThreadContext *_sharedCtx, const RtlFieldInfo *_outerRow)
  785. : outerRow(_outerRow), sharedCtx(_sharedCtx)
  786. {
  787. }
  788. virtual void processString(unsigned len, const char *value, const RtlFieldInfo * field)
  789. {
  790. addArg(PyString_FromStringAndSize(value, len));
  791. }
  792. virtual void processBool(bool value, const RtlFieldInfo * field)
  793. {
  794. addArg(PyBool_FromLong(value ? 1 : 0));
  795. }
  796. virtual void processData(unsigned len, const void *value, const RtlFieldInfo * field)
  797. {
  798. addArg(PyByteArray_FromStringAndSize((const char *) value, len));
  799. }
  800. virtual void processInt(__int64 value, const RtlFieldInfo * field)
  801. {
  802. addArg(PyLong_FromLongLong(value));
  803. }
  804. virtual void processUInt(unsigned __int64 value, const RtlFieldInfo * field)
  805. {
  806. addArg(PyLong_FromUnsignedLongLong(value));
  807. }
  808. virtual void processReal(double value, const RtlFieldInfo * field)
  809. {
  810. addArg(PyFloat_FromDouble(value));
  811. }
  812. virtual void processDecimal(const void *value, unsigned digits, unsigned precision, const RtlFieldInfo * field)
  813. {
  814. Decimal val;
  815. val.setDecimal(digits, precision, value);
  816. addArg(PyFloat_FromDouble(val.getReal()));
  817. }
  818. virtual void processUDecimal(const void *value, unsigned digits, unsigned precision, const RtlFieldInfo * field)
  819. {
  820. Decimal val;
  821. val.setUDecimal(digits, precision, value);
  822. addArg(PyFloat_FromDouble(val.getReal()));
  823. }
  824. virtual void processUnicode(unsigned len, const UChar *value, const RtlFieldInfo * field)
  825. {
  826. // You don't really know what size Py_UNICODE is (varies from system to system), so go via utf8
  827. unsigned unicodeChars;
  828. rtlDataAttr unicode;
  829. rtlUnicodeToUtf8X(unicodeChars, unicode.refstr(), len, value);
  830. processUtf8(unicodeChars, unicode.getstr(), field);
  831. }
  832. virtual void processQString(unsigned len, const char *value, const RtlFieldInfo * field)
  833. {
  834. size32_t charCount;
  835. rtlDataAttr text;
  836. rtlQStrToStrX(charCount, text.refstr(), len, value);
  837. processString(charCount, text.getstr(), field);
  838. }
  839. virtual void processUtf8(unsigned len, const char *value, const RtlFieldInfo * field)
  840. {
  841. size32_t sizeBytes = rtlUtf8Size(len, value);
  842. PyObject *vval = PyUnicode_FromStringAndSize(value, sizeBytes); // NOTE - requires size in bytes not chars
  843. checkPythonError();
  844. addArg(vval);
  845. }
  846. virtual bool processBeginSet(const RtlFieldInfo * field, unsigned numElements, bool isAll, const byte *data)
  847. {
  848. push();
  849. if (isAll)
  850. rtlFail(0, "pyembed: ALL sets are not supported");
  851. return true;
  852. }
  853. virtual bool processBeginDataset(const RtlFieldInfo * field, unsigned numRows)
  854. {
  855. push();
  856. return true;
  857. }
  858. virtual bool processBeginRow(const RtlFieldInfo * field)
  859. {
  860. if (field != outerRow)
  861. push();
  862. return true;
  863. }
  864. virtual void processEndSet(const RtlFieldInfo * field)
  865. {
  866. pop();
  867. }
  868. virtual void processEndDataset(const RtlFieldInfo * field)
  869. {
  870. pop();
  871. }
  872. virtual void processEndRow(const RtlFieldInfo * field)
  873. {
  874. if (field != outerRow)
  875. {
  876. args.setown(getTuple(field->type));
  877. pop();
  878. }
  879. }
  880. PyObject *getTuple(const RtlTypeInfo *type)
  881. {
  882. OwnedPyObject mynamedtupletype = sharedCtx ? sharedCtx->getNamedTupleType(type) : globalState.getNamedTupleType(type);
  883. OwnedPyObject argsTuple = PyList_AsTuple(args);
  884. OwnedPyObject mynamedtuple = PyObject_CallObject(mynamedtupletype, argsTuple); // Creates a namedtuple from the supplied tuple
  885. checkPythonError();
  886. return mynamedtuple.getClear();
  887. }
  888. protected:
  889. void push()
  890. {
  891. stack.append(args.getClear());
  892. }
  893. void pop()
  894. {
  895. addArg(args.getClear());
  896. args.setown((PyObject *) stack.popGet());
  897. }
  898. void addArg(PyObject *arg)
  899. {
  900. if (!args)
  901. {
  902. args.setown(PyList_New(0));
  903. }
  904. PyList_Append(args, arg);
  905. Py_DECREF(arg);
  906. }
  907. OwnedPyObject args;
  908. PointerArray stack;
  909. const RtlFieldInfo *outerRow;
  910. PythonThreadContext *sharedCtx;
  911. };
  912. //----------------------------------------------------------------------
  913. // Wrap an IRowStream into a Python generator
  914. struct ECLDatasetIterator
  915. {
  916. PyObject_HEAD;
  917. const RtlTypeInfo *typeInfo; // Not linked (or linkable)
  918. IRowStream * val; // Linked
  919. };
  920. PyObject* ECLDatasetIterator_iter(PyObject *self)
  921. {
  922. Py_INCREF(self);
  923. return self;
  924. }
  925. void ECLDatasetIterator_dealloc(PyObject *self)
  926. {
  927. ECLDatasetIterator *p = (ECLDatasetIterator *)self;
  928. if (p->val)
  929. {
  930. p->val->stop();
  931. ::Release(p->val);
  932. p->val = NULL;
  933. }
  934. self->ob_type->tp_free(self);
  935. }
  936. PyObject* ECLDatasetIterator_iternext(PyObject *self)
  937. {
  938. ECLDatasetIterator *p = (ECLDatasetIterator *)self;
  939. if (p->val)
  940. {
  941. roxiemem::OwnedConstRoxieRow nextRow = p->val->ungroupedNextRow();
  942. if (!nextRow)
  943. {
  944. p->val->stop();
  945. ::Release(p->val);
  946. p->val = NULL;
  947. }
  948. else
  949. {
  950. RtlFieldStrInfo dummyField("<row>", NULL, p->typeInfo);
  951. PythonNamedTupleBuilder tupleBuilder(NULL, &dummyField);
  952. const byte *brow = (const byte *) nextRow.get();
  953. p->typeInfo->process(brow, brow, &dummyField, tupleBuilder);
  954. return tupleBuilder.getTuple(p->typeInfo);
  955. }
  956. }
  957. // If we get here, it's EOF
  958. PyErr_SetNone(PyExc_StopIteration);
  959. return NULL;
  960. }
  961. static PyTypeObject ECLDatasetIteratorType =
  962. {
  963. PyObject_HEAD_INIT(NULL)
  964. 0, /*ob_size*/
  965. "ECLDatasetIterator._MyIter", /*tp_name*/
  966. sizeof(ECLDatasetIterator), /*tp_basicsize*/
  967. 0, /*tp_itemsize*/
  968. ECLDatasetIterator_dealloc, /*tp_dealloc*/
  969. 0, /*tp_print*/
  970. 0, /*tp_getattr*/
  971. 0, /*tp_setattr*/
  972. 0, /*tp_compare*/
  973. 0, /*tp_repr*/
  974. 0, /*tp_as_number*/
  975. 0, /*tp_as_sequence*/
  976. 0, /*tp_as_mapping*/
  977. 0, /*tp_hash */
  978. 0, /*tp_call*/
  979. 0, /*tp_str*/
  980. 0, /*tp_getattro*/
  981. 0, /*tp_setattro*/
  982. 0, /*tp_as_buffer*/
  983. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER, /* tp_flags: tell python to use tp_iter and tp_iternext fields. */
  984. "ECL dataset iterator object.", /* tp_doc */
  985. 0, /* tp_traverse */
  986. 0, /* tp_clear */
  987. 0, /* tp_richcompare */
  988. 0, /* tp_weaklistoffset */
  989. ECLDatasetIterator_iter, /* tp_iter: __iter__() method */
  990. ECLDatasetIterator_iternext /* tp_iternext: next() method */
  991. };
  992. static PyObject *createECLDatasetIterator(const RtlTypeInfo *_typeInfo, IRowStream * _val)
  993. {
  994. ECLDatasetIteratorType.tp_new = PyType_GenericNew;
  995. if (PyType_Ready(&ECLDatasetIteratorType) < 0) return NULL;
  996. ECLDatasetIterator *p = PyObject_New(ECLDatasetIterator, &ECLDatasetIteratorType);
  997. if (!p)
  998. {
  999. checkPythonError();
  1000. rtlFail(0, "pyembed: failed to create dataset iterator");
  1001. }
  1002. p->typeInfo = _typeInfo;
  1003. p->val = _val;
  1004. return (PyObject *)p;
  1005. }
  1006. //-----------------------------------------------------
  1007. // GILBlock ensures the we hold the Python "Global interpreter lock" for the appropriate duration
  1008. class GILBlock
  1009. {
  1010. public:
  1011. GILBlock(PyThreadState * &_state) : state(_state)
  1012. {
  1013. PyEval_RestoreThread(state);
  1014. }
  1015. ~GILBlock()
  1016. {
  1017. state = PyEval_SaveThread();
  1018. }
  1019. private:
  1020. PyThreadState * &state;
  1021. };
  1022. // A Python function that returns a dataset will return a PythonRowStream object that can be
  1023. // interrogated to return each row of the result in turn
  1024. class PythonRowStream : public CInterfaceOf<IRowStream>
  1025. {
  1026. public:
  1027. PythonRowStream(PythonThreadContext *_sharedCtx, PyObject *result, IEngineRowAllocator *_resultAllocator)
  1028. : sharedCtx(_sharedCtx), resultIterator(NULL)
  1029. {
  1030. // NOTE - the caller should already have the GIL lock before creating me
  1031. if (!result || result == Py_None)
  1032. typeError("list or generator", NULL);
  1033. resultIterator.setown(PyObject_GetIter(result)); // We allow anything that is iterable to be returned for a row stream
  1034. checkPythonError();
  1035. resultAllocator.set(_resultAllocator);
  1036. }
  1037. virtual const void *nextRow()
  1038. {
  1039. GILBlock b(sharedCtx->threadState);
  1040. if (!resultIterator)
  1041. return NULL;
  1042. OwnedPyObject row = PyIter_Next(resultIterator);
  1043. if (!row)
  1044. return NULL;
  1045. RtlDynamicRowBuilder rowBuilder(resultAllocator);
  1046. size32_t len = pyembed::getRowResult(row, rowBuilder);
  1047. return rowBuilder.finalizeRowClear(len);
  1048. }
  1049. virtual void stop()
  1050. {
  1051. resultAllocator.clear();
  1052. resultIterator.clear();
  1053. }
  1054. protected:
  1055. PythonThreadContext *sharedCtx;
  1056. Linked<IEngineRowAllocator> resultAllocator;
  1057. OwnedPyObject resultIterator;
  1058. };
  1059. // Each call to a Python function will use a new Python27EmbedFunctionContext object
  1060. // This takes care of ensuring that the Python GIL is locked while we are executing python code,
  1061. // and released when we are not
  1062. class Python27EmbedContextBase : public CInterfaceOf<IEmbedFunctionContext>
  1063. {
  1064. public:
  1065. Python27EmbedContextBase(PythonThreadContext *_sharedCtx)
  1066. : sharedCtx(_sharedCtx)
  1067. {
  1068. PyEval_RestoreThread(sharedCtx->threadState);
  1069. locals.setown(PyDict_New());
  1070. globals.setown(PyDict_New());
  1071. PyDict_SetItemString(locals, "__builtins__", PyEval_GetBuiltins()); // required for import to work
  1072. }
  1073. ~Python27EmbedContextBase()
  1074. {
  1075. // We need to clear these before calling savethread, or we won't own the GIL
  1076. locals.clear();
  1077. globals.clear();
  1078. result.clear();
  1079. script.clear();
  1080. sharedCtx->threadState = PyEval_SaveThread();
  1081. }
  1082. virtual bool getBooleanResult()
  1083. {
  1084. return pyembed::getBooleanResult(NULL, result);
  1085. }
  1086. virtual void getDataResult(size32_t &__chars, void * &__result)
  1087. {
  1088. pyembed::getDataResult(NULL, result, __chars, __result);
  1089. }
  1090. virtual double getRealResult()
  1091. {
  1092. return pyembed::getRealResult(NULL, result);
  1093. }
  1094. virtual __int64 getSignedResult()
  1095. {
  1096. return pyembed::getSignedResult(NULL, result);
  1097. }
  1098. virtual unsigned __int64 getUnsignedResult()
  1099. {
  1100. return pyembed::getUnsignedResult(NULL, result);
  1101. }
  1102. virtual void getStringResult(size32_t &__chars, char * &__result)
  1103. {
  1104. pyembed::getStringResult(NULL, result, __chars, __result);
  1105. }
  1106. virtual void getUTF8Result(size32_t &__chars, char * &__result)
  1107. {
  1108. pyembed::getUTF8Result(NULL, result, __chars, __result);
  1109. }
  1110. virtual void getUnicodeResult(size32_t &__chars, UChar * &__result)
  1111. {
  1112. pyembed::getUnicodeResult(NULL, result, __chars, __result);
  1113. }
  1114. virtual void getSetResult(bool & __isAllResult, size32_t & __resultBytes, void * & __result, int elemType, size32_t elemSize)
  1115. {
  1116. pyembed::getSetResult(result, __isAllResult, __resultBytes, __result, elemType, elemSize);
  1117. }
  1118. virtual IRowStream *getDatasetResult(IEngineRowAllocator * _resultAllocator)
  1119. {
  1120. return new PythonRowStream(sharedCtx, result, _resultAllocator);
  1121. }
  1122. virtual byte * getRowResult(IEngineRowAllocator * _resultAllocator)
  1123. {
  1124. RtlDynamicRowBuilder rowBuilder(_resultAllocator);
  1125. size32_t len = pyembed::getRowResult(result, rowBuilder);
  1126. return (byte *) rowBuilder.finalizeRowClear(len);
  1127. }
  1128. virtual size32_t getTransformResult(ARowBuilder & builder)
  1129. {
  1130. return pyembed::getRowResult(result, builder);
  1131. }
  1132. virtual void bindBooleanParam(const char *name, bool val)
  1133. {
  1134. addArg(name, PyBool_FromLong(val ? 1 : 0));
  1135. }
  1136. virtual void bindDataParam(const char *name, size32_t len, const void *val)
  1137. {
  1138. addArg(name, PyByteArray_FromStringAndSize((const char *) val, len));
  1139. }
  1140. virtual void bindFloatParam(const char *name, float val)
  1141. {
  1142. addArg(name, PyFloat_FromDouble((double) val));
  1143. }
  1144. virtual void bindRealParam(const char *name, double val)
  1145. {
  1146. addArg(name, PyFloat_FromDouble(val));
  1147. }
  1148. virtual void bindSignedSizeParam(const char *name, int size, __int64 val)
  1149. {
  1150. addArg(name, PyLong_FromLongLong(val));
  1151. }
  1152. virtual void bindSignedParam(const char *name, __int64 val)
  1153. {
  1154. addArg(name, PyLong_FromLongLong(val));
  1155. }
  1156. virtual void bindUnsignedSizeParam(const char *name, int size, unsigned __int64 val)
  1157. {
  1158. addArg(name, PyLong_FromUnsignedLongLong(val));
  1159. }
  1160. virtual void bindUnsignedParam(const char *name, unsigned __int64 val)
  1161. {
  1162. addArg(name, PyLong_FromUnsignedLongLong(val));
  1163. }
  1164. virtual void bindStringParam(const char *name, size32_t len, const char *val)
  1165. {
  1166. addArg(name, PyString_FromStringAndSize(val, len));
  1167. }
  1168. virtual void bindVStringParam(const char *name, const char *val)
  1169. {
  1170. addArg(name, PyString_FromString(val));
  1171. }
  1172. virtual void bindUTF8Param(const char *name, size32_t chars, const char *val)
  1173. {
  1174. size32_t sizeBytes = rtlUtf8Size(chars, val);
  1175. PyObject *vval = PyUnicode_FromStringAndSize(val, sizeBytes); // NOTE - requires size in bytes not chars
  1176. checkPythonError();
  1177. addArg(name, vval);
  1178. }
  1179. virtual void bindUnicodeParam(const char *name, size32_t chars, const UChar *val)
  1180. {
  1181. // You don't really know what size Py_UNICODE is (varies from system to system), so go via utf8
  1182. unsigned unicodeChars;
  1183. char *unicode;
  1184. rtlUnicodeToUtf8X(unicodeChars, unicode, chars, val);
  1185. size32_t sizeBytes = rtlUtf8Size(unicodeChars, unicode);
  1186. PyObject *vval = PyUnicode_FromStringAndSize(unicode, sizeBytes); // NOTE - requires size in bytes not chars
  1187. checkPythonError();
  1188. addArg(name, vval);
  1189. rtlFree(unicode);
  1190. }
  1191. virtual void bindSetParam(const char *name, int elemType, size32_t elemSize, bool isAll, size32_t totalBytes, void *setData)
  1192. {
  1193. if (isAll)
  1194. rtlFail(0, "pyembed: Cannot pass ALL");
  1195. type_t typecode = (type_t) elemType;
  1196. const byte *inData = (const byte *) setData;
  1197. const byte *endData = inData + totalBytes;
  1198. OwnedPyObject vval = PyList_New(0);
  1199. while (inData < endData)
  1200. {
  1201. OwnedPyObject thisElem;
  1202. size32_t thisSize = elemSize;
  1203. switch (typecode)
  1204. {
  1205. case type_int:
  1206. thisElem.setown(PyLong_FromLongLong(rtlReadInt(inData, elemSize)));
  1207. break;
  1208. case type_unsigned:
  1209. thisElem.setown(PyLong_FromUnsignedLongLong(rtlReadUInt(inData, elemSize)));
  1210. break;
  1211. case type_varstring:
  1212. {
  1213. size32_t numChars = strlen((const char *) inData);
  1214. thisElem.setown(PyString_FromStringAndSize((const char *) inData, numChars));
  1215. if (elemSize == UNKNOWN_LENGTH)
  1216. thisSize = numChars + 1;
  1217. break;
  1218. }
  1219. case type_string:
  1220. if (elemSize == UNKNOWN_LENGTH)
  1221. {
  1222. thisSize = * (size32_t *) inData;
  1223. inData += sizeof(size32_t);
  1224. }
  1225. thisElem.setown(PyString_FromStringAndSize((const char *) inData, thisSize));
  1226. break;
  1227. case type_real:
  1228. if (elemSize == sizeof(double))
  1229. thisElem.setown(PyFloat_FromDouble(* (double *) inData));
  1230. else
  1231. thisElem.setown(PyFloat_FromDouble(* (float *) inData));
  1232. break;
  1233. case type_boolean:
  1234. assertex(elemSize == sizeof(bool));
  1235. thisElem.setown(PyBool_FromLong(*(bool*)inData ? 1 : 0));
  1236. break;
  1237. case type_unicode:
  1238. {
  1239. if (elemSize == UNKNOWN_LENGTH)
  1240. {
  1241. thisSize = (* (size32_t *) inData) * sizeof(UChar); // NOTE - it's in chars...
  1242. inData += sizeof(size32_t);
  1243. }
  1244. unsigned unicodeChars;
  1245. rtlDataAttr unicode;
  1246. rtlUnicodeToUtf8X(unicodeChars, unicode.refstr(), thisSize / sizeof(UChar), (const UChar *) inData);
  1247. size32_t sizeBytes = rtlUtf8Size(unicodeChars, unicode.getstr());
  1248. thisElem.setown(PyUnicode_FromStringAndSize(unicode.getstr(), sizeBytes)); // NOTE - requires size in bytes not chars
  1249. checkPythonError();
  1250. break;
  1251. }
  1252. case type_utf8:
  1253. {
  1254. assertex (elemSize == UNKNOWN_LENGTH);
  1255. size32_t numChars = * (size32_t *) inData;
  1256. inData += sizeof(size32_t);
  1257. thisSize = rtlUtf8Size(numChars, inData);
  1258. thisElem.setown(PyUnicode_FromStringAndSize((const char *) inData, thisSize)); // NOTE - requires size in bytes not chars
  1259. break;
  1260. }
  1261. case type_data:
  1262. if (elemSize == UNKNOWN_LENGTH)
  1263. {
  1264. thisSize = * (size32_t *) inData;
  1265. inData += sizeof(size32_t);
  1266. }
  1267. thisElem.setown(PyByteArray_FromStringAndSize((const char *) inData, thisSize));
  1268. break;
  1269. }
  1270. checkPythonError();
  1271. inData += thisSize;
  1272. PyList_Append(vval, thisElem);
  1273. }
  1274. addArg(name, vval.getLink());
  1275. }
  1276. virtual void bindRowParam(const char *name, IOutputMetaData & metaVal, byte *val)
  1277. {
  1278. const RtlTypeInfo *typeInfo = metaVal.queryTypeInfo();
  1279. assertex(typeInfo);
  1280. RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
  1281. PythonNamedTupleBuilder tupleBuilder(sharedCtx, &dummyField);
  1282. typeInfo->process(val, val, &dummyField, tupleBuilder); // Creates a tuple from the incoming ECL row
  1283. addArg(name, tupleBuilder.getTuple(typeInfo));
  1284. }
  1285. virtual void bindDatasetParam(const char *name, IOutputMetaData & metaVal, IRowStream * val)
  1286. {
  1287. addArg(name, createECLDatasetIterator(metaVal.queryTypeInfo(), LINK(val)));
  1288. }
  1289. protected:
  1290. virtual void addArg(const char *name, PyObject *arg) = 0;
  1291. PythonThreadContext *sharedCtx;
  1292. OwnedPyObject locals;
  1293. OwnedPyObject globals;
  1294. OwnedPyObject result;
  1295. OwnedPyObject script;
  1296. };
  1297. class Python27EmbedScriptContext : public Python27EmbedContextBase
  1298. {
  1299. public:
  1300. Python27EmbedScriptContext(PythonThreadContext *_sharedCtx, const char *options)
  1301. : Python27EmbedContextBase(_sharedCtx)
  1302. {
  1303. }
  1304. ~Python27EmbedScriptContext()
  1305. {
  1306. }
  1307. virtual void importFunction(size32_t lenChars, const char *text)
  1308. {
  1309. throwUnexpected();
  1310. }
  1311. virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
  1312. {
  1313. script.setown(sharedCtx->compileEmbeddedScript(lenChars, utf));
  1314. }
  1315. virtual void callFunction()
  1316. {
  1317. result.setown(PyEval_EvalCode((PyCodeObject *) script.get(), locals, globals));
  1318. checkPythonError();
  1319. if (!result || result == Py_None)
  1320. result.set(PyDict_GetItemString(locals, "__result__"));
  1321. if (!result || result == Py_None)
  1322. result.set(PyDict_GetItemString(globals, "__result__"));
  1323. }
  1324. protected:
  1325. virtual void addArg(const char *name, PyObject *arg)
  1326. {
  1327. if (!arg)
  1328. return;
  1329. assertex(arg);
  1330. PyDict_SetItemString(locals, name, arg);
  1331. Py_DECREF(arg);
  1332. checkPythonError();
  1333. }
  1334. };
  1335. class Python27EmbedImportContext : public Python27EmbedContextBase
  1336. {
  1337. public:
  1338. Python27EmbedImportContext(PythonThreadContext *_sharedCtx, const char *options)
  1339. : Python27EmbedContextBase(_sharedCtx)
  1340. {
  1341. argcount = 0;
  1342. }
  1343. ~Python27EmbedImportContext()
  1344. {
  1345. }
  1346. virtual void importFunction(size32_t lenChars, const char *utf)
  1347. {
  1348. script.setown(sharedCtx->importFunction(lenChars, utf));
  1349. }
  1350. virtual void compileEmbeddedScript(size32_t len, const char *text)
  1351. {
  1352. throwUnexpected();
  1353. }
  1354. virtual void callFunction()
  1355. {
  1356. result.setown(PyObject_CallObject(script, args));
  1357. checkPythonError();
  1358. }
  1359. private:
  1360. virtual void addArg(const char *name, PyObject *arg)
  1361. {
  1362. if (argcount)
  1363. _PyTuple_Resize(args.ref(), argcount+1);
  1364. else
  1365. args.setown(PyTuple_New(1));
  1366. PyTuple_SET_ITEM((PyTupleObject *) args.get(), argcount++, arg); // Note - 'steals' the arg reference
  1367. }
  1368. int argcount;
  1369. OwnedPyObject args;
  1370. };
  1371. class Python27EmbedContext : public CInterfaceOf<IEmbedContext>
  1372. {
  1373. public:
  1374. virtual IEmbedFunctionContext *createFunctionContext(unsigned flags, const char *options)
  1375. {
  1376. return createFunctionContextEx(NULL, flags, options);
  1377. }
  1378. virtual IEmbedFunctionContext *createFunctionContextEx(ICodeContext * ctx, unsigned flags, const char *options)
  1379. {
  1380. if (!threadContext)
  1381. {
  1382. if (!globalState.isInitialized())
  1383. rtlFail(0, "Python not initialized");
  1384. threadContext = new PythonThreadContext;
  1385. threadHookChain = addThreadTermFunc(releaseContext);
  1386. }
  1387. if (flags & EFimport)
  1388. return new Python27EmbedImportContext(threadContext, options);
  1389. else
  1390. return new Python27EmbedScriptContext(threadContext, options);
  1391. }
  1392. };
  1393. extern IEmbedContext* getEmbedContext()
  1394. {
  1395. return new Python27EmbedContext;
  1396. }
  // Syntax check hook. MORE - not implemented yet: every script is reported as valid.
  extern bool syntaxCheck(const char *script)
  {
      const bool accepted = true;
      return accepted;
  }
  1401. } // namespace