pyembed.cpp 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifdef _WIN32
  14. // There's an issue with Python redefining ssize_t resulting in errors - hide their definition
  15. #define ssize_t python_ssize_t
  16. #include "Python.h"
  17. #undef ssize_t
  18. #else
  19. #define register
  20. #include "Python.h"
  21. #endif
  22. #include "platform.h"
  23. #include "frameobject.h"
  24. #include "jexcept.hpp"
  25. #include "jutil.hpp"
  26. #include "jthread.hpp"
  27. #include "jregexp.hpp"
  28. #include "hqlplugins.hpp"
  29. #include "deftype.hpp"
  30. #include "eclhelper.hpp"
  31. #include "eclrtl.hpp"
  32. #include "eclrtl_imp.hpp"
  33. #include "rtlds_imp.hpp"
  34. #include "rtlfield.hpp"
  35. #include "nbcd.hpp"
  36. #include "roxiemem.hpp"
  37. #include "enginecontext.hpp"
  38. #include <regex>
  // Version string for this helper; getECLPluginDefinition stores it in pb->version.
  static const char *version = "Python2.7 Embed Helper 1.0.0";

  // Null-terminated list of version strings this plugin declares itself
  // compatible with (handed out through the extended definition block).
  static const char * compatibleVersions[] = {
      "Python2.7 Embed Helper 1.0.0",
      nullptr
  };
  43. extern "C" DECL_EXPORT bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
  44. {
  45. if (pb->size == sizeof(ECLPluginDefinitionBlockEx))
  46. {
  47. ECLPluginDefinitionBlockEx * pbx = (ECLPluginDefinitionBlockEx *) pb;
  48. pbx->compatibleVersions = compatibleVersions;
  49. }
  50. else if (pb->size != sizeof(ECLPluginDefinitionBlock))
  51. return false;
  52. pb->magicVersion = PLUGIN_VERSION;
  53. pb->version = version;
  54. pb->moduleName = "python";
  55. pb->ECL = NULL;
  56. pb->flags = PLUGIN_MULTIPLE_VERSIONS;
  57. pb->description = "Python2.7 Embed Helper";
  58. return true;
  59. }
  60. namespace py2embed {
  61. // Use class OwnedPyObject for any objects that are not 'borrowed references'
  62. // so that the appropriate Py_DECREF call is made when the OwnedPyObject goes
  63. // out of scope, even if the function returns prematurely (such as via an exception).
  64. // In particular, checkPythonError is a lot easier to call safely if this is used.
  65. class OwnedPyObject
  66. {
  67. PyObject *ptr;
  68. public:
  69. inline OwnedPyObject() : ptr(NULL) {}
  70. inline OwnedPyObject(PyObject *_ptr) : ptr(_ptr) {}
  71. inline ~OwnedPyObject() { if (ptr) Py_DECREF(ptr); }
  72. inline PyObject * get() const { return ptr; }
  73. inline PyObject * getClear() { PyObject *ret = ptr; ptr = NULL; return ret; }
  74. inline PyObject * operator -> () const { return ptr; }
  75. inline operator PyObject *() const { return ptr; }
  76. inline void clear() { if (ptr) Py_DECREF(ptr); ptr = NULL; }
  77. inline void setown(PyObject *_ptr) { clear(); ptr = _ptr; }
  78. inline void set(PyObject *_ptr) { if (_ptr) Py_INCREF(_ptr); clear(); ptr = _ptr; }
  79. inline PyObject *getLink() { if (ptr) Py_INCREF(ptr); return ptr;}
  80. inline PyObject **ref() { return &ptr; }
  81. };
  // OwnedPyX<X> is the typed counterpart of OwnedPyObject: it owns one reference
  // to a Python object that is declared with a more specific C struct type
  // (e.g. PyCodeObject, PyFrameObject) and releases it with Py_DECREF on destruction.
  template <class X>
  class OwnedPyX
  {
      X *ptr;
  public:
      // Constructors/destructor are named without the <X> template-id: the
      // template-id form is rejected by C++20 compilers.
      inline OwnedPyX() : ptr(NULL) {}
      // Adopts the caller's reference to _ptr; no additional reference is taken.
      inline OwnedPyX(X *_ptr) : ptr(_ptr) {}
      inline ~OwnedPyX() { if (ptr) Py_DECREF(ptr); }

      // Plain accessors - the wrapper keeps its reference.
      inline X * get() const { return ptr; }
      inline X * operator -> () const { return ptr; }
      inline operator X *() const { return ptr; }

      // Hands the held reference to the caller and leaves the wrapper empty.
      // The local is X* (not PyObject*) so that this compiles when instantiated
      // for any X, not just X == PyObject.
      inline X * getClear() { X *ret = ptr; ptr = NULL; return ret; }
      // Drops the held reference (if any) and leaves the wrapper empty.
      inline void clear() { if (ptr) Py_DECREF(ptr); ptr = NULL; }
      // Replaces the held reference with _ptr, adopting the caller's reference.
      inline void setown(X *_ptr) { clear(); ptr = _ptr; }
      // Replaces the held reference with a new reference to _ptr. The increment
      // happens before the old reference is dropped, so set(get()) is safe.
      inline void set(X *_ptr) { if (_ptr) Py_INCREF(_ptr); clear(); ptr = _ptr; }
      // Returns the held pointer with an extra reference added for the caller.
      inline X *getLink() { if (ptr) Py_INCREF(ptr); return ptr;}
      // Address of the internal pointer, for APIs that return a reference through
      // an out-parameter. Any reference already held is not released.
      inline X **ref() { return &ptr; }
  };
  100. __declspec(noreturn) static void failx(const char *msg, ...) __attribute__((format(printf, 1, 2), noreturn));
  101. static void failx(const char *message, ...)
  102. {
  103. va_list args;
  104. va_start(args,message);
  105. StringBuffer msg;
  106. msg.append("pyembed: ").valist_appendf(message,args);
  107. va_end(args);
  108. rtlFail(0, msg.str());
  109. }
  110. // call checkPythonError to throw an exception if Python error state is set
  111. static void checkPythonError()
  112. {
  113. PyObject* err = PyErr_Occurred();
  114. if (err)
  115. {
  116. OwnedPyObject pType, pValue, pTraceBack;
  117. PyErr_Fetch(pType.ref(), pValue.ref(), pTraceBack.ref());
  118. OwnedPyObject valStr = PyObject_Str(pValue);
  119. PyErr_Clear();
  120. failx("%s", PyString_AsString(valStr));
  121. }
  122. }
  123. // The Python Global Interpreter Lock (GIL) won't know about C++-created threads, so we need to
  124. // call PyGILState_Ensure() and PyGILState_Release at the start and end of every function.
  125. // Wrapping them in a class like this ensures that the release always happens even if
  126. // the function exists prematurely
  127. class GILstateWrapper
  128. {
  129. PyGILState_STATE gstate;
  130. public:
  131. GILstateWrapper()
  132. {
  133. gstate = PyGILState_Ensure();
  134. }
  135. ~GILstateWrapper()
  136. {
  137. PyGILState_Release(gstate);
  138. }
  139. };
  140. // There is a singleton PythonThreadContext per thread. This allows us to
  141. // ensure that we can make repeated calls to a Python function efficiently.
  142. // Note that we assume that a thread is not shared between workunits/queries
  143. class PythonThreadContext
  144. {
  145. public:
  146. PyThreadState *threadState;
  147. public:
  148. PythonThreadContext()
  149. {
  150. threadState = PyEval_SaveThread();
  151. lrutype = NULL;
  152. }
  153. ~PythonThreadContext()
  154. {
  155. PyEval_RestoreThread(threadState);
  156. script.clear();
  157. module.clear();
  158. lru.clear();
  159. }
  160. void addManifestFiles(ICodeContext *codeCtx);
  161. PyObject * importFunction(ICodeContext *codeCtx, size32_t lenChars, const char *utf)
  162. {
  163. size32_t bytes = rtlUtf8Size(lenChars, utf);
  164. StringBuffer text(bytes, utf);
  165. if (!prevtext || strcmp(text, prevtext) != 0)
  166. {
  167. prevtext.clear();
  168. // Name should be in the form module.function
  169. const char *funcname = strrchr(text, '.');
  170. if (!funcname)
  171. rtlFail(0, "pyembed: Expected module.function");
  172. addManifestFiles(codeCtx);
  173. StringBuffer modname(funcname-text, text);
  174. funcname++; // skip the '.'
  175. // If the modname is preceded by a path, add it temporarily to the Python path before importing
  176. bool addedPath = false;
  177. PyObject *sysPath = PySys_GetObject((char *) "path");
  178. if (!sysPath)
  179. rtlFail(0, "pyembed: sys.path returned null");
  180. OwnedPyObject newpath;
  181. const char *pathsep = strrchr(modname, PATHSEPCHAR);
  182. if (pathsep)
  183. {
  184. StringBuffer path(pathsep-modname, modname);
  185. modname.remove(0, 1+pathsep-modname);
  186. newpath.setown(PyString_FromString(path));
  187. Py_ssize_t found = PySequence_Index(sysPath, newpath);
  188. if (found == (Py_ssize_t)-1)
  189. {
  190. PyErr_Clear();
  191. PyList_Insert(sysPath, 0, newpath);
  192. addedPath = true;
  193. }
  194. checkPythonError();
  195. }
  196. module.setown(PyImport_ImportModule(modname));
  197. checkPythonError();
  198. if (pathsep)
  199. {
  200. // Immediately remove the temporary location from the path (if we added it),
  201. // and the just-imported module from the system cache,
  202. // otherwise other code that imports similar name from other location fails.
  203. if (addedPath)
  204. {
  205. Py_ssize_t found = PySequence_Index(sysPath, newpath); // Very likely to be zero, but should we assume? You could argue we should restore path to state prior to import, whatever
  206. if (found != (Py_ssize_t)-1)
  207. PySequence_DelItem(sysPath, found);
  208. else
  209. PyErr_Clear();
  210. }
  211. PyObject *sysModules = PySys_GetObject((char *) "modules");
  212. DBGLOG("Unloading module %s", modname.str());
  213. OwnedPyObject pyMod = PyString_FromString(modname);
  214. PyDict_DelItem(sysModules, pyMod);
  215. checkPythonError();
  216. }
  217. PyObject *dict = PyModule_GetDict(module); // this is a borrowed reference and does not need to be released
  218. script.set(PyDict_GetItemString(dict, funcname));
  219. checkPythonError();
  220. if (!script || !PyCallable_Check(script))
  221. rtlFail(0, "pyembed: Object is not callable");
  222. prevtext.set(text);
  223. }
  224. return script.getLink();
  225. }
  226. PyObject *compileEmbeddedScript(ICodeContext *codeCtx, size32_t lenChars, const char *utf, const char *argstring);
  227. PyObject *getNamedTupleType(const RtlTypeInfo *type);
  228. private:
  229. GILstateWrapper GILState;
  230. OwnedPyObject module;
  231. OwnedPyObject script;
  232. OwnedPyObject lru;
  233. const RtlTypeInfo *lrutype;
  234. StringAttr prevtext;
  235. bool manifestAdded = false;
  236. };
  237. static __thread PythonThreadContext* threadContext; // We reuse per thread, for speed
  238. static bool releaseContext(bool isPooled)
  239. {
  240. if (threadContext)
  241. {
  242. delete threadContext;
  243. threadContext = NULL;
  244. }
  245. return false;
  246. }
  247. // Use a global object to ensure that the Python interpreter is initialized on main thread
  248. static HINSTANCE keepLoadedHandle;
  249. static class Python27GlobalState
  250. {
  251. public:
  252. Python27GlobalState()
  253. {
  254. pythonLibrary = (HINSTANCE) 0;
  255. #ifndef _WIN32
  256. // If Py_Initialize is called when stdin is set to a directory, it calls exit()
  257. // We don't want that to happen - just disable Python support in such situations
  258. struct stat sb;
  259. if (fstat(fileno(stdin), &sb) == 0 && S_ISDIR(sb.st_mode))
  260. {
  261. initialized = false;
  262. return;
  263. }
  264. StringBuffer py3modname;
  265. if (findLoadedModule(py3modname, "libpy3embed."))
  266. {
  267. initialized = false;
  268. multiPython = true;
  269. return;
  270. }
  271. #endif
  272. #ifndef _WIN32
  273. // We need to ensure all symbols in the python2.x so are loaded - due to bugs in some distro's python installations
  274. // However this will likely break python3.
  275. // Therefore on systems where both are present, do NOT do this - people using centos systems that suffer from issue
  276. // https://bugs.centos.org/view.php?id=6063 will need to choose which version of python plugin to install but not both
  277. StringBuffer modname;
  278. if (findLoadedModule(modname, "libpython2."))
  279. pythonLibrary = dlopen(modname.str(), RTLD_NOW|RTLD_GLOBAL);
  280. #endif
  281. // Initialize the Python Interpreter
  282. Py_Initialize();
  283. const char *argv[] = { nullptr };
  284. PySys_SetArgvEx(0, (char **) argv, 0);
  285. PyEval_InitThreads();
  286. preservedScopes.setown(PyDict_New());
  287. tstate = PyEval_SaveThread();
  288. skipPythonCleanup = queryEnvironmentConf().getPropBool("skipPythonCleanup", true);
  289. initialized = true;
  290. }
  291. ~Python27GlobalState()
  292. {
  293. if (threadContext)
  294. delete threadContext; // The one on the main thread won't get picked up by the thread hook mechanism
  295. threadContext = NULL;
  296. if (initialized && !skipPythonCleanup)
  297. {
  298. PyEval_RestoreThread(tstate);
  299. // Finish the Python Interpreter
  300. namedtuple.clear();
  301. namedtupleTypes.clear();
  302. compiledScripts.clear();
  303. preservedScopes.clear();
  304. Py_Finalize();
  305. if (pythonLibrary)
  306. FreeSharedObject(pythonLibrary);
  307. }
  308. else
  309. {
  310. // Need to avoid releasing the associated py objects when these members destructors are called.
  311. namedtuple.getClear();
  312. namedtupleTypes.getClear();
  313. compiledScripts.getClear();
  314. preservedScopes.getClear();
  315. }
  316. }
  317. void checkInitialized()
  318. {
  319. if (multiPython)
  320. rtlFail(0, "Python2 not initialized as Python3 already loaded");
  321. else if (!initialized)
  322. rtlFail(0, "Python2 not initialized");
  323. }
  324. bool isInitialized()
  325. {
  326. return initialized;
  327. }
  328. PyFrameObject *pushDummyFrame()
  329. {
  330. PyThreadState* threadstate = PyThreadState_GET();
  331. if (!threadstate->frame)
  332. {
  333. OwnedPyObject globals = PyDict_New();
  334. OwnedPyObject locals = PyDict_New();
  335. OwnedPyObject dummyString = PyString_FromString("Dummy");
  336. OwnedPyObject dummyTuple = PyTuple_New(0);
  337. OwnedPyObject empty = PyString_FromString("");
  338. OwnedPyX<PyCodeObject> code = PyCode_New(0,0,0,0,empty,dummyTuple,dummyTuple,dummyTuple,dummyTuple,dummyTuple,dummyString,dummyString,0,empty);
  339. // OwnedPyX<PyCodeObject> code = PyCode_NewEmpty("<dummy>","<dummy>", 0); // (this would be easier but won't compile in Python 2.6)
  340. checkPythonError();
  341. PyFrameObject *frame = PyFrame_New(threadstate, code, globals, locals);
  342. checkPythonError();
  343. threadstate->frame = frame;
  344. return frame;
  345. }
  346. return NULL;
  347. }
  348. void popDummyFrame(PyFrameObject *frame)
  349. {
  350. PyThreadState* threadstate = PyThreadState_GET();
  351. if (threadstate->frame == frame)
  352. threadstate->frame = NULL;
  353. }
  354. PyObject *getActivityContextTupleType()
  355. {
  356. // Note - we do not need (and must not have) a lock protecting this. It is protected by the Python GIL,
  357. // and if we add our own lock we are liable to deadlock as the code within Py_CompileStringFlags may
  358. // temporarily release then re-acquire the GIL.
  359. if (!activityContextTupleType)
  360. activityContextTupleType.setown(getNamedTupleType("isLocal,numSlaves,numStrands,slave,strand"));
  361. return activityContextTupleType.get();
  362. }
  363. PyObject *getNamedTupleType(const char *names)
  364. {
  365. // It seems the customized namedtuple types leak, and they are slow to create, so take care to reuse
  366. // Note - we do not need (and must not have) a lock protecting this. It is protected by the Python GIL,
  367. // and if we add our own lock we are liable to deadlock as the code within Py_CompileStringFlags may
  368. // temporarily release then re-acquire the GIL.
  369. if (!namedtuple)
  370. {
  371. namedtupleTypes.setown(PyDict_New());
  372. OwnedPyObject pName = PyString_FromString("collections");
  373. OwnedPyObject collections = PyImport_Import(pName);
  374. checkPythonError();
  375. namedtuple.setown(PyObject_GetAttrString(collections, "namedtuple"));
  376. checkPythonError();
  377. assertex(PyCallable_Check(namedtuple));
  378. }
  379. OwnedPyObject pnames = PyString_FromString(names);
  380. OwnedPyObject mynamedtupletype;
  381. checkPythonError();
  382. mynamedtupletype.set(PyDict_GetItem(namedtupleTypes, pnames)); // NOTE - returns borrowed reference
  383. if (!mynamedtupletype)
  384. {
  385. OwnedPyObject recname = PyString_FromString("namerec"); // MORE - do we care what the name is?
  386. OwnedPyObject ntargs = PyTuple_Pack(2, recname.get(), pnames.get());
  387. checkPythonError();
  388. OwnedPyX<PyFrameObject> frame = pushDummyFrame();
  389. mynamedtupletype.setown(PyObject_CallObject(namedtuple, ntargs));
  390. popDummyFrame(frame);
  391. checkPythonError();
  392. PyDict_SetItem(namedtupleTypes, pnames, mynamedtupletype);
  393. }
  394. checkPythonError();
  395. assertex(PyCallable_Check(mynamedtupletype));
  396. return mynamedtupletype.getClear();
  397. }
  398. PyObject *getNamedTupleType(const RtlTypeInfo *type)
  399. {
  400. const RtlFieldInfo * const *fields = type->queryFields();
  401. if (!fields && type->queryChildType())
  402. fields = type->queryChildType()->queryFields();
  403. assertex(fields);
  404. StringBuffer names;
  405. while (*fields)
  406. {
  407. const RtlFieldInfo *field = *fields;
  408. if (names.length())
  409. names.append(',');
  410. names.append(field->name);
  411. fields++;
  412. }
  413. return getNamedTupleType(names.str());
  414. }
  415. StringBuffer & reformatCompilerError(StringBuffer &ret, const char *error, unsigned leadingLines)
  416. {
  417. // Errors from compiler tend to look like this:
  418. // "('invalid syntax', ('<embed>', 3, 12, ' sfsf ss fs dfs f sfs\n'))"
  419. const char pattern [] = "\\('(.*)', \\('.*', ([0-9]*), ([0-9]*), (.*)\\)\\)";
  420. // Hopefully there are no embedded quotes in the error message or the filename
  421. rtlCompiledStrRegex r;
  422. size32_t outlen;
  423. char * out = NULL;
  424. r.setPattern(pattern, false);
  425. r->replace(outlen, out, strlen(error), error, 2, "$2");
  426. if (outlen < strlen(error))
  427. {
  428. unsigned line = atoi(out);
  429. rtlFree(out);
  430. if (line > leadingLines)
  431. line--;
  432. r->replace(outlen, out, strlen(error), error, 13, ", $3): $1: $4");
  433. ret.appendf("(%d", line);
  434. }
  435. ret.append(outlen, out);
  436. rtlFree(out);
  437. return ret;
  438. }
  439. PyObject *compileScript(const char *text, const char *parameters)
  440. {
  441. // Note - we do not need (and must not have) a lock protecting this. It is protected by the Python GIL,
  442. // and if we add our own lock we are liable to deadlock as the code within Py_CompileStringFlags may
  443. // temporarily release then re-acquire the GIL.
  444. if (!compiledScripts)
  445. compiledScripts.setown(PyDict_New());
  446. OwnedPyObject code;
  447. code.set(PyDict_GetItemString(compiledScripts, text));
  448. if (!code)
  449. {
  450. unsigned leadingLines = (unsigned) -1; // Number of lines from input that have not been offset by 1 line in input to compiler
  451. code.setown(Py_CompileString(text, "", Py_eval_input)); // try compiling as simple expression...
  452. if (!code)
  453. {
  454. PyErr_Clear();
  455. PyCompilerFlags flags = { PyCF_SOURCE_IS_UTF8 };
  456. code.setown(Py_CompileStringFlags(text, "<embed>", Py_file_input, &flags)); // try compiling as global code
  457. if (!code)
  458. {
  459. PyErr_Clear();
  460. StringBuffer wrapped;
  461. wrapPythonText(wrapped, text, parameters, leadingLines);
  462. code.setown(Py_CompileStringFlags(wrapped, "<embed>", Py_file_input, &flags)); // try compiling as a function body
  463. }
  464. }
  465. PyObject* err = PyErr_Occurred();
  466. if (err)
  467. {
  468. OwnedPyObject pType, pValue, pTraceBack;
  469. PyErr_Fetch(pType.ref(), pValue.ref(), pTraceBack.ref());
  470. OwnedPyObject valStr = PyObject_Str(pValue);
  471. PyErr_Clear();
  472. // We reformat the error message a little, to make it more helpful
  473. StringBuffer msg;
  474. reformatCompilerError(msg, PyString_AsString(valStr), leadingLines);
  475. rtlFail(0, msg.str());
  476. }
  477. if (code)
  478. PyDict_SetItemString(compiledScripts, text, code);
  479. }
  480. return code.getClear();
  481. }
  482. PyObject *getNamedScope(const char *key, bool &isNew)
  483. {
  484. if (!preservedScopes)
  485. preservedScopes.setown(PyDict_New());
  486. OwnedPyObject scope;
  487. scope.set(PyDict_GetItemString(preservedScopes, key));
  488. if (!scope)
  489. {
  490. scope.setown(PyDict_New());
  491. PyDict_SetItemString(preservedScopes, key, scope);
  492. isNew = true;
  493. }
  494. else
  495. isNew = false;
  496. return scope.getClear();
  497. }
  498. void releaseNamedScope(const char *key)
  499. {
  500. if (preservedScopes)
  501. {
  502. PyDict_DelItemString(preservedScopes, key);
  503. PyErr_Clear(); // Should be present, but ignore the error if it is not
  504. }
  505. }
  506. static void unregister(const char *key);
  507. static void removePath(const char *file);
  508. protected:
  509. static StringBuffer &wrapPythonText(StringBuffer &out, const char *in, const char *params, unsigned &leadingLines)
  510. {
  511. // Complicated by needing to keep future import lines outside defined function
  512. // Per python spec, a future statement must appear near the top of the module. The only lines that can appear before a future statement are:
  513. // the module docstring (if any),
  514. // comments,
  515. // blank lines, and
  516. // other future statements.
  517. // We don't attempt to parse the python to spot these - instead, we pull all lines up to and including the last future statement out to the global scope.
  518. // Because this is a little unsophisticated it will be fooled by code that includes things that look like future statements inside multiline strings.
  519. // I don't care.
  520. StringArray lines;
  521. lines.appendList(in, "\n", false);
  522. RegExpr expr("^ *from +__future__ +import ");
  523. leadingLines = 0;
  524. ForEachItemIn(idx, lines)
  525. {
  526. if (expr.find(lines.item(idx)))
  527. leadingLines = idx+1;
  528. }
  529. for (unsigned leadingLine = 0; leadingLine < leadingLines; leadingLine++)
  530. out.append(lines.item(leadingLine)).append('\n');
  531. out.appendf("def __user__(%s):\n", params);
  532. for (unsigned line = leadingLines; line < lines.length(); line++)
  533. out.append(" ").append(lines.item(line)).append('\n');
  534. out.appendf("__result__ = __user__(%s)\n", params);
  535. return out;
  536. }
  537. PyThreadState *tstate = nullptr;
  538. bool initialized = false;
  539. bool multiPython = false;
  540. bool skipPythonCleanup = true; // Tensorflow seems to often lockup in the python cleanup process.
  541. HINSTANCE pythonLibrary = 0;
  542. OwnedPyObject namedtuple; // collections.namedtuple
  543. OwnedPyObject namedtupleTypes; // dictionary of return values from namedtuple()
  544. OwnedPyObject compiledScripts; // dictionary of previously compiled scripts
  545. OwnedPyObject preservedScopes; // dictionary of preserved scopes
  546. OwnedPyObject activityContextTupleType; // type used for activity context
  547. } globalState;
  548. MODULE_INIT(INIT_PRIORITY_STANDARD)
  549. {
  550. // Make sure we are never dynamically unloaded (as Python may crash if we are)
  551. // we do this by doing a dynamic load of the pyembed library
  552. // This also allows eclcc to be able to use the library for constant folding
  553. #ifdef _WIN32
  554. HINSTANCE me = GetModuleHandle("py2embed");
  555. if (me)
  556. {
  557. char helperLibraryName[_MAX_PATH];
  558. ::GetModuleFileName(me, helperLibraryName, _MAX_PATH);
  559. if (strstr(helperLibraryName, "py2embed"))
  560. {
  561. HINSTANCE h = LoadSharedObject(helperLibraryName, false, false);
  562. DBGLOG("LoadSharedObject returned %p", h);
  563. }
  564. }
  565. #else
  566. StringBuffer modname;
  567. if (findLoadedModule(modname, "libpy2embed"))
  568. {
  569. keepLoadedHandle = LoadSharedObject(modname, false, false);
  570. }
  571. #endif
  572. return true;
  573. }
  574. static void checkThreadContext()
  575. {
  576. if (!threadContext)
  577. {
  578. globalState.checkInitialized();
  579. threadContext = new PythonThreadContext;
  580. addThreadTermFunc(releaseContext);
  581. }
  582. }
  583. void PythonThreadContext::addManifestFiles(ICodeContext *codeCtx)
  584. {
  585. if (codeCtx && !manifestAdded) // MORE - this assumes we never reuse a thread for a different workunit, without the thread termination hooks having been called
  586. {
  587. manifestAdded = true;
  588. IEngineContext *engine = codeCtx->queryEngineContext();
  589. if (engine)
  590. {
  591. StringArray manifestModules;
  592. engine->getManifestFiles("pyzip", manifestModules);
  593. if (manifestModules.length())
  594. {
  595. PyObject *sysPath = PySys_GetObject((char *) "path");
  596. if (!sysPath)
  597. rtlFail(0, "pyembed: sys.path returned null");
  598. ForEachItemIn(idx, manifestModules)
  599. {
  600. const char *path = manifestModules.item(idx);
  601. DBGLOG("Manifest zip %s", path);
  602. OwnedPyObject newPath = PyString_FromString(path);
  603. PyList_Insert(sysPath, 0, newPath);
  604. checkPythonError();
  605. engine->onTermination(Python27GlobalState::removePath, manifestModules.item(idx), true);
  606. }
  607. }
  608. }
  609. }
  610. }
  611. PyObject *PythonThreadContext::getNamedTupleType(const RtlTypeInfo *type)
  612. {
  613. if (!lru || (type!=lrutype))
  614. {
  615. lru.setown(globalState.getNamedTupleType(type));
  616. lrutype = type;
  617. }
  618. return lru.getLink();
  619. }
  620. PyObject *PythonThreadContext::compileEmbeddedScript(ICodeContext *codeCtx, size32_t lenChars, const char *utf, const char *argstring)
  621. {
  622. size32_t bytes = rtlUtf8Size(lenChars, utf);
  623. StringBuffer text(bytes, utf);
  624. if (!prevtext || strcmp(text, prevtext) != 0)
  625. {
  626. prevtext.clear();
  627. text.stripChar('\r');
  628. addManifestFiles(codeCtx);
  629. script.setown(globalState.compileScript(text, argstring));
  630. prevtext.set(utf, bytes);
  631. }
  632. return script.getLink();
  633. }
  634. // Conversions from Python objects to ECL data
  635. __declspec(noreturn) static void typeError(const char *expected, const RtlFieldInfo *field) __attribute__((noreturn));
  636. static void typeError(const char *expected, const RtlFieldInfo *field)
  637. {
  638. VStringBuffer msg("pyembed: type mismatch - %s expected", expected);
  639. if (field)
  640. msg.appendf(" for field %s", field->name);
  641. else
  642. msg.appendf(" for return value");
  643. rtlFail(0, msg.str());
  644. }
  645. static bool getBooleanResult(const RtlFieldInfo *field, PyObject *obj)
  646. {
  647. if (obj && obj != Py_None)
  648. {
  649. if (PyBool_Check(obj))
  650. return obj == Py_True;
  651. }
  652. typeError("boolean", field);
  653. }
  654. static void getDataResult(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, void * &result)
  655. {
  656. if (obj && obj != Py_None && PyByteArray_Check(obj))
  657. rtlStrToDataX(chars, result, PyByteArray_Size(obj), PyByteArray_AsString(obj));
  658. else
  659. typeError("bytearray", field);
  660. }
  661. static double getRealResult(const RtlFieldInfo *field, PyObject *obj)
  662. {
  663. if (obj && obj != Py_None)
  664. {
  665. if (PyFloat_Check(obj))
  666. return PyFloat_AsDouble(obj);
  667. }
  668. typeError("real", field);
  669. }
  670. static __int64 getSignedResult(const RtlFieldInfo *field, PyObject *obj)
  671. {
  672. if (obj && obj != Py_None)
  673. {
  674. if (PyInt_Check(obj))
  675. return PyInt_AsUnsignedLongLongMask(obj);
  676. else if (PyLong_Check(obj))
  677. return (__int64) PyLong_AsLongLong(obj);
  678. }
  679. typeError("integer", field);
  680. }
  681. static unsigned __int64 getUnsignedResult(const RtlFieldInfo *field, PyObject *obj)
  682. {
  683. if (obj && obj != Py_None)
  684. {
  685. if (PyInt_Check(obj))
  686. return PyInt_AsUnsignedLongLongMask(obj);
  687. else if (PyLong_Check(obj))
  688. return (unsigned __int64) PyLong_AsUnsignedLongLong(obj);
  689. }
  690. typeError("integer", field);
  691. }
  692. static void getStringResult(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, char * &result)
  693. {
  694. if (obj && obj != Py_None && PyString_Check(obj))
  695. {
  696. const char * text = PyString_AsString(obj);
  697. checkPythonError();
  698. size_t lenBytes = PyString_Size(obj);
  699. rtlStrToStrX(chars, result, lenBytes, text);
  700. }
  701. else
  702. typeError("string", field);
  703. }
  704. static void getUTF8Result(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, char * &result)
  705. {
  706. if (obj && obj != Py_None && PyUnicode_Check(obj))
  707. {
  708. OwnedPyObject utf8 = PyUnicode_AsUTF8String(obj);
  709. checkPythonError();
  710. size_t lenBytes = PyString_Size(utf8);
  711. const char * text = PyString_AsString(utf8);
  712. checkPythonError();
  713. size32_t numchars = rtlUtf8Length(lenBytes, text);
  714. rtlUtf8ToUtf8X(chars, result, numchars, text);
  715. }
  716. else
  717. typeError("unicode string", field);
  718. }
  719. static void getSetResult(PyObject *obj, bool & isAllResult, size32_t & resultBytes, void * & result, int elemType, size32_t elemSize)
  720. {
  721. // MORE - should probably recode to use the getResultDataset mechanism
  722. if (!obj || obj == Py_None || (!PyList_Check(obj) && !PySet_Check(obj)))
  723. rtlFail(0, "pyembed: type mismatch - list or set expected");
  724. rtlRowBuilder out;
  725. size32_t outBytes = 0;
  726. byte *outData = NULL;
  727. OwnedPyObject iter = PyObject_GetIter(obj);
  728. OwnedPyObject elem;
  729. for (elem.setown(PyIter_Next(iter)); elem != NULL; elem.setown(PyIter_Next(iter)))
  730. {
  731. if (elemSize != UNKNOWN_LENGTH)
  732. {
  733. out.ensureAvailable(outBytes + elemSize);
  734. outData = out.getbytes() + outBytes;
  735. outBytes += elemSize;
  736. }
  737. switch ((type_t) elemType)
  738. {
  739. case type_int:
  740. rtlWriteInt(outData, py2embed::getSignedResult(NULL, elem), elemSize);
  741. break;
  742. case type_unsigned:
  743. rtlWriteInt(outData, py2embed::getUnsignedResult(NULL, elem), elemSize);
  744. break;
  745. case type_real:
  746. if (elemSize == sizeof(double))
  747. * (double *) outData = (double) py2embed::getRealResult(NULL, elem);
  748. else
  749. {
  750. assertex(elemSize == sizeof(float));
  751. * (float *) outData = (float) py2embed::getRealResult(NULL, elem);
  752. }
  753. break;
  754. case type_boolean:
  755. assertex(elemSize == sizeof(bool));
  756. * (bool *) outData = py2embed::getBooleanResult(NULL, elem);
  757. break;
  758. case type_string:
  759. case type_varstring:
  760. {
  761. if (!PyString_Check(elem))
  762. rtlFail(0, "pyembed: type mismatch - return value in list was not a STRING");
  763. const char * text = PyString_AsString(elem);
  764. checkPythonError();
  765. size_t lenBytes = PyString_Size(elem);
  766. if (elemSize == UNKNOWN_LENGTH)
  767. {
  768. if (elemType == type_string)
  769. {
  770. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  771. outData = out.getbytes() + outBytes;
  772. * (size32_t *) outData = lenBytes;
  773. rtlStrToStr(lenBytes, outData+sizeof(size32_t), lenBytes, text);
  774. outBytes += lenBytes + sizeof(size32_t);
  775. }
  776. else
  777. {
  778. out.ensureAvailable(outBytes + lenBytes + 1);
  779. outData = out.getbytes() + outBytes;
  780. rtlStrToVStr(0, outData, lenBytes, text);
  781. outBytes += lenBytes + 1;
  782. }
  783. }
  784. else
  785. {
  786. if (elemType == type_string)
  787. rtlStrToStr(elemSize, outData, lenBytes, text);
  788. else
  789. rtlStrToVStr(elemSize, outData, lenBytes, text); // Fixed size null terminated strings... weird.
  790. }
  791. break;
  792. }
  793. case type_unicode:
  794. case type_utf8:
  795. {
  796. if (!PyUnicode_Check(elem))
  797. rtlFail(0, "pyembed: type mismatch - return value in list was not a unicode STRING");
  798. OwnedPyObject utf8 = PyUnicode_AsUTF8String(elem);
  799. checkPythonError();
  800. size_t lenBytes = PyString_Size(utf8);
  801. const char * text = PyString_AsString(utf8);
  802. checkPythonError();
  803. size32_t numchars = rtlUtf8Length(lenBytes, text);
  804. if (elemType == type_utf8)
  805. {
  806. assertex (elemSize == UNKNOWN_LENGTH);
  807. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  808. outData = out.getbytes() + outBytes;
  809. * (size32_t *) outData = numchars;
  810. rtlStrToStr(lenBytes, outData+sizeof(size32_t), lenBytes, text);
  811. outBytes += lenBytes + sizeof(size32_t);
  812. }
  813. else
  814. {
  815. if (elemSize == UNKNOWN_LENGTH)
  816. {
  817. out.ensureAvailable(outBytes + numchars*sizeof(UChar) + sizeof(size32_t));
  818. outData = out.getbytes() + outBytes;
  819. // You can't assume that number of chars in utf8 matches number in unicode16 ...
  820. size32_t numchars16;
  821. rtlDataAttr unicode16;
  822. rtlUtf8ToUnicodeX(numchars16, unicode16.refustr(), numchars, text);
  823. * (size32_t *) outData = numchars16;
  824. rtlUnicodeToUnicode(numchars16, (UChar *) (outData+sizeof(size32_t)), numchars16, unicode16.getustr());
  825. outBytes += numchars16*sizeof(UChar) + sizeof(size32_t);
  826. }
  827. else
  828. rtlUtf8ToUnicode(elemSize / sizeof(UChar), (UChar *) outData, numchars, text);
  829. }
  830. break;
  831. }
  832. case type_data:
  833. {
  834. if (!PyByteArray_Check(elem))
  835. rtlFail(0, "pyembed: type mismatch - return value in list was not a bytearray");
  836. size_t lenBytes = PyByteArray_Size(elem); // Could check does not overflow size32_t
  837. const char *data = PyByteArray_AsString(elem);
  838. if (elemSize == UNKNOWN_LENGTH)
  839. {
  840. out.ensureAvailable(outBytes + lenBytes + sizeof(size32_t));
  841. outData = out.getbytes() + outBytes;
  842. * (size32_t *) outData = lenBytes;
  843. rtlStrToData(lenBytes, outData+sizeof(size32_t), lenBytes, data);
  844. outBytes += lenBytes + sizeof(size32_t);
  845. }
  846. else
  847. rtlStrToData(elemSize, outData, lenBytes, data);
  848. break;
  849. }
  850. default:
  851. rtlFail(0, "pyembed: type mismatch - unsupported return type");
  852. break;
  853. }
  854. checkPythonError();
  855. }
  856. isAllResult = false;
  857. resultBytes = outBytes;
  858. result = out.detachdata();
  859. }
  860. static void getUnicodeResult(const RtlFieldInfo *field, PyObject *obj, size32_t &chars, UChar * &result)
  861. {
  862. if (obj && obj != Py_None && PyUnicode_Check(obj))
  863. {
  864. OwnedPyObject utf8 = PyUnicode_AsUTF8String(obj);
  865. checkPythonError();
  866. size_t lenBytes = PyString_Size(utf8);
  867. const char * text = PyString_AsString(utf8);
  868. checkPythonError();
  869. size32_t numchars = rtlUtf8Length(lenBytes, text);
  870. rtlUtf8ToUnicodeX(chars, result, numchars, text);
  871. }
  872. else
  873. typeError("unicode string", field);
  874. }
  875. // A PythonRowBuilder object is used to construct an ECL row from a python object
  876. class PythonRowBuilder : public CInterfaceOf<IFieldSource>
  877. {
  878. public:
  879. PythonRowBuilder(PyObject *_row)
  880. : iter(NULL), elem(NULL), named(false)
  881. {
  882. pushback.set(_row);
  883. }
  884. virtual bool getBooleanResult(const RtlFieldInfo *field)
  885. {
  886. nextField(field);
  887. return py2embed::getBooleanResult(field, elem);
  888. }
  889. virtual void getDataResult(const RtlFieldInfo *field, size32_t &len, void * &result)
  890. {
  891. nextField(field);
  892. py2embed::getDataResult(field, elem, len, result);
  893. }
  894. virtual double getRealResult(const RtlFieldInfo *field)
  895. {
  896. nextField(field);
  897. return py2embed::getRealResult(field, elem);
  898. }
  899. virtual __int64 getSignedResult(const RtlFieldInfo *field)
  900. {
  901. nextField(field);
  902. return py2embed::getSignedResult(field, elem);
  903. }
  904. virtual unsigned __int64 getUnsignedResult(const RtlFieldInfo *field)
  905. {
  906. nextField(field);
  907. return py2embed::getUnsignedResult(field, elem);
  908. }
  909. virtual void getStringResult(const RtlFieldInfo *field, size32_t &chars, char * &result)
  910. {
  911. nextField(field);
  912. py2embed::getStringResult(field, elem, chars, result);
  913. }
  914. virtual void getUTF8Result(const RtlFieldInfo *field, size32_t &chars, char * &result)
  915. {
  916. nextField(field);
  917. py2embed::getUTF8Result(field, elem, chars, result);
  918. }
  919. virtual void getUnicodeResult(const RtlFieldInfo *field, size32_t &chars, UChar * &result)
  920. {
  921. nextField(field);
  922. py2embed::getUnicodeResult(field, elem, chars, result);
  923. }
  924. virtual void getDecimalResult(const RtlFieldInfo *field, Decimal &value)
  925. {
  926. nextField(field);
  927. double ret = py2embed::getRealResult(field, elem);
  928. value.setReal(ret);
  929. }
  930. virtual void processBeginSet(const RtlFieldInfo * field, bool &isAll)
  931. {
  932. nextField(field);
  933. isAll = false; // No concept of an 'all' set in Python
  934. if (!elem || elem == Py_None || (!PyList_Check(elem) && !PySet_Check(elem)))
  935. typeError("list or set", field);
  936. push();
  937. }
  938. virtual bool processNextSet(const RtlFieldInfo * field)
  939. {
  940. nextField(NULL);
  941. pushback.setown(elem.getClear());
  942. return pushback != NULL;
  943. }
  944. virtual void processBeginDataset(const RtlFieldInfo * field)
  945. {
  946. nextField(field);
  947. if (!PyList_Check(elem))
  948. typeError("list", field);
  949. push();
  950. }
  951. virtual void processBeginRow(const RtlFieldInfo * field)
  952. {
  953. // Expect to see a tuple here, or possibly (if the ECL record has a single field), an arbitrary scalar object
  954. // If it's a tuple, we push it onto our stack as the active object
  955. nextField(field);
  956. if (!PyTuple_Check(elem))
  957. {
  958. if (countFields(field->type->queryFields())==1)
  959. {
  960. // Python doesn't seem to support the concept of a tuple containing a single element.
  961. // If we are expecting a single field in our row, then the 'tuple' layer will be missing
  962. elem.setown(PyTuple_Pack(1, elem.get()));
  963. }
  964. else
  965. typeError("tuple", field);
  966. }
  967. push();
  968. }
  969. virtual bool processNextRow(const RtlFieldInfo * field)
  970. {
  971. nextField(NULL);
  972. pushback.setown(elem.getClear());
  973. return pushback != NULL;
  974. }
  975. virtual void processEndSet(const RtlFieldInfo * field)
  976. {
  977. pop();
  978. }
  979. virtual void processEndDataset(const RtlFieldInfo * field)
  980. {
  981. pop();
  982. }
  983. virtual void processEndRow(const RtlFieldInfo * field)
  984. {
  985. pop();
  986. }
  987. protected:
  988. void pop()
  989. {
  990. iter.setown((PyObject *) iterStack.popGet());
  991. parent.setown((PyObject *) parentStack.popGet());
  992. named = namedStack.popGet();
  993. elem.clear();
  994. }
  995. void push()
  996. {
  997. iterStack.append(iter.getClear());
  998. parentStack.append(parent.getClear());
  999. namedStack.append(named);
  1000. parent.set(elem);
  1001. iter.setown(PyObject_GetIter(elem));
  1002. named = isNamedTuple(elem);
  1003. elem.clear();
  1004. }
  1005. bool isNamedTuple(PyObject *obj)
  1006. {
  1007. return PyObject_HasAttrString((PyObject *) obj->ob_type, "_fields");
  1008. }
  1009. void nextField(const RtlFieldInfo * field)
  1010. {
  1011. if (pushback)
  1012. elem.setown(pushback.getClear());
  1013. else if (field && named) // If it's named tuple, expect to always resolve fields by name, not position
  1014. {
  1015. elem.setown(PyObject_GetAttrString(parent, field->name));
  1016. }
  1017. else if (iter)
  1018. elem.setown(PyIter_Next(iter));
  1019. else
  1020. elem = NULL;
  1021. checkPythonError();
  1022. }
  1023. OwnedPyObject iter;
  1024. OwnedPyObject pushback;
  1025. OwnedPyObject elem;
  1026. OwnedPyObject parent;
  1027. bool named;
  1028. PointerArray iterStack;
  1029. PointerArray parentStack;
  1030. BoolArray namedStack;
  1031. };
  1032. static size32_t getRowResult(PyObject *result, ARowBuilder &builder)
  1033. {
  1034. PythonRowBuilder pyRowBuilder(result);
  1035. const RtlTypeInfo *typeInfo = builder.queryAllocator()->queryOutputMeta()->queryTypeInfo();
  1036. assertex(typeInfo);
  1037. RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
  1038. return typeInfo->build(builder, 0, &dummyField, pyRowBuilder);
  1039. }
  1040. // A PythonNamedTupleBuilder object is used to construct a Python named tuple from an ECL row
  1041. class PythonNamedTupleBuilder : public CInterfaceOf<IFieldProcessor>
  1042. {
  1043. public:
  1044. PythonNamedTupleBuilder(PythonThreadContext *_sharedCtx, const RtlFieldInfo *_outerRow)
  1045. : outerRow(_outerRow), sharedCtx(_sharedCtx)
  1046. {
  1047. }
  1048. virtual void processString(unsigned len, const char *value, const RtlFieldInfo * field)
  1049. {
  1050. addArg(PyString_FromStringAndSize(value, len));
  1051. }
  1052. virtual void processBool(bool value, const RtlFieldInfo * field)
  1053. {
  1054. addArg(PyBool_FromLong(value ? 1 : 0));
  1055. }
  1056. virtual void processData(unsigned len, const void *value, const RtlFieldInfo * field)
  1057. {
  1058. addArg(PyByteArray_FromStringAndSize((const char *) value, len));
  1059. }
  1060. virtual void processInt(__int64 value, const RtlFieldInfo * field)
  1061. {
  1062. addArg(PyLong_FromLongLong(value));
  1063. }
  1064. virtual void processUInt(unsigned __int64 value, const RtlFieldInfo * field)
  1065. {
  1066. addArg(PyLong_FromUnsignedLongLong(value));
  1067. }
  1068. virtual void processReal(double value, const RtlFieldInfo * field)
  1069. {
  1070. addArg(PyFloat_FromDouble(value));
  1071. }
  1072. virtual void processDecimal(const void *value, unsigned digits, unsigned precision, const RtlFieldInfo * field)
  1073. {
  1074. Decimal val;
  1075. val.setDecimal(digits, precision, value);
  1076. addArg(PyFloat_FromDouble(val.getReal()));
  1077. }
  1078. virtual void processUDecimal(const void *value, unsigned digits, unsigned precision, const RtlFieldInfo * field)
  1079. {
  1080. Decimal val;
  1081. val.setUDecimal(digits, precision, value);
  1082. addArg(PyFloat_FromDouble(val.getReal()));
  1083. }
  1084. virtual void processUnicode(unsigned len, const UChar *value, const RtlFieldInfo * field)
  1085. {
  1086. // You don't really know what size Py_UNICODE is (varies from system to system), so go via utf8
  1087. unsigned unicodeChars;
  1088. rtlDataAttr unicode;
  1089. rtlUnicodeToUtf8X(unicodeChars, unicode.refstr(), len, value);
  1090. processUtf8(unicodeChars, unicode.getstr(), field);
  1091. }
  1092. virtual void processQString(unsigned len, const char *value, const RtlFieldInfo * field)
  1093. {
  1094. size32_t charCount;
  1095. rtlDataAttr text;
  1096. rtlQStrToStrX(charCount, text.refstr(), len, value);
  1097. processString(charCount, text.getstr(), field);
  1098. }
  1099. virtual void processUtf8(unsigned len, const char *value, const RtlFieldInfo * field)
  1100. {
  1101. size32_t sizeBytes = rtlUtf8Size(len, value);
  1102. PyObject *vval = PyUnicode_FromStringAndSize(value, sizeBytes); // NOTE - requires size in bytes not chars
  1103. checkPythonError();
  1104. addArg(vval);
  1105. }
  1106. virtual bool processBeginSet(const RtlFieldInfo * field, unsigned numElements, bool isAll, const byte *data)
  1107. {
  1108. push();
  1109. if (isAll)
  1110. rtlFail(0, "pyembed: ALL sets are not supported");
  1111. return true;
  1112. }
  1113. virtual bool processBeginDataset(const RtlFieldInfo * field, unsigned numRows)
  1114. {
  1115. push();
  1116. return true;
  1117. }
  1118. virtual bool processBeginRow(const RtlFieldInfo * field)
  1119. {
  1120. if (field != outerRow)
  1121. push();
  1122. return true;
  1123. }
  1124. virtual void processEndSet(const RtlFieldInfo * field)
  1125. {
  1126. pop();
  1127. }
  1128. virtual void processEndDataset(const RtlFieldInfo * field)
  1129. {
  1130. pop();
  1131. }
  1132. virtual void processEndRow(const RtlFieldInfo * field)
  1133. {
  1134. if (field != outerRow)
  1135. {
  1136. args.setown(getTuple(field->type));
  1137. pop();
  1138. }
  1139. }
  1140. PyObject *getTuple(const RtlTypeInfo *type)
  1141. {
  1142. OwnedPyObject mynamedtupletype = sharedCtx ? sharedCtx->getNamedTupleType(type) : globalState.getNamedTupleType(type);
  1143. OwnedPyObject argsTuple = PyList_AsTuple(args);
  1144. OwnedPyObject mynamedtuple = PyObject_CallObject(mynamedtupletype, argsTuple); // Creates a namedtuple from the supplied tuple
  1145. checkPythonError();
  1146. return mynamedtuple.getClear();
  1147. }
  1148. protected:
  1149. void push()
  1150. {
  1151. stack.append(args.getClear());
  1152. args.setown(PyList_New(0));
  1153. }
  1154. void pop()
  1155. {
  1156. OwnedPyObject arg = args.getClear();
  1157. args.setown((PyObject *) stack.popGet());
  1158. addArg(arg.getClear());
  1159. }
  1160. void addArg(PyObject *arg)
  1161. {
  1162. if (!args)
  1163. {
  1164. args.setown(PyList_New(0));
  1165. }
  1166. PyList_Append(args, arg);
  1167. Py_DECREF(arg);
  1168. }
  1169. OwnedPyObject args;
  1170. PointerArray stack;
  1171. const RtlFieldInfo *outerRow;
  1172. PythonThreadContext *sharedCtx;
  1173. };
  1174. //----------------------------------------------------------------------
  1175. // GILBlock ensures the we hold the Python "Global interpreter lock" for the appropriate duration
  1176. class GILBlock
  1177. {
  1178. public:
  1179. GILBlock(PyThreadState * &_state) : state(_state)
  1180. {
  1181. PyEval_RestoreThread(state);
  1182. }
  1183. ~GILBlock()
  1184. {
  1185. state = PyEval_SaveThread();
  1186. }
  1187. private:
  1188. PyThreadState * &state;
  1189. };
  1190. // GILUnblock ensures the we release the Python "Global interpreter lock" for the appropriate duration
  1191. class GILUnblock
  1192. {
  1193. public:
  1194. GILUnblock()
  1195. {
  1196. state = PyEval_SaveThread();
  1197. }
  1198. ~GILUnblock()
  1199. {
  1200. PyEval_RestoreThread(state);
  1201. }
  1202. private:
  1203. PyThreadState *state;
  1204. };
  1205. //----------------------------------------------------------------------
  1206. // Wrap an IRowStream into a Python generator
  1207. struct ECLDatasetIterator
  1208. {
  1209. PyObject_HEAD;
  1210. const RtlTypeInfo *typeInfo; // Not linked (or linkable)
  1211. IRowStream * val; // Linked
  1212. };
  1213. PyObject* ECLDatasetIterator_iter(PyObject *self)
  1214. {
  1215. Py_INCREF(self);
  1216. return self;
  1217. }
  1218. void ECLDatasetIterator_dealloc(PyObject *self)
  1219. {
  1220. ECLDatasetIterator *p = (ECLDatasetIterator *)self;
  1221. if (p->val)
  1222. {
  1223. GILUnblock b;
  1224. p->val->stop();
  1225. ::Release(p->val);
  1226. p->val = NULL;
  1227. }
  1228. self->ob_type->tp_free(self);
  1229. }
  1230. PyObject* ECLDatasetIterator_iternext(PyObject *self)
  1231. {
  1232. ECLDatasetIterator *p = (ECLDatasetIterator *)self;
  1233. roxiemem::OwnedConstRoxieRow nextRow;
  1234. if (p->val)
  1235. {
  1236. GILUnblock b;
  1237. nextRow.setown(p->val->ungroupedNextRow());
  1238. if (!nextRow)
  1239. {
  1240. p->val->stop();
  1241. ::Release(p->val);
  1242. p->val = NULL;
  1243. }
  1244. }
  1245. if (p->val)
  1246. {
  1247. RtlFieldStrInfo dummyField("<row>", NULL, p->typeInfo);
  1248. PythonNamedTupleBuilder tupleBuilder(NULL, &dummyField);
  1249. const byte *brow = (const byte *) nextRow.get();
  1250. p->typeInfo->process(brow, brow, &dummyField, tupleBuilder);
  1251. return tupleBuilder.getTuple(p->typeInfo);
  1252. }
  1253. else
  1254. {
  1255. // If we get here, it's EOF
  1256. PyErr_SetNone(PyExc_StopIteration);
  1257. return NULL;
  1258. }
  1259. }
  1260. static PyTypeObject ECLDatasetIteratorType =
  1261. {
  1262. PyObject_HEAD_INIT(NULL)
  1263. 0, /*ob_size*/
  1264. "ECLDatasetIterator._MyIter", /*tp_name*/
  1265. sizeof(ECLDatasetIterator), /*tp_basicsize*/
  1266. 0, /*tp_itemsize*/
  1267. ECLDatasetIterator_dealloc, /*tp_dealloc*/
  1268. 0, /*tp_print*/
  1269. 0, /*tp_getattr*/
  1270. 0, /*tp_setattr*/
  1271. 0, /*tp_compare*/
  1272. 0, /*tp_repr*/
  1273. 0, /*tp_as_number*/
  1274. 0, /*tp_as_sequence*/
  1275. 0, /*tp_as_mapping*/
  1276. 0, /*tp_hash */
  1277. 0, /*tp_call*/
  1278. 0, /*tp_str*/
  1279. 0, /*tp_getattro*/
  1280. 0, /*tp_setattro*/
  1281. 0, /*tp_as_buffer*/
  1282. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER, /* tp_flags: tell python to use tp_iter and tp_iternext fields. */
  1283. "ECL dataset iterator object.", /* tp_doc */
  1284. 0, /* tp_traverse */
  1285. 0, /* tp_clear */
  1286. 0, /* tp_richcompare */
  1287. 0, /* tp_weaklistoffset */
  1288. ECLDatasetIterator_iter, /* tp_iter: __iter__() method */
  1289. ECLDatasetIterator_iternext /* tp_iternext: next() method */
  1290. };
  1291. static PyObject *createECLDatasetIterator(const RtlTypeInfo *_typeInfo, IRowStream * _val)
  1292. {
  1293. ECLDatasetIteratorType.tp_new = PyType_GenericNew;
  1294. if (PyType_Ready(&ECLDatasetIteratorType) < 0) return NULL;
  1295. ECLDatasetIterator *p = PyObject_New(ECLDatasetIterator, &ECLDatasetIteratorType);
  1296. if (!p)
  1297. {
  1298. checkPythonError();
  1299. rtlFail(0, "pyembed: failed to create dataset iterator");
  1300. }
  1301. p->typeInfo = _typeInfo;
  1302. p->val = _val;
  1303. return (PyObject *)p;
  1304. }
  1305. //-----------------------------------------------------
  1306. void Python27GlobalState::unregister(const char *key)
  1307. {
  1308. checkThreadContext();
  1309. GILBlock b(threadContext->threadState);
  1310. globalState.releaseNamedScope(key);
  1311. }
  1312. void Python27GlobalState::removePath(const char *path)
  1313. {
  1314. checkThreadContext();
  1315. GILBlock b(threadContext->threadState);
  1316. // Remove a manifest file from the Python path, and remove from sys.modules any modules loaded from that location
  1317. PyObject *sysPath = PySys_GetObject((char *) "path");
  1318. if (sysPath)
  1319. {
  1320. OwnedPyObject newPath = PyString_FromString(path);
  1321. Py_ssize_t found = PySequence_Index(sysPath, newPath);
  1322. if (found != (Py_ssize_t)-1)
  1323. {
  1324. PySequence_DelItem(sysPath, found);
  1325. checkPythonError();
  1326. }
  1327. else
  1328. PyErr_Clear();
  1329. PyObject *sysModules = PySys_GetObject((char *) "modules");
  1330. checkPythonError();
  1331. OwnedPyObject values = PyDict_Values(sysModules);
  1332. checkPythonError();
  1333. Py_ssize_t len = PyList_Size(values);
  1334. size_t pathLen = strlen(path);
  1335. for (Py_ssize_t idx = 0; idx < len; idx++)
  1336. {
  1337. PyObject *module = PyList_GetItem(values, idx);
  1338. if (PyObject_HasAttrString(module, "__file__"))
  1339. {
  1340. OwnedPyObject file = PyObject_GetAttrString(module, "__file__");
  1341. if (file && PyString_Check(file))
  1342. {
  1343. const char *fileName = PyString_AsString(file);
  1344. if (strncmp(fileName, path, pathLen)==0)
  1345. {
  1346. OwnedPyObject modname = PyObject_GetAttrString(module, "__name__");
  1347. DBGLOG("Unloading module %s", fileName);
  1348. PyDict_DelItem(sysModules, modname);
  1349. }
  1350. }
  1351. }
  1352. checkPythonError();
  1353. }
  1354. }
  1355. }
  1356. // A Python function that returns a dataset will return a PythonRowStream object that can be
  1357. // interrogated to return each row of the result in turn
  1358. class PythonRowStream : public CInterfaceOf<IRowStream>
  1359. {
  1360. public:
  1361. PythonRowStream(PyObject *result, IEngineRowAllocator *_resultAllocator)
  1362. : resultIterator(NULL)
  1363. {
  1364. // NOTE - the caller should already have the GIL lock before creating me
  1365. if (!result || result == Py_None)
  1366. typeError("list or generator", NULL);
  1367. resultIterator.setown(PyObject_GetIter(result)); // We allow anything that is iterable to be returned for a row stream
  1368. checkPythonError();
  1369. resultAllocator.set(_resultAllocator);
  1370. }
  1371. ~PythonRowStream()
  1372. {
  1373. if (resultIterator)
  1374. {
  1375. checkThreadContext();
  1376. GILBlock b(threadContext->threadState);
  1377. resultIterator.clear();
  1378. }
  1379. }
  1380. virtual const void *nextRow()
  1381. {
  1382. checkThreadContext();
  1383. GILBlock b(threadContext->threadState);
  1384. if (!resultIterator)
  1385. return NULL;
  1386. OwnedPyObject row = PyIter_Next(resultIterator);
  1387. checkPythonError();
  1388. if (!row)
  1389. return NULL;
  1390. RtlDynamicRowBuilder rowBuilder(resultAllocator);
  1391. size32_t len = py2embed::getRowResult(row, rowBuilder);
  1392. checkPythonError();
  1393. return rowBuilder.finalizeRowClear(len);
  1394. }
  1395. virtual void stop()
  1396. {
  1397. checkThreadContext();
  1398. GILBlock b(threadContext->threadState);
  1399. resultAllocator.clear();
  1400. resultIterator.clear();
  1401. }
  1402. protected:
  1403. Linked<IEngineRowAllocator> resultAllocator;
  1404. OwnedPyObject resultIterator;
  1405. };
  1406. // Each call to a Python function will use a new Python27EmbedFunctionContext object
  1407. // This takes care of ensuring that the Python GIL is locked while we are executing python code,
  1408. // and released when we are not
  1409. class Python27EmbedContextBase : public CInterfaceOf<IEmbedFunctionContext>
  1410. {
  1411. public:
  1412. Python27EmbedContextBase(PythonThreadContext *_sharedCtx, ICodeContext *_codeCtx)
  1413. : sharedCtx(_sharedCtx), codeCtx(_codeCtx)
  1414. {
  1415. PyEval_RestoreThread(sharedCtx->threadState);
  1416. }
  1417. virtual void setActivityOptions(const IThorActivityContext *ctx)
  1418. {
  1419. OwnedPyObject mynamedtupletype = globalState.getActivityContextTupleType();
  1420. OwnedPyObject args = PyTuple_New(5);
  1421. OwnedPyObject isLocal;
  1422. isLocal.set(ctx->isLocal() ? Py_True : Py_False);
  1423. PyTuple_SET_ITEM((PyTupleObject *) args.get(), 0, isLocal.getClear());
  1424. PyTuple_SET_ITEM((PyTupleObject *) args.get(), 1, PyInt_FromLong(ctx->numSlaves()));
  1425. PyTuple_SET_ITEM((PyTupleObject *) args.get(), 2, PyInt_FromLong(ctx->numStrands()));
  1426. PyTuple_SET_ITEM((PyTupleObject *) args.get(), 3, PyInt_FromLong(ctx->querySlave()));
  1427. PyTuple_SET_ITEM((PyTupleObject *) args.get(), 4, PyInt_FromLong(ctx->queryStrand()));
  1428. OwnedPyObject activityTuple = PyObject_CallObject(mynamedtupletype, args); // Creates a namedtuple from the supplied tuple
  1429. checkPythonError();
  1430. PyDict_SetItemString(locals, "__activity__", activityTuple.getClear());
  1431. checkPythonError();
  1432. }
  1433. void setScopes(ICodeContext *codeCtx, const char *_options)
  1434. {
  1435. locals.setown(PyDict_New());
  1436. StringArray options;
  1437. options.appendList(_options, ",");
  1438. StringBuffer scopeKey;
  1439. const char *scopeKey2 = nullptr;
  1440. bool registerCallback = false;
  1441. bool wuidScope = false;
  1442. IEngineContext *engine = nullptr;
  1443. ForEachItemIn(idx, options)
  1444. {
  1445. const char *opt = options.item(idx);
  1446. const char *val = strchr(opt, '=');
  1447. if (val)
  1448. {
  1449. StringBuffer optName(val-opt, opt);
  1450. val++;
  1451. if (strieq(optName, "globalscope"))
  1452. scopeKey2 = val;
  1453. else if (strieq(optName, "persist"))
  1454. {
  1455. if (scopeKey.length())
  1456. failx("persist option specified more than once");
  1457. if (strieq(val, "global"))
  1458. scopeKey.append("global");
  1459. else if (strieq(val, "workunit"))
  1460. {
  1461. engine = codeCtx->queryEngineContext();
  1462. wuidScope = true;
  1463. if (!engine)
  1464. failx("Persist mode 'workunit' not supported here");
  1465. }
  1466. else if (strieq(val, "query"))
  1467. {
  1468. engine = codeCtx->queryEngineContext();
  1469. wuidScope = false;
  1470. if (!engine)
  1471. failx("Persist mode 'query' not supported here");
  1472. }
  1473. else
  1474. failx("Unrecognized persist mode %s", val);
  1475. }
  1476. else
  1477. failx("Unrecognized option %s", optName.str());
  1478. }
  1479. else
  1480. failx("Unrecognized option %s", opt);
  1481. }
  1482. if (engine)
  1483. engine->getQueryId(scopeKey, wuidScope);
  1484. if (scopeKey2)
  1485. scopeKey.append(':').append(scopeKey2);
  1486. if (scopeKey.length())
  1487. {
  1488. bool isNew;
  1489. globals.setown(globalState.getNamedScope(scopeKey, isNew));
  1490. if (isNew && engine)
  1491. engine->onTermination(Python27GlobalState::unregister, scopeKey.str(), wuidScope);
  1492. }
  1493. else
  1494. globals.setown(PyDict_New());
  1495. PyDict_SetItemString(globals, "__builtins__", PyEval_GetBuiltins()); // required for import to work
  1496. }
  1497. ~Python27EmbedContextBase()
  1498. {
  1499. // We need to clear these before calling savethread, or we won't own the GIL
  1500. locals.clear();
  1501. globals.clear();
  1502. result.clear();
  1503. script.clear();
  1504. sharedCtx->threadState = PyEval_SaveThread();
  1505. }
  1506. virtual bool getBooleanResult()
  1507. {
  1508. return py2embed::getBooleanResult(NULL, result);
  1509. }
  1510. virtual void getDataResult(size32_t &__chars, void * &__result)
  1511. {
  1512. py2embed::getDataResult(NULL, result, __chars, __result);
  1513. }
  1514. virtual double getRealResult()
  1515. {
  1516. return py2embed::getRealResult(NULL, result);
  1517. }
  1518. virtual __int64 getSignedResult()
  1519. {
  1520. return py2embed::getSignedResult(NULL, result);
  1521. }
  1522. virtual unsigned __int64 getUnsignedResult()
  1523. {
  1524. return py2embed::getUnsignedResult(NULL, result);
  1525. }
  1526. virtual void getStringResult(size32_t &__chars, char * &__result)
  1527. {
  1528. py2embed::getStringResult(NULL, result, __chars, __result);
  1529. }
  1530. virtual void getUTF8Result(size32_t &__chars, char * &__result)
  1531. {
  1532. py2embed::getUTF8Result(NULL, result, __chars, __result);
  1533. }
  1534. virtual void getUnicodeResult(size32_t &__chars, UChar * &__result)
  1535. {
  1536. py2embed::getUnicodeResult(NULL, result, __chars, __result);
  1537. }
  1538. virtual void getSetResult(bool & __isAllResult, size32_t & __resultBytes, void * & __result, int elemType, size32_t elemSize)
  1539. {
  1540. py2embed::getSetResult(result, __isAllResult, __resultBytes, __result, elemType, elemSize);
  1541. }
  1542. virtual IRowStream *getDatasetResult(IEngineRowAllocator * _resultAllocator)
  1543. {
  1544. return new PythonRowStream(result, _resultAllocator);
  1545. }
  1546. virtual byte * getRowResult(IEngineRowAllocator * _resultAllocator)
  1547. {
  1548. RtlDynamicRowBuilder rowBuilder(_resultAllocator);
  1549. size32_t len = py2embed::getRowResult(result, rowBuilder);
  1550. return (byte *) rowBuilder.finalizeRowClear(len);
  1551. }
  1552. virtual size32_t getTransformResult(ARowBuilder & builder)
  1553. {
  1554. return py2embed::getRowResult(result, builder);
  1555. }
  1556. virtual void bindBooleanParam(const char *name, bool val)
  1557. {
  1558. addArg(name, PyBool_FromLong(val ? 1 : 0));
  1559. }
  1560. virtual void bindDataParam(const char *name, size32_t len, const void *val)
  1561. {
  1562. addArg(name, PyByteArray_FromStringAndSize((const char *) val, len));
  1563. }
  1564. virtual void bindFloatParam(const char *name, float val)
  1565. {
  1566. addArg(name, PyFloat_FromDouble((double) val));
  1567. }
  1568. virtual void bindRealParam(const char *name, double val)
  1569. {
  1570. addArg(name, PyFloat_FromDouble(val));
  1571. }
  1572. virtual void bindSignedSizeParam(const char *name, int size, __int64 val)
  1573. {
  1574. addArg(name, PyLong_FromLongLong(val));
  1575. }
  1576. virtual void bindSignedParam(const char *name, __int64 val)
  1577. {
  1578. addArg(name, PyLong_FromLongLong(val));
  1579. }
  1580. virtual void bindUnsignedSizeParam(const char *name, int size, unsigned __int64 val)
  1581. {
  1582. addArg(name, PyLong_FromUnsignedLongLong(val));
  1583. }
  1584. virtual void bindUnsignedParam(const char *name, unsigned __int64 val)
  1585. {
  1586. addArg(name, PyLong_FromUnsignedLongLong(val));
  1587. }
  1588. virtual void bindStringParam(const char *name, size32_t len, const char *val)
  1589. {
  1590. addArg(name, PyString_FromStringAndSize(val, len));
  1591. }
  1592. virtual void bindVStringParam(const char *name, const char *val)
  1593. {
  1594. addArg(name, PyString_FromString(val));
  1595. }
  1596. virtual void bindUTF8Param(const char *name, size32_t chars, const char *val)
  1597. {
  1598. size32_t sizeBytes = rtlUtf8Size(chars, val);
  1599. PyObject *vval = PyUnicode_FromStringAndSize(val, sizeBytes); // NOTE - requires size in bytes not chars
  1600. checkPythonError();
  1601. addArg(name, vval);
  1602. }
  1603. virtual void bindUnicodeParam(const char *name, size32_t chars, const UChar *val)
  1604. {
  1605. // You don't really know what size Py_UNICODE is (varies from system to system), so go via utf8
  1606. unsigned unicodeChars;
  1607. char *unicode;
  1608. rtlUnicodeToUtf8X(unicodeChars, unicode, chars, val);
  1609. size32_t sizeBytes = rtlUtf8Size(unicodeChars, unicode);
  1610. PyObject *vval = PyUnicode_FromStringAndSize(unicode, sizeBytes); // NOTE - requires size in bytes not chars
  1611. checkPythonError();
  1612. addArg(name, vval);
  1613. rtlFree(unicode);
  1614. }
  1615. virtual void bindSetParam(const char *name, int elemType, size32_t elemSize, bool isAll, size32_t totalBytes, const void *setData)
  1616. {
  1617. if (isAll)
  1618. rtlFail(0, "pyembed: Cannot pass ALL");
  1619. type_t typecode = (type_t) elemType;
  1620. const byte *inData = (const byte *) setData;
  1621. const byte *endData = inData + totalBytes;
  1622. OwnedPyObject vval = PyList_New(0);
  1623. while (inData < endData)
  1624. {
  1625. OwnedPyObject thisElem;
  1626. size32_t thisSize = elemSize;
  1627. switch (typecode)
  1628. {
  1629. case type_int:
  1630. thisElem.setown(PyLong_FromLongLong(rtlReadInt(inData, elemSize)));
  1631. break;
  1632. case type_unsigned:
  1633. thisElem.setown(PyLong_FromUnsignedLongLong(rtlReadUInt(inData, elemSize)));
  1634. break;
  1635. case type_varstring:
  1636. {
  1637. size32_t numChars = strlen((const char *) inData);
  1638. thisElem.setown(PyString_FromStringAndSize((const char *) inData, numChars));
  1639. if (elemSize == UNKNOWN_LENGTH)
  1640. thisSize = numChars + 1;
  1641. break;
  1642. }
  1643. case type_string:
  1644. if (elemSize == UNKNOWN_LENGTH)
  1645. {
  1646. thisSize = * (size32_t *) inData;
  1647. inData += sizeof(size32_t);
  1648. }
  1649. thisElem.setown(PyString_FromStringAndSize((const char *) inData, thisSize));
  1650. break;
  1651. case type_real:
  1652. if (elemSize == sizeof(double))
  1653. thisElem.setown(PyFloat_FromDouble(* (double *) inData));
  1654. else
  1655. thisElem.setown(PyFloat_FromDouble(* (float *) inData));
  1656. break;
  1657. case type_boolean:
  1658. assertex(elemSize == sizeof(bool));
  1659. thisElem.setown(PyBool_FromLong(*(bool*)inData ? 1 : 0));
  1660. break;
  1661. case type_unicode:
  1662. {
  1663. if (elemSize == UNKNOWN_LENGTH)
  1664. {
  1665. thisSize = (* (size32_t *) inData) * sizeof(UChar); // NOTE - it's in chars...
  1666. inData += sizeof(size32_t);
  1667. }
  1668. unsigned unicodeChars;
  1669. rtlDataAttr unicode;
  1670. rtlUnicodeToUtf8X(unicodeChars, unicode.refstr(), thisSize / sizeof(UChar), (const UChar *) inData);
  1671. size32_t sizeBytes = rtlUtf8Size(unicodeChars, unicode.getstr());
  1672. thisElem.setown(PyUnicode_FromStringAndSize(unicode.getstr(), sizeBytes)); // NOTE - requires size in bytes not chars
  1673. checkPythonError();
  1674. break;
  1675. }
  1676. case type_utf8:
  1677. {
  1678. assertex (elemSize == UNKNOWN_LENGTH);
  1679. size32_t numChars = * (size32_t *) inData;
  1680. inData += sizeof(size32_t);
  1681. thisSize = rtlUtf8Size(numChars, inData);
  1682. thisElem.setown(PyUnicode_FromStringAndSize((const char *) inData, thisSize)); // NOTE - requires size in bytes not chars
  1683. break;
  1684. }
  1685. case type_data:
  1686. if (elemSize == UNKNOWN_LENGTH)
  1687. {
  1688. thisSize = * (size32_t *) inData;
  1689. inData += sizeof(size32_t);
  1690. }
  1691. thisElem.setown(PyByteArray_FromStringAndSize((const char *) inData, thisSize));
  1692. break;
  1693. }
  1694. checkPythonError();
  1695. inData += thisSize;
  1696. PyList_Append(vval, thisElem);
  1697. }
  1698. addArg(name, vval.getLink());
  1699. }
  1700. virtual void bindRowParam(const char *name, IOutputMetaData & metaVal, const byte *val) override
  1701. {
  1702. const RtlTypeInfo *typeInfo = metaVal.queryTypeInfo();
  1703. assertex(typeInfo);
  1704. RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
  1705. PythonNamedTupleBuilder tupleBuilder(sharedCtx, &dummyField);
  1706. typeInfo->process(val, val, &dummyField, tupleBuilder); // Creates a tuple from the incoming ECL row
  1707. addArg(name, tupleBuilder.getTuple(typeInfo));
  1708. }
  1709. virtual void bindDatasetParam(const char *name, IOutputMetaData & metaVal, IRowStream * val)
  1710. {
  1711. addArg(name, createECLDatasetIterator(metaVal.queryTypeInfo(), LINK(val)));
  1712. }
  1713. protected:
  1714. virtual void addArg(const char *name, PyObject *arg) = 0;
  1715. PythonThreadContext *sharedCtx = nullptr;
  1716. ICodeContext *codeCtx = nullptr;
  1717. OwnedPyObject locals;
  1718. OwnedPyObject globals;
  1719. OwnedPyObject result;
  1720. OwnedPyObject script;
  1721. };
  1722. class Python27EmbedScriptContext : public Python27EmbedContextBase
  1723. {
  1724. public:
  1725. Python27EmbedScriptContext(PythonThreadContext *_sharedCtx, ICodeContext *_codeCtx)
  1726. : Python27EmbedContextBase(_sharedCtx, _codeCtx)
  1727. {
  1728. }
  1729. ~Python27EmbedScriptContext()
  1730. {
  1731. }
  1732. virtual IInterface *bindParamWriter(IInterface *esdl, const char *esdlservice, const char *esdltype, const char *name)
  1733. {
  1734. return NULL;
  1735. }
  1736. virtual void paramWriterCommit(IInterface *writer)
  1737. {
  1738. }
  1739. virtual void writeResult(IInterface *esdl, const char *esdlservice, const char *esdltype, IInterface *writer)
  1740. {
  1741. }
  1742. virtual void importFunction(size32_t lenChars, const char *text)
  1743. {
  1744. throwUnexpected();
  1745. }
  1746. virtual void compileEmbeddedScript(size32_t lenChars, const char *utf)
  1747. {
  1748. script.setown(sharedCtx->compileEmbeddedScript(codeCtx, lenChars, utf, argstring));
  1749. }
  1750. virtual void loadCompiledScript(size32_t chars, const void *_script) override
  1751. {
  1752. throwUnexpected();
  1753. }
  1754. virtual void enter() override {}
  1755. virtual void reenter(ICodeContext *codeCtx) override {}
  1756. virtual void exit() override {}
  1757. virtual void setActivityOptions(const IThorActivityContext *ctx) override
  1758. {
  1759. Python27EmbedContextBase::setActivityOptions(ctx);
  1760. argstring.append("__activity__");
  1761. }
  1762. virtual void callFunction()
  1763. {
  1764. result.setown(PyEval_EvalCode((PyCodeObject *) script.get(), globals, locals));
  1765. checkPythonError();
  1766. if (!result || result == Py_None)
  1767. result.set(PyDict_GetItemString(globals, "__result__"));
  1768. if (!result || result == Py_None)
  1769. result.set(PyDict_GetItemString(locals, "__result__"));
  1770. }
  1771. void setargs(const char *args)
  1772. {
  1773. argstring.set(args);
  1774. }
  1775. protected:
  1776. virtual void addArg(const char *name, PyObject *arg)
  1777. {
  1778. if (!arg)
  1779. return;
  1780. if (argstring.length())
  1781. argstring.append(',');
  1782. argstring.append(name);
  1783. if (script)
  1784. PyDict_SetItemString(globals, name, arg); // Back compatibility - if compiler did not recognize the prebind flag, we need to use globals
  1785. else
  1786. PyDict_SetItemString(locals, name, arg);
  1787. Py_DECREF(arg);
  1788. checkPythonError();
  1789. }
  1790. StringBuffer argstring;
  1791. };
  1792. class Python27EmbedImportContext : public Python27EmbedContextBase
  1793. {
  1794. public:
  1795. Python27EmbedImportContext(PythonThreadContext *_sharedCtx, ICodeContext *_codeCtx)
  1796. : Python27EmbedContextBase(_sharedCtx, _codeCtx)
  1797. {
  1798. argcount = 0;
  1799. }
  1800. ~Python27EmbedImportContext()
  1801. {
  1802. }
  1803. virtual IInterface *bindParamWriter(IInterface *esdl, const char *esdlservice, const char *esdltype, const char *name)
  1804. {
  1805. return NULL;
  1806. }
  1807. virtual void paramWriterCommit(IInterface *writer)
  1808. {
  1809. }
  1810. virtual void writeResult(IInterface *esdl, const char *esdlservice, const char *esdltype, IInterface *writer)
  1811. {
  1812. }
  1813. virtual void importFunction(size32_t lenChars, const char *utf)
  1814. {
  1815. script.setown(sharedCtx->importFunction(codeCtx, lenChars, utf));
  1816. }
  1817. virtual void compileEmbeddedScript(size32_t len, const char *text)
  1818. {
  1819. throwUnexpected();
  1820. }
  1821. virtual void loadCompiledScript(size32_t chars, const void *_script) override
  1822. {
  1823. throwUnexpected();
  1824. }
  1825. virtual void enter() override {}
  1826. virtual void reenter(ICodeContext *codeCtx) override {}
  1827. virtual void exit() override {}
  1828. virtual void callFunction()
  1829. {
  1830. result.setown(PyObject_CallObject(script, args));
  1831. checkPythonError();
  1832. }
  1833. private:
  1834. virtual void addArg(const char *name, PyObject *arg)
  1835. {
  1836. if (argcount)
  1837. _PyTuple_Resize(args.ref(), argcount+1);
  1838. else
  1839. args.setown(PyTuple_New(1));
  1840. PyTuple_SET_ITEM((PyTupleObject *) args.get(), argcount++, arg); // Note - 'steals' the arg reference
  1841. }
  1842. int argcount;
  1843. OwnedPyObject args;
  1844. };
  1845. class Python27EmbedContext : public CInterfaceOf<IEmbedContext>
  1846. {
  1847. public:
  1848. virtual IEmbedFunctionContext *createFunctionContext(unsigned flags, const char *options) override
  1849. {
  1850. return createFunctionContextEx(nullptr, nullptr, flags, options);
  1851. }
  1852. virtual IEmbedFunctionContext *createFunctionContextEx(ICodeContext * ctx, const IThorActivityContext *activityCtx, unsigned flags, const char *options) override
  1853. {
  1854. checkThreadContext();
  1855. Owned<Python27EmbedContextBase> ret;
  1856. if (flags & EFimport)
  1857. ret.setown(new Python27EmbedImportContext(threadContext, ctx));
  1858. else
  1859. ret.setown(new Python27EmbedScriptContext(threadContext, ctx));
  1860. ret->setScopes(ctx, options);
  1861. if (activityCtx)
  1862. ret->setActivityOptions(activityCtx);
  1863. return ret.getClear();
  1864. }
  1865. virtual IEmbedServiceContext *createServiceContext(const char *service, unsigned flags, const char *options) override
  1866. {
  1867. throwUnexpected();
  1868. }
  1869. };
  1870. extern DECL_EXPORT IEmbedContext* getEmbedContext()
  1871. {
  1872. return new Python27EmbedContext;
  1873. }
  1874. extern DECL_EXPORT void syntaxCheck(size32_t & __lenResult, char * & __result, const char *funcname, size32_t charsBody, const char * body, const char *argNames, const char *compilerOptions, const char *persistOptions)
  1875. {
  1876. StringBuffer result;
  1877. if (globalState.isInitialized())
  1878. {
  1879. // NOTE - compilation of a script does not actually resolve imports - so the fact that the manifest is not on the path does not matter
  1880. // This does mean that many errors cannot be caught until runtime, but that's Python for you...
  1881. try
  1882. {
  1883. checkThreadContext();
  1884. Owned<Python27EmbedScriptContext> ctx = new Python27EmbedScriptContext(threadContext, nullptr);
  1885. ctx->setargs(argNames);
  1886. ctx->compileEmbeddedScript(charsBody, body);
  1887. }
  1888. catch (IException *E)
  1889. {
  1890. StringBuffer msg;
  1891. result.append(E->errorMessage(msg));
  1892. E->Release();
  1893. }
  1894. }
  1895. __lenResult = result.length();
  1896. __result = result.detach();
  1897. }
  1898. } // namespace
  1899. // For back compatibility we also answer to the name "pyembed"...
  1900. namespace pyembed {
  1901. extern DECL_EXPORT IEmbedContext* getEmbedContext()
  1902. {
  1903. return new py2embed::Python27EmbedContext;
  1904. }
  1905. } // namespace