python_from_ecl.ecl 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /* Example of calling Python from ECL code via embedded C++
  2. *
  3. * This example uses the following code in python_cat.py:
  4. *
  5. * def cat(a, b):
  6. *     return a + b
  7. *
  8. * Note that you may need to change the line that sets the Python sys.path, if you extend this
  9. * code to use other python examples, or if you are running on a system other than a standard HPCC
  10. * install on Linux
  11. *
  12. */
  13. #option('compileOptions', '-I/usr/include/python2.7/')
  14. #option('linkOptions', '-lpython2.7')
  15. // Embedded C++ that makes a call to a Python function
  16. string cat(varstring a, varstring b) := BEGINC++
  17. // This section of the code should probably move to a plugin, or somesuch
  18. #include <Python.h>
  19. #include <assert.h>
  20. #include <pthread.h>
  21. static PyObject *pFunc_cat;
  22. class PythonInitializer
  23. {
  24. PyObject *pModule;
  25. PyThreadState *tstate;
  26. bool pythonInitialized;
  27. public:
  28. PythonInitializer()
  29. {
  30. pModule = NULL;
  31. tstate = NULL;
  32. pythonInitialized = false;
  33. // Initialize the Python Interpreter
  34. Py_Initialize();
  35. PyEval_InitThreads();
  36. pythonInitialized = true;
  37. resolvePythonFunctions();
  38. tstate = PyEval_SaveThread();
  39. }
  40. ~PythonInitializer()
  41. {
  42. PyEval_RestoreThread(tstate);
  43. // Clean up
  44. if (pModule)
  45. Py_DECREF(pModule);
  46. // Finish the Python Interpreter
  47. if (pythonInitialized)
  48. Py_Finalize();
  49. }
  50. void resolvePythonFunctions()
  51. {
  52. PySys_SetPath("/opt/HPCCSystems/examples/python/"); // Set this to where you want to pick up python_cat.py from
  53. pModule = PyImport_ImportModule("python_cat");
  54. if (pModule == NULL)
  55. {
  56. PyErr_Print();
  57. }
  58. else
  59. {
  60. // pDict is a borrowed reference
  61. PyObject *pDict = PyModule_GetDict(pModule);
  62. // pFunc_cat is also a borrowed reference
  63. pFunc_cat = PyDict_GetItemString(pDict, "cat");
  64. if (!pFunc_cat || !PyCallable_Check(pFunc_cat))
  65. {
  66. PyErr_Print();
  67. pFunc_cat = NULL;
  68. }
  69. }
  70. }
  71. };
  72. PythonInitializer __initializer;
  73. // Use class OwnedPyObject for any objects that are not 'borrowed references'
  74. // so that the appropriate Py_DECREF call is made when the OwnedPyObject goes
  75. // out of scope, even if the function returns prematurely (such as via an exception).
  76. // In particular, checkPythonError is a lot easier to call safely if this is used.
  77. class OwnedPyObject
  78. {
  79. PyObject *ptr;
  80. public:
  81. inline OwnedPyObject(PyObject *_ptr) : ptr(_ptr) {}
  82. inline ~OwnedPyObject() { if (ptr) Py_DECREF(ptr); }
  83. inline PyObject * get() const { return ptr; }
  84. inline PyObject * operator -> () const { return ptr; }
  85. inline operator PyObject *() const { return ptr; }
  86. };
  87. // call checkPythonError to throw an exception if Python error state is set
  88. static void checkPythonError()
  89. {
  90. PyObject* err = PyErr_Occurred();
  91. if (err)
  92. {
  93. OwnedPyObject errStr = PyObject_Str(err);
  94. PyErr_Clear();
  95. rtlFail(0, PyString_AsString(errStr));
  96. }
  97. }
  98. // The Python Global Interpreter Lock (GIL) won't know about C++-created threads, so we need to
  99. // call PyGILState_Ensure() and PyGILState_Release at the start and end of every function.
  100. // Wrapping them in a class like this ensures that the release always happens even if
  101. // the function exits prematurely
  102. class GILstateWrapper
  103. {
  104. PyGILState_STATE gstate;
  105. public:
  106. GILstateWrapper()
  107. {
  108. gstate = PyGILState_Ensure();
  109. }
  110. ~GILstateWrapper()
  111. {
  112. PyGILState_Release(gstate);
  113. }
  114. };
  115. //--------------------------------------------------------
  116. #body
  117. // extern void user1(size32_t & __lenResult,char * & __result,const char * a,const char * b) {
  118. {
  119. if (!pFunc_cat)
  120. rtlFail(0, "Could not resolve python functions");
  121. GILstateWrapper gstate;
  122. OwnedPyObject pArgs = Py_BuildValue("s,s", a, b);
  123. checkPythonError();
  124. OwnedPyObject pResult = PyObject_CallObject(pFunc_cat, pArgs);
  125. checkPythonError();
  126. __lenResult = PyString_Size(pResult);
  127. const char * chars = PyString_AsString(pResult);
  128. checkPythonError();
  129. __result = (char *)rtlMalloc(__lenResult);
  130. memcpy(__result, chars, __lenResult);
  131. }
  132. ENDC++;
  133. //--------------------------------------------------------
  134. // ECL code - two input datasets of 2 records each, every record containing 2 strings
  // Layout of one input row: the pair of strings handed to cat()
  inrec := RECORD
      STRING f1;
      STRING f2;
  END;
  // Two inline datasets of two rows each, both laid out as inrec
  infile1 := DATASET([{'a', 'b'},
                      {'c', 'd'}], inrec);
  infile2 := DATASET([{'e', 'f'},
                      {'g', 'h'}], inrec);
  141. // Output record has just one string, filled in from the result of the python function
  // Layout of one output row: a single string field
  outrec := RECORD
      STRING c;
  END;
  // Builds one output row from one input row by passing both input strings to
  // cat(), the embedded-C++ function that calls into Python
  outrec t(inrec L) := TRANSFORM
      SELF.c := cat(L.f1, L.f2);
  END;
  // Apply the transform to each input separately, then append the two results
  // (threaded concat operation)
  projected1 := PROJECT(infile1, t(LEFT));
  projected2 := PROJECT(infile2, t(LEFT));
  outfile := projected1 + projected2;
  outfile;