python_from_ecl.ecl 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /* Example of calling Python from ECL code via embedded C++
  2. *
  3. * This example uses the following code in python_cat.py:
  4. *
  * def cat(a, b):
  *     return a + b
  7. *
  8. * To compile this ECL example, you need to link the Python libraries:
  9. *
  10. * eclcc python_from_ecl.ecl -Wc,-I/usr/include/python2.7/ -Wl,-lpython2.7 -target=hthor
  11. *
  12. * To run it, ensure that PYTHONPATH is set such that python_cat.py can be located
  13. */
  // Embedded C++ that calls a Python function through the CPython C API
  15. string cat(varstring a, varstring b) := BEGINC++
  16. // This section of the code should probably move to a plugin, or somesuch
  17. #include <Python.h>
  18. #include <assert.h>
  19. static pthread_once_t python_resolve_flag = PTHREAD_ONCE_INIT; /* Ensures called just once */
  20. static PyObject *pName, *pModule, *pFunc_cat;
  21. static bool pythonInitialized = false;
  22. static void resolvePythonFunctions()
  23. {
  24. /* Do all the function resolution just the once... */
  25. PyObject *pDict;
  26. // Initialize the Python Interpreter
  27. Py_Initialize();
  28. PyEval_InitThreads();
  29. pythonInitialized = true;
  30. // Build the name object
  31. pName = PyString_FromString("python_cat");
  32. // Load the module object
  33. pModule = PyImport_Import(pName);
  34. if (pModule == NULL)
  35. {
  36. PyErr_Print();
  37. }
  38. else
  39. {
  40. // pDict is a borrowed reference
  41. pDict = PyModule_GetDict(pModule);
  42. // pFunc_cat is also a borrowed reference
  43. pFunc_cat = PyDict_GetItemString(pDict, "cat");
  44. if (!pFunc_cat || !PyCallable_Check(pFunc_cat))
  45. {
  46. PyErr_Print();
  47. pFunc_cat = NULL;
  48. }
  49. }
  50. PyEval_ReleaseLock();
  51. }
  52. static void finishPython()
  53. {
  54. // Clean up
  55. if (pModule)
  56. Py_DECREF(pModule);
  57. if (pName)
  58. Py_DECREF(pName);
  59. // Finish the Python Interpreter
  60. if (pythonInitialized)
  61. Py_Finalize();
  62. }
  63. static void checkPythonError()
  64. {
  65. PyObject* err = PyErr_Occurred();
  66. if (err)
  67. {
  68. PyErr_Print();
  69. rtlFail(0, "Unexpected failure"); // MORE - should probably get some info out of PyError rather than just printing it
  70. }
  71. }
  72. // The Python Global Interpreter Lock (GIL) won't know about C++-created threads, so we need to
  73. // call PyGILState_Ensure() and PyGILState_Release at the start and end of every function.
  74. // Wrapping them in a class like this ensures that the release always happens even if
  75. // the function exists prematurely
  76. class GILstateWrapper
  77. {
  78. PyGILState_STATE gstate;
  79. public:
  80. GILstateWrapper()
  81. {
  82. gstate = PyGILState_Ensure();
  83. }
  84. ~GILstateWrapper()
  85. {
  86. PyGILState_Release(gstate);
  87. }
  88. };
  89. // Use class OwnedPyObject for any objects that are not 'borrowed references'
  90. // so that the appropriate Py_DECREF call is made when the OwnedPyObject goes
  91. // out of scope, even if the function returns prematurely (such as via an exception).
  92. // In particular, checkPythonError is a lot easier to call safely if this is used.
  93. class OwnedPyObject
  94. {
  95. PyObject *ptr;
  96. public:
  97. inline OwnedPyObject(PyObject *_ptr) : ptr(_ptr) {}
  98. inline ~OwnedPyObject() { if (ptr) Py_DECREF(ptr); }
  99. inline PyObject * operator -> () const { return ptr; }
  100. inline operator PyObject *() const { return ptr; }
  101. };
  102. //--------------------------------------------------------
  103. #body
  104. // extern void user1(size32_t & __lenResult,char * & __result,const char * a,const char * b) {
  105. pthread_once(&python_resolve_flag, resolvePythonFunctions);
  106. if (!pFunc_cat)
  107. rtlFail(0, "Could not resolve python functions");
  108. GILstateWrapper gstate; // Ensure that we play nice with Python threads
  109. OwnedPyObject pArgs = Py_BuildValue("s,s", a, b);
  110. checkPythonError();
  111. OwnedPyObject pResult = PyObject_CallObject(pFunc_cat, pArgs);
  112. checkPythonError();
  113. __lenResult = PyString_Size(pResult);
  114. const char * chars = PyString_AsString(pResult);
  115. __result = new char(__lenResult);
  116. memcpy(__result, chars, __lenResult);
  117. // }
  118. ENDC++;
  119. //--------------------------------------------------------
  120. // ECL code - an input dataset with 2 records, each containing 2 strings
  // Layout of one input row: the two strings that will be passed to cat()
  inrec := RECORD
      STRING f1;
      STRING f2;
  END;
  // Two inline datasets of two rows each, both using the inrec layout
  infile1 := DATASET([{'a', 'b'},
                      {'c', 'd'}], inrec);
  infile2 := DATASET([{'e', 'f'},
                      {'g', 'h'}], inrec);
  // Output record has just one string, filled in from the result of the Python function
  outrec := RECORD
      STRING c;
  END;
  // Builds one output row from one input row by handing both input strings to cat()
  outrec t(inrec L) := TRANSFORM
      SELF.c := cat(L.f1, L.f2) // Calls Python function
  END;
  // Run both datasets through t and append the two projected results
  outfile := PROJECT(infile1, t(LEFT))
           + PROJECT(infile2, t(LEFT)); // threaded concat operation
  // A bare expression used as an action outputs its result
  outfile;