python-stream.ecl 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. IMPORT Python;
  2. /*
  3. This example illustrates and tests the use of embedded Python.
  4. */
  5. // This example illustrates returning datasets from Python
  6. // These are the record structures we will be returning - note the child records, datasets and dictionaries in it
  7. childrec := RECORD
  8. string name => unsigned value; // fields before => are the key, fields after it are the payload (relevant when used as DICTIONARY(childrec))
  9. END;
  10. eclRecord := RECORD // field order matters: streamedNames returns tuples that are mapped onto these fields by position
  11. STRING name1; // variable-length string
  12. STRING10 name2; // fixed-length string, 10 characters
  13. LINKCOUNTED DATASET(childrec) childnames; // child dataset of childrec rows
  14. LINKCOUNTED DICTIONARY(childrec) childdict{linkcounted}; // child dictionary of childrec entries
  15. childrec r; // single nested childrec row
  16. unsigned1 val1; // 1-byte unsigned integer
  17. integer1 val2; // 1-byte signed integer
  18. UTF8 u1; // UTF-8 encoded text
  19. UNICODE u2; // variable-length Unicode text
  20. UNICODE8 u3; // fixed-length Unicode text, 8 characters
  21. BIG_ENDIAN unsigned4 val3; // 4-byte unsigned integer stored big-endian
  22. DATA d; // raw binary data
  23. BOOLEAN b;
  24. SET OF STRING ss1; // set of variable-length strings
  25. END;
  26. namerec := RECORD // single-field layout used for the name-only datasets
  27.   STRING name;
  28. END;
  29. namerec2 := RECORD // two-field variant of namerec
  30.   STRING name;
  31.   STRING name2;
  32. END;
  33. // To return a dataset, we can return a list of tuples, each one corresponding to a row of the result, with one element per field of the resulting ECL record
  34. // In this example, the fields are mapped by position
  35. // Just to spice things up we provide a couple of parameters too
  36. DATASET(eclRecord) streamedNames(DATA d, UTF8 u) := EMBED(Python)
  37.   return [  # one tuple per row; elements are mapped onto eclRecord fields by position
  38.     ("Gavin", "Halliday", [("a", 1),("b", 2),("c", 3)], [("aa", 11)], ("aaa", 111), 250, -1, U'là', U'là', U'là', 0x01000000, d, False, {"1","2"}),
  39.     ("John", "Smith", [], [], ("c", 3), 250, -1, U'là', U'là', u, 0x02000000, d, True, [])
  40.   ]
  41. ENDEMBED;
  42. OUTPUT(streamedNames(d'AA', u'là'));
  43. // We can also return a dataset by using a Python generator, which will be lazy-evaluated as the records are required by ECL code...
  44. DATASET(childrec) testGenerator(UNSIGNED lim) := EMBED(Python)
  45.   i = 0  # next value to emit
  46.   while i < lim:
  47.     yield ("Generated", i)  # (name, value), mapped by position onto childrec
  48.     i += 1
  49. ENDEMBED;
  50. OUTPUT(testGenerator(10));
  51. // If the returned tuples are namedtuples, we map fields by name rather than by position
  52. // Test use of Python named tuple...
  53. DATASET(childrec) testNamedTuples() := EMBED(Python)
  54.   from collections import namedtuple
  55.   ChildRec = namedtuple("childrec", "value, name") # Field order is deliberately the reverse of childrec - fields are matched by name
  56.   first = ChildRec(1, "name1")  # positional construction: value=1, name="name1"
  57.   second = ChildRec(name="name2", value=2)  # keyword construction
  58.   return [ first, second ]
  59. ENDEMBED;
  60. OUTPUT(testNamedTuples());
  61. // To return a record, just return a tuple (or namedtuple)
  62. childrec testRecord(INTEGER value, STRING s) := EMBED(Python)
  63.   return (s, value)  # plain tuple, mapped by position: s -> name, value -> value
  64. ENDEMBED;
  65. OUTPUT(testRecord(1,'Hello').value);
  66. OUTPUT(testRecord(1,'Hello').name);
  67. // If the record has a single field, you don't need to put the field into a tuple...
  68. DATASET(namerec) testMissingTuple1(UNSIGNED lim) := EMBED(Python)
  69.   return ['1', '2', '3']  # bare values, no tuple wrapper; lim is accepted but not used
  70. ENDEMBED;
  71. OUTPUT(testMissingTuple1(10));
  72. // ... but you can if you want
  73. DATASET(namerec) testMissingTuple2(UNSIGNED lim) := EMBED(Python)
  74.   return [ ('1',), ('2',), ('3',) ]  # trailing comma makes a one-element tuple; ('1') alone is just the string '1'. lim is accepted but not used
  75. ENDEMBED;
  76. OUTPUT(testMissingTuple2(10));
  77. // You can define a transform in Python, using a function that returns a record (i.e. a Python tuple)
  78. // Note that the tuple we pass to Python is a namedtuple
  79. TRANSFORM(childrec) testTransform(namerec inrec, UNSIGNED c) := EMBED(Python)
  80.   return (inrec.name, c)  # inrec is read by field name; the tuple maps by position onto childrec (name, value)
  81. ENDEMBED;
  82. d := DATASET([{'Richard'},{'Gavin'}], namerec);
  83. OUTPUT(PROJECT(d, testTransform(LEFT, COUNTER)));
  84. // Most transforms take a record as the input, but it's not a requirement
  85. TRANSFORM(childrec) testTransformNoRow(UNSIGNED lim) := EMBED(Python)
  86.   return ("Hello", lim)  # fixed name, with lim as the value; no input row is involved
  87. ENDEMBED;
  88. OUTPUT(ROW(testTransformNoRow(10)));
  89. // When passing datasets to Python, we get an iterator of named tuples
  90. // They are actually implemented as generators, meaning they are lazy-evaluated
  91. names := DATASET([{'Richard'}, {'James'}, {'Andrew'}], namerec);
  92. STRING datasetAsIterator(DATASET(namerec) input) := EMBED(Python)
  93.   s = ''
  94.   for n in input:  # iterate the rows of the dataset passed in
  95.     s = s + ' ' + n.name  # each name is prefixed with a space, so the result starts with one
  96.   return s  # returned once, after the loop has consumed every row
  97. ENDEMBED;
  98. OUTPUT(datasetAsIterator(names));