parselib.cpp 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. /*##############################################################################
  2. Copyright (C) 2011 HPCC Systems.
  3. All rights reserved. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Affero General Public License as
  5. published by the Free Software Foundation, either version 3 of the
  6. License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Affero General Public License for more details.
  11. You should have received a copy of the GNU Affero General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ############################################################################## */
  14. #include <time.h>
  15. #include <stdlib.h>
  16. #include <string.h>
  17. #include <ctype.h>
  18. #include "jlib.hpp"
  19. #include "thorparse.hpp"
  20. #include "parselib.hpp"
  21. #define PARSELIB_VERSION "PARSELIB 1.0.1"
  22. _ATOM separatorTagAtom;
  23. MODULE_INIT(INIT_PRIORITY_STANDARD)
  24. {
  25. separatorTagAtom = createAtom("<separator>");
  26. return true;
  27. }
  28. //-------------------------------------------------------------------------------------------------------------------------------------------
  // ECL SERVICE definition exported by this plugin. Each entry names the
  // C entry point (defined later in this file) that implements it.
  const char * EclDefinition =
      "export ParseLib := SERVICE\n"
      // getParseTree -> plGetDefaultParseTree
      " string getParseTree() : "
          "c,volatile,entrypoint='plGetDefaultParseTree',userMatchFunction; \n"
      // getXmlParseTree -> plGetXmlParseTree
      " string getXmlParseTree() : "
          "c,volatile,entrypoint='plGetXmlParseTree',userMatchFunction; \n"
      "END;";
  34. static const char * compatibleVersions[] = {
  35. "PARSELIB 1.0.0 [fa9b3ab8fad8e46d8c926015cbd39f06]",
  36. PARSELIB_VERSION,
  37. NULL };
  38. PARSELIB_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
  39. {
  40. if (pb->size == sizeof(ECLPluginDefinitionBlockEx))
  41. {
  42. ECLPluginDefinitionBlockEx * pbx = (ECLPluginDefinitionBlockEx *) pb;
  43. pbx->compatibleVersions = compatibleVersions;
  44. }
  45. else if (pb->size != sizeof(ECLPluginDefinitionBlock))
  46. return false;
  47. pb->magicVersion = PLUGIN_VERSION;
  48. pb->version = PARSELIB_VERSION;
  49. pb->moduleName = "lib_parselib";
  50. pb->ECL = EclDefinition;
  51. pb->flags = PLUGIN_IMPLICIT_MODULE;
  52. pb->description = "ParseLib PARSE helper library";
  53. return true;
  54. }
  55. namespace nsParselib {
  56. IPluginContext * parentCtx = NULL;
  57. static bool hasChildren(IMatchWalker * walker)
  58. {
  59. for (unsigned i=0;;i++)
  60. {
  61. Owned<IMatchWalker> child = walker->getChild(i);
  62. if (!child)
  63. return false;
  64. if (child->queryName() != separatorTagAtom)
  65. return true;
  66. }
  67. }
  68. static StringBuffer & getElementText(StringBuffer & s, IMatchWalker * walker)
  69. {
  70. unsigned len = walker->queryMatchSize();
  71. const char * text = (const char *)walker->queryMatchStart();
  72. return s.append(len, text);
  73. }
  74. static void expandElementText(StringBuffer & s, IMatchWalker * walker)
  75. {
  76. getElementText(s.append('"'), walker).append('"');
  77. }
  78. static void getDefaultParseTree(StringBuffer & s, IMatchWalker * cur)
  79. {
  80. _ATOM name = cur->queryName();
  81. if (name != separatorTagAtom)
  82. {
  83. if (name)
  84. {
  85. StringBuffer lname;
  86. lname.append(name);
  87. lname.toLowerCase();
  88. s.append(lname);
  89. }
  90. if (hasChildren(cur))
  91. {
  92. s.append("[");
  93. for (unsigned i=0;;i++)
  94. {
  95. Owned<IMatchWalker> child = cur->getChild(i);
  96. if (!child)
  97. break;
  98. getDefaultParseTree(s, child);
  99. s.append(" ");
  100. }
  101. s.setLength(s.length()-1);
  102. s.append("]");
  103. }
  104. else
  105. expandElementText(s, cur);
  106. }
  107. }
  108. //---------------------------------------------------------------------------
  109. static void getXmlParseTree(StringBuffer & s, IMatchWalker * walker, unsigned indent)
  110. {
  111. _ATOM name = walker->queryName();
  112. if (name != separatorTagAtom)
  113. {
  114. unsigned max = walker->numChildren();
  115. if (!name)
  116. {
  117. if (hasChildren(walker))
  118. {
  119. for (unsigned i=0; i<max; i++)
  120. {
  121. Owned<IMatchWalker> child = walker->getChild(i);
  122. getXmlParseTree(s, child, indent);
  123. }
  124. }
  125. else
  126. getElementText(s, walker);
  127. }
  128. else
  129. {
  130. StringBuffer lname;
  131. lname.append(name);
  132. lname.toLowerCase();
  133. s.pad(indent).append('<').append(lname).append('>');
  134. if (hasChildren(walker))
  135. {
  136. s.newline();
  137. for (unsigned i=0; i<max; i++)
  138. {
  139. Owned<IMatchWalker> child = walker->getChild(i);
  140. getXmlParseTree(s, child, indent+1);
  141. }
  142. s.pad(indent);
  143. }
  144. else
  145. getElementText(s, walker);
  146. s.append("</").append(lname).append('>').newline();
  147. }
  148. }
  149. }
  150. }//namespace
  151. using namespace nsParselib;
  152. PARSELIB_API void setPluginContext(IPluginContext * _ctx) { parentCtx = _ctx; }
  153. PARSELIB_API void plGetXmlParseTree(IMatchWalker * walker, unsigned & len, char * & text)
  154. {
  155. StringBuffer s;
  156. getXmlParseTree(s, walker, 0);
  157. len = s.length();
  158. text = s.detach();
  159. }
  160. PARSELIB_API void plGetDefaultParseTree(IMatchWalker * walker, unsigned & len, char * & text)
  161. {
  162. StringBuffer s;
  163. getDefaultParseTree(s, walker);
  164. len = s.length();
  165. text = s.detach();
  166. }
  167. //-------------------------------------------------------------------------------------------------------------------------------------------