thorxmlread.cpp 75 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <algorithm>
  15. #include "jlib.hpp"
  16. #include "jexcept.hpp"
  17. #include "jfile.hpp"
  18. #include "jlog.hpp"
  19. #include "csvsplitter.hpp"
  20. #include "thorherror.h"
  21. #include "thorxmlread.hpp"
  22. #include "thorcommon.ipp"
  23. #include "eclrtl.hpp"
  24. #include "jptree.ipp"
  25. #define XMLTAG_CONTENT "<>"
  26. //=====================================================================================================
  27. XmlColumnIterator::XmlColumnIterator(IPropertyTreeIterator * _iter) : iter(_iter)
  28. {
  29. }
  30. IColumnProvider * XmlColumnIterator::first()
  31. {
  32. if (!iter->first())
  33. return NULL;
  34. setCurrent();
  35. return cur;
  36. }
  37. IColumnProvider * XmlColumnIterator::next()
  38. {
  39. if (!iter->next())
  40. return NULL;
  41. setCurrent();
  42. return cur;
  43. }
  44. void XmlColumnIterator::setCurrent()
  45. {
  46. Owned<IPropertyTree> curTree = &iter->get();
  47. cur.setown(new XmlDatasetColumnProvider);
  48. cur->setRow(curTree);
  49. }
  50. //=====================================================================================================
  51. static void decodeHexPairs(const char *input, unsigned inputLen, void * outData, unsigned outLen)
  52. {
  53. byte * tgt = (byte *)outData;
  54. while (inputLen >= 2)
  55. {
  56. if (outLen-- == 0)
  57. return;
  58. byte high = hex2num(*input++);
  59. *tgt++ = (high << 4) | hex2num(*input++);
  60. inputLen -= 2;
  61. }
  62. if (outLen)
  63. memset(outData, 0, outLen);
  64. }
  65. static void decodeHexPairsX(const char *input, unsigned inputLen, void *&outData, unsigned &outLen)
  66. {
  67. if (inputLen<2)
  68. {
  69. outLen = 0;
  70. outData = NULL;
  71. return;
  72. }
  73. outLen = inputLen/2;
  74. outData = malloc(outLen);
  75. char *tgt = (char *)outData;
  76. for (;;)
  77. {
  78. byte high = hex2num(*input++);
  79. *tgt++ = (high << 4) | hex2num(*input++);
  80. inputLen -= 2;
  81. if (inputLen<2) break;
  82. }
  83. }
  84. //=====================================================================================================
  85. bool XmlDatasetColumnProvider::getBool(const char * name)
  86. {
  87. return row->getPropBool(name, 0);
  88. }
  89. __int64 XmlDatasetColumnProvider::getInt(const char * name)
  90. {
  91. return row->getPropInt64(name, 0);
  92. }
  93. __uint64 XmlDatasetColumnProvider::getUInt(const char * name)
  94. {
  95. return readUInt(name, 0);
  96. }
  97. void XmlDatasetColumnProvider::getData(size32_t len, void * target, const char * name)
  98. {
  99. const char *hexPairSequence = row->queryProp(name);
  100. if (!hexPairSequence)
  101. memset(target, 0, len);
  102. else
  103. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  104. }
  105. void XmlDatasetColumnProvider::getDataX(size32_t & len, void * & target, const char * name)
  106. {
  107. const char *hexPairSequence = row->queryProp(name);
  108. if (!hexPairSequence)
  109. {
  110. len = 0;
  111. target = NULL;
  112. return;
  113. }
  114. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  115. }
  116. void XmlDatasetColumnProvider::getDataRaw(size32_t len, void * target, const char * name)
  117. {
  118. const char *hexPairSequence = row->queryProp(name);
  119. if (!hexPairSequence)
  120. memset(target, 0, len);
  121. else
  122. {
  123. size32_t dLen = (size32_t)strlen(hexPairSequence);
  124. memcpy(target, hexPairSequence, dLen);
  125. if (dLen < len)
  126. memset((byte*)target+dLen, 0, len - dLen);
  127. }
  128. }
  129. void XmlDatasetColumnProvider::getDataRawX(size32_t & len, void * & target, const char * name)
  130. {
  131. const char *hexPairSequence = row->queryProp(name);
  132. if (!hexPairSequence)
  133. {
  134. len = 0;
  135. target = NULL;
  136. return;
  137. }
  138. len = (size32_t)strlen(hexPairSequence);
  139. target = malloc(len);
  140. memcpy(target, hexPairSequence, len);
  141. }
  142. void XmlDatasetColumnProvider::getQString(size32_t len, char * target, const char * name)
  143. {
  144. // You could argue that it should convert from UTF8 to ascii first but it's a no-op for any char that QString supports, and it's ok to be undefined for any char that it doesn't
  145. const char * value = row->queryProp(name);
  146. size32_t lenValue = value ? (size32_t)strlen(value) : 0;
  147. rtlStrToQStr(len, target, lenValue, value);
  148. }
  149. void XmlDatasetColumnProvider::getString(size32_t len, char * target, const char * name)
  150. {
  151. const char * value = row->queryProp(name);
  152. size32_t utf8bytes = value ? (size32_t)strlen(value) : 0;
  153. if (utf8bytes)
  154. rtlUtf8ToStr(len, target, rtlUtf8Length(utf8bytes, value), value);
  155. else
  156. memset(target, ' ', len);
  157. }
  158. void XmlDatasetColumnProvider::getStringX(size32_t & len, char * & target, const char * name)
  159. {
  160. const char * value = row->queryProp(name);
  161. size32_t utf8bytes = value ? (size32_t)strlen(value) : 0;
  162. if (utf8bytes)
  163. rtlUtf8ToStrX(len, target, rtlUtf8Length(utf8bytes, value), value);
  164. else
  165. {
  166. len = 0;
  167. target = NULL;
  168. }
  169. }
  170. void XmlDatasetColumnProvider::getUnicodeX(size32_t & len, UChar * & target, const char * name)
  171. {
  172. const char * text = row->queryProp(name);
  173. if (text)
  174. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  175. else
  176. {
  177. len = 0;
  178. target = NULL;
  179. }
  180. }
  181. void XmlDatasetColumnProvider::getUtf8X(size32_t & len, char * & target, const char * path)
  182. {
  183. const char * value = row->queryProp(path);
  184. size32_t size = value ? (size32_t)strlen(value) : 0;
  185. target = (char *)malloc(size);
  186. memcpy_iflen(target, value, size);
  187. len = rtlUtf8Length(size, target);
  188. }
  189. bool XmlDatasetColumnProvider::getIsSetAll(const char * path)
  190. {
  191. StringBuffer fullpath;
  192. fullpath.append(path).append("/All");
  193. return row->hasProp(fullpath.str());
  194. }
  195. IColumnProviderIterator * XmlDatasetColumnProvider::getChildIterator(const char * path)
  196. {
  197. return new XmlColumnIterator(row->getElements(path));
  198. }
  199. bool XmlDatasetColumnProvider::readBool(const char * path, bool _default)
  200. {
  201. return row->getPropBool(path, _default);
  202. }
  203. void XmlDatasetColumnProvider::readData(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  204. {
  205. const char *hexPairSequence = row->queryProp(path);
  206. if (hexPairSequence)
  207. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  208. else
  209. rtlDataToData(len, target, _lenDefault, _default);
  210. }
  211. void XmlDatasetColumnProvider::readDataX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  212. {
  213. const char *hexPairSequence = row->queryProp(path);
  214. if (hexPairSequence)
  215. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  216. else
  217. rtlStrToDataX(len, target, _lenDefault, _default);
  218. }
  219. void XmlDatasetColumnProvider::readDataRaw(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  220. {
  221. rtlDataToData(len, target, _lenDefault, _default);
  222. }
  223. void XmlDatasetColumnProvider::readDataRawX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  224. {
  225. rtlStrToDataX(len, target, _lenDefault, _default);
  226. }
  227. __int64 XmlDatasetColumnProvider::readInt(const char * path, __int64 _default)
  228. {
  229. return row->getPropInt64(path, _default);
  230. }
  231. __uint64 XmlDatasetColumnProvider::readUInt(const char * path, __uint64 _default)
  232. {
  233. const char *val = row->queryProp(path);
  234. if (val && *val)
  235. return strtoull(val, nullptr, 10);
  236. else
  237. return _default;
  238. }
  239. void XmlDatasetColumnProvider::readQString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  240. {
  241. const char * value = row->queryProp(path);
  242. if (value)
  243. rtlStrToQStr(len, target, (size32_t)strlen(value), value); // more: could process utf8, but characters would be lost anyway. At worse will mean extra blanks.
  244. else
  245. rtlQStrToQStr(len, target, _lenDefault, _default);
  246. }
  247. void XmlDatasetColumnProvider::readString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  248. {
  249. const char * value = row->queryProp(path);
  250. if (value)
  251. rtlUtf8ToStr(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  252. else
  253. rtlStrToStr(len, target, _lenDefault, _default);
  254. }
  255. void XmlDatasetColumnProvider::readStringX(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  256. {
  257. const char * value = row->queryProp(path);
  258. if (value)
  259. rtlUtf8ToStrX(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  260. else
  261. rtlStrToStrX(len, target, _lenDefault, _default);
  262. }
  263. void XmlDatasetColumnProvider::readUnicodeX(size32_t & len, UChar * & target, const char * path, size32_t _lenDefault, const UChar * _default)
  264. {
  265. const char * text = row->queryProp(path);
  266. if (text)
  267. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  268. else
  269. rtlUnicodeToUnicodeX(len, target, _lenDefault, _default);
  270. }
  271. bool XmlDatasetColumnProvider::readIsSetAll(const char * path, bool _default)
  272. {
  273. if (row->hasProp(path))
  274. return getIsSetAll(path);
  275. return _default;
  276. }
  277. void XmlDatasetColumnProvider::readUtf8X(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  278. {
  279. const char * value = row->queryProp(path);
  280. if (value)
  281. rtlUtf8ToUtf8X(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  282. else
  283. rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
  284. }
  285. const char *XmlDatasetColumnProvider::readRaw(const char * path, size32_t &sz) const
  286. {
  287. const char *value = row->queryProp(path);
  288. sz = value ? strlen(value) : 0;
  289. return value;
  290. }
  291. //=====================================================================================================
  292. bool XmlSetColumnProvider::getBool(const char * name)
  293. {
  294. #ifdef _DEBUG
  295. assertex(stricmp(name, "value")==0);
  296. #endif
  297. return row->getPropBool(NULL, 0);
  298. }
  299. __int64 XmlSetColumnProvider::getInt(const char * name)
  300. {
  301. #ifdef _DEBUG
  302. assertex(stricmp(name, "value")==0);
  303. #endif
  304. return row->getPropInt64(NULL, 0);
  305. }
  306. __uint64 XmlSetColumnProvider::getUInt(const char * name)
  307. {
  308. #ifdef _DEBUG
  309. assertex(stricmp(name, "value")==0);
  310. #endif
  311. //MORE: Note nullptr is passed in all of these XmlSetColumnProvider::get functions
  312. //The code generator incorrectly generates "value" as the name to read. Really it should be fixed there.
  313. return readUInt(nullptr, 0);
  314. }
  315. void XmlSetColumnProvider::getData(size32_t len, void * target, const char * name)
  316. {
  317. #ifdef _DEBUG
  318. assertex(stricmp(name, "value")==0);
  319. #endif
  320. const char *hexPairSequence = row->queryProp(NULL);
  321. if (!hexPairSequence)
  322. memset(target, 0, len);
  323. else
  324. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  325. }
  326. void XmlSetColumnProvider::getDataX(size32_t & len, void * & target, const char * name)
  327. {
  328. #ifdef _DEBUG
  329. assertex(stricmp(name, "value")==0);
  330. #endif
  331. const char *hexPairSequence = row->queryProp(NULL);
  332. if (!hexPairSequence)
  333. {
  334. len = 0;
  335. target = NULL;
  336. return;
  337. }
  338. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  339. }
  340. void XmlSetColumnProvider::getDataRaw(size32_t len, void * target, const char * name)
  341. {
  342. #ifdef _DEBUG
  343. assertex(stricmp(name, "value")==0);
  344. #endif
  345. const char *hexPairSequence = row->queryProp(NULL);
  346. if (!hexPairSequence)
  347. memset(target, 0, len);
  348. else
  349. {
  350. size32_t dLen = strlen(hexPairSequence);
  351. memcpy(target, hexPairSequence, dLen);
  352. if (dLen < len)
  353. memset((byte*)target+dLen, 0, len - dLen);
  354. }
  355. }
  356. void XmlSetColumnProvider::getDataRawX(size32_t & len, void * & target, const char * name)
  357. {
  358. #ifdef _DEBUG
  359. assertex(stricmp(name, "value")==0);
  360. #endif
  361. const char *hexPairSequence = row->queryProp(NULL);
  362. if (!hexPairSequence)
  363. {
  364. len = 0;
  365. target = NULL;
  366. return;
  367. }
  368. len = (size32_t)strlen(hexPairSequence);
  369. target = malloc(len);
  370. memcpy(target, hexPairSequence, len);
  371. }
  372. void XmlSetColumnProvider::getQString(size32_t len, char * target, const char * name)
  373. {
  374. #ifdef _DEBUG
  375. assertex(stricmp(name, "value")==0);
  376. #endif
  377. const char * value = row->queryProp(NULL);
  378. unsigned lenValue = value ? (size32_t)strlen(value) : 0;
  379. rtlStrToQStr(len, target, lenValue, value);
  380. }
  381. void XmlSetColumnProvider::getString(size32_t len, char * target, const char * name)
  382. {
  383. #ifdef _DEBUG
  384. assertex(stricmp(name, "value")==0);
  385. #endif
  386. const char * value = row->queryProp(NULL);
  387. if (value)
  388. rtlVStrToStr(len, target, value);
  389. else
  390. memset(target, ' ', len);
  391. }
  392. void XmlSetColumnProvider::getStringX(size32_t & len, char * & target, const char * name)
  393. {
  394. #ifdef _DEBUG
  395. assertex(stricmp(name, "value")==0);
  396. #endif
  397. const char * value = row->queryProp(NULL);
  398. len = value ? (size32_t)strlen(value) : 0;
  399. target = (char *)malloc(len);
  400. memcpy_iflen(target, value, len);
  401. //MORE: utf8->ascii?
  402. }
  403. void XmlSetColumnProvider::getUnicodeX(size32_t & len, UChar * & target, const char * name)
  404. {
  405. #ifdef _DEBUG
  406. assertex(stricmp(name, "value")==0);
  407. #endif
  408. const char * text = row->queryProp(NULL);
  409. if (text)
  410. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  411. else
  412. {
  413. len = 0;
  414. target = NULL;
  415. }
  416. }
  417. void XmlSetColumnProvider::getUtf8X(size32_t & len, char * & target, const char * name)
  418. {
  419. #ifdef _DEBUG
  420. assertex(stricmp(name, "value")==0);
  421. #endif
  422. const char * value = row->queryProp(NULL);
  423. size32_t size = value ? (size32_t)strlen(value) : 0;
  424. target = (char *)malloc(size);
  425. memcpy_iflen(target, value, size);
  426. len = rtlUtf8Length(size, value);
  427. }
  428. bool XmlSetColumnProvider::getIsSetAll(const char * path)
  429. {
  430. UNIMPLEMENTED;
  431. StringBuffer fullpath;
  432. fullpath.append(path).append("/All");
  433. return row->hasProp(fullpath.str());
  434. }
  435. IColumnProviderIterator * XmlSetColumnProvider::getChildIterator(const char * path)
  436. {
  437. UNIMPLEMENTED;
  438. return new XmlColumnIterator(row->getElements(path));
  439. }
  440. bool XmlSetColumnProvider::readBool(const char * path, bool _default)
  441. {
  442. return row->getPropBool(NULL, _default);
  443. }
  444. void XmlSetColumnProvider::readData(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  445. {
  446. const char *hexPairSequence = row->queryProp(NULL);
  447. if (hexPairSequence)
  448. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  449. else
  450. rtlDataToData(len, target, _lenDefault, _default);
  451. }
  452. void XmlSetColumnProvider::readDataX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  453. {
  454. const char *hexPairSequence = row->queryProp(NULL);
  455. if (hexPairSequence)
  456. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  457. else
  458. rtlStrToDataX(len, target, _lenDefault, _default);
  459. }
  460. void XmlSetColumnProvider::readDataRaw(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  461. {
  462. rtlDataToData(len, target, _lenDefault, _default);
  463. }
  464. void XmlSetColumnProvider::readDataRawX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  465. {
  466. rtlDataToData(len, target, _lenDefault, _default);
  467. }
  468. __int64 XmlSetColumnProvider::readInt(const char * path, __int64 _default)
  469. {
  470. return row->getPropInt64(NULL, _default);
  471. }
  472. __uint64 XmlSetColumnProvider::readUInt(const char * path, __uint64 _default)
  473. {
  474. const char *val = row->queryProp(path);
  475. if (val && *val)
  476. return strtoull(val, nullptr, 10);
  477. else
  478. return _default;
  479. }
  480. void XmlSetColumnProvider::readQString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  481. {
  482. const char * value = row->queryProp(NULL);
  483. if (value)
  484. rtlStrToQStr(len, target, (size32_t)strlen(value), value); // more: could process utf8, but characters would be lost anyway. At worse will mean extra blanks.
  485. else
  486. rtlQStrToQStr(len, target, _lenDefault, _default);
  487. }
  488. void XmlSetColumnProvider::readString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  489. {
  490. const char * value = row->queryProp(NULL);
  491. if (value)
  492. rtlUtf8ToStr(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  493. else
  494. rtlStrToStr(len, target, _lenDefault, _default);
  495. }
  496. void XmlSetColumnProvider::readStringX(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  497. {
  498. const char * value = row->queryProp(NULL);
  499. if (value)
  500. rtlUtf8ToStrX(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  501. else
  502. rtlStrToStrX(len, target, _lenDefault, _default);
  503. }
  504. void XmlSetColumnProvider::readUnicodeX(size32_t & len, UChar * & target, const char * path, size32_t _lenDefault, const UChar * _default)
  505. {
  506. const char * text = row->queryProp(NULL);
  507. if (text)
  508. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  509. else
  510. rtlUnicodeToUnicodeX(len, target, _lenDefault, _default);
  511. }
  512. bool XmlSetColumnProvider::readIsSetAll(const char * path, bool _default)
  513. {
  514. throwUnexpected();
  515. if (row->hasProp(NULL))
  516. return getIsSetAll(path);
  517. return _default;
  518. }
  519. void XmlSetColumnProvider::readUtf8X(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  520. {
  521. const char * value = row->queryProp(NULL);
  522. if (value)
  523. rtlUtf8ToUtf8X(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  524. else
  525. rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
  526. }
  527. IDataVal & CXmlToRawTransformer::transform(IDataVal & result, size32_t len, const void * text, bool isDataSet)
  528. {
  529. // MORE - should redo using a pull parser sometime
  530. Owned<IPropertyTree> root = createPTreeFromXMLString(len, (const char *)text, ipt_fast, xmlReadFlags);
  531. return transformTree(result, *root, isDataSet);
  532. }
  533. IDataVal & CXmlToRawTransformer::transformTree(IDataVal & result, IPropertyTree &root, bool isDataSet)
  534. {
  535. unsigned minRecordSize = rowTransformer->queryRecordSize()->getMinRecordSize();
  536. Owned <XmlColumnProvider> columns;
  537. Owned<IPropertyTreeIterator> rows;
  538. StringBuffer decodedXML;
  539. Owned<IPropertyTree> decodedTree;
  540. MemoryBuffer raw;
  541. size32_t curLength = 0;
  542. if (isDataSet)
  543. {
  544. columns.setown(new XmlDatasetColumnProvider);
  545. if (root.hasProp("Row"))
  546. rows.setown(root.getElements("Row"));
  547. else
  548. {
  549. // HACK for Gordon to work around WSDL issues
  550. const char *body = root.queryProp(NULL);
  551. if (body)
  552. {
  553. while(isspace(*body))
  554. body++;
  555. if (strncmp(body, "<Row", 4)==0)
  556. {
  557. try
  558. {
  559. decodedXML.append("<root>").append(body).append("</root>");
  560. decodedTree.setown(createPTreeFromXMLString(decodedXML.str(), ipt_caseInsensitive|ipt_fast));
  561. rows.setown(decodedTree->getElements("Row"));
  562. }
  563. catch (IException *E)
  564. {
  565. EXCLOG(E);
  566. E->Release();
  567. }
  568. catch (...)
  569. {
  570. ERRLOG(0, "Unexpected exception decoding XML for dataset");
  571. }
  572. }
  573. }
  574. }
  575. }
  576. else
  577. {
  578. columns.setown(new XmlSetColumnProvider);
  579. rows.setown(root.getElements("string"));
  580. ForEach(*rows)
  581. {
  582. columns->setRow(&rows->query());
  583. NullDiskCallback dummyCallback;
  584. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  585. size32_t thisSize = rowTransformer->transform(rowBuilder, columns, &dummyCallback);
  586. curLength += thisSize;
  587. rowBuilder.finishRow(thisSize);
  588. }
  589. rows.setown(root.getElements("Item"));
  590. }
  591. if (rows)
  592. {
  593. ForEach(*rows)
  594. {
  595. columns->setRow(&rows->query());
  596. NullDiskCallback dummyCallback;
  597. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  598. size32_t thisSize = rowTransformer->transform(rowBuilder, columns, &dummyCallback);
  599. curLength += thisSize;
  600. rowBuilder.finishRow(thisSize);
  601. }
  602. }
  603. result.setLen(raw.toByteArray(), curLength);
  604. return result;
  605. }
  606. size32_t createRowFromXml(ARowBuilder & rowBuilder, size32_t size, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  607. {
  608. Owned<IPropertyTree> root = createPTreeFromXMLString(size, utf8, ipt_fast, stripWhitespace ? ptr_ignoreWhiteSpace : ptr_none);
  609. if (!root)
  610. {
  611. throwError(THORCERR_InvalidXmlFromXml);
  612. return 0;
  613. }
  614. Owned <XmlColumnProvider> columns = new XmlDatasetColumnProvider;
  615. columns->setRow(root);
  616. NullDiskCallback dummyCallback;
  617. return xmlTransformer->transform(rowBuilder, columns, &dummyCallback);
  618. }
  619. const void * createRowFromXml(IEngineRowAllocator * rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  620. {
  621. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  622. size32_t newSize = createRowFromXml(rowBuilder, rtlUtf8Size(len, utf8), utf8, xmlTransformer, stripWhitespace);
  623. return rowBuilder.finalizeRowClear(newSize);
  624. }
  625. size32_t createRowFromJson(ARowBuilder & rowBuilder, size32_t size, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  626. {
  627. Owned<IPropertyTree> root = createPTreeFromJSONString(size, utf8, ipt_fast, stripWhitespace ? ptr_ignoreWhiteSpace : ptr_none);
  628. if (!root)
  629. {
  630. throwError(THORCERR_InvalidJsonFromJson);
  631. return 0;
  632. }
  633. Owned <XmlColumnProvider> columns = new XmlDatasetColumnProvider;
  634. columns->setRow(root);
  635. NullDiskCallback dummyCallback;
  636. return xmlTransformer->transform(rowBuilder, columns, &dummyCallback);
  637. }
  638. const void * createRowFromJson(IEngineRowAllocator * rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  639. {
  640. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  641. size32_t newSize = createRowFromJson(rowBuilder, rtlUtf8Size(len, utf8), utf8, xmlTransformer, stripWhitespace);
  642. return rowBuilder.finalizeRowClear(newSize);
  643. }
  644. //=====================================================================================================
  645. IDataVal & CCsvToRawTransformer::transform(IDataVal & result, size32_t len, const void * text, bool isDataSet)
  646. {
  647. CSVSplitter csvSplitter;
  648. csvSplitter.init(rowTransformer->getMaxColumns(), rowTransformer->queryCsvParameters(), NULL, NULL, NULL, NULL);
  649. size32_t minRecordSize = rowTransformer->queryRecordSize()->getMinRecordSize();
  650. const byte *finger = (const byte *) text;
  651. MemoryBuffer raw;
  652. size32_t curLength = 0;
  653. while (len)
  654. {
  655. unsigned thisLineLength = csvSplitter.splitLine(len, finger);
  656. finger += thisLineLength;
  657. len -= thisLineLength;
  658. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  659. unsigned thisSize = rowTransformer->transform(rowBuilder, csvSplitter.queryLengths(), (const char * *)csvSplitter.queryData(), 0);
  660. curLength += thisSize;
  661. rowBuilder.finishRow(thisSize);
  662. }
  663. result.setLen(raw.toByteArray(), curLength);
  664. return result;
  665. }
  666. //=====================================================================================================
  667. extern thorhelper_decl IXmlToRawTransformer * createXmlRawTransformer(IXmlToRowTransformer * xmlTransformer, PTreeReaderOptions xmlReadFlags)
  668. {
  669. if (xmlTransformer)
  670. return new CXmlToRawTransformer(*xmlTransformer, xmlReadFlags);
  671. return NULL;
  672. }
  673. extern thorhelper_decl ICsvToRawTransformer * createCsvRawTransformer(ICsvToRowTransformer * csvTransformer)
  674. {
  675. if (csvTransformer)
  676. return new CCsvToRawTransformer(*csvTransformer);
  677. return NULL;
  678. }
  679. bool isContentXPath(const char *xpath, StringBuffer &head)
  680. {
  681. if (xpath)
  682. {
  683. unsigned l = (size32_t)strlen(xpath);
  684. if (l >= 2)
  685. {
  686. const char *x = xpath+l-2;
  687. if ((x[0] == '<') && (x[1] == '>'))
  688. {
  689. head.append((size32_t)(x-xpath), xpath);
  690. return true;
  691. }
  692. }
  693. }
  694. return false;
  695. }
  696. class CXPath
  697. {
  698. int topQualifier;
  699. BoolArray simpleQualifier;
  700. StringArray nodes, qualifierStack;
  701. StringAttr xpathstr;
  702. bool testForSimpleQualifier(const char *qualifier)
  703. {
  704. // first char always '['
  705. return ('@' == qualifier[1]);
  706. }
  707. public:
  708. CXPath(const char *path, bool ignoreNameSpaces)
  709. {
  710. topQualifier = -1;
  711. if (!path) return;
  712. xpathstr.set(path);
  713. if (path && '/'==*path)
  714. {
  715. if ('/' == *(path+1))
  716. throw MakeStringException(0, "// unsupported here");
  717. path++;
  718. }
  719. for (;;)
  720. {
  721. const char *startQ = strchr(path, '[');
  722. const char *nextSep;
  723. for (;;)
  724. {
  725. nextSep = strchr(path, '/');
  726. if (startQ && (!nextSep || startQ < nextSep))
  727. break;
  728. StringAttr node;
  729. unsigned l = nextSep ? (size32_t)(nextSep-path) : (size32_t)strlen(path);
  730. if (!l) break;
  731. if (ignoreNameSpaces)
  732. {
  733. const char *colon = path;
  734. const char *end = path+l+1;
  735. do
  736. {
  737. if (':' == *colon++)
  738. {
  739. l -= colon-path;
  740. path = colon;
  741. break;
  742. }
  743. }
  744. while (colon != end);
  745. }
  746. StringBuffer wildRemoved;
  747. node.set(path, l);
  748. const char *c = node.get();
  749. while (*c) { if ('*' != *c) wildRemoved.append(*c); c++; }
  750. if (wildRemoved.length() && !validateXMLTag(wildRemoved.str()))
  751. throw MakeStringException(0, "Invalid node syntax %s in path %s", node.get(), path);
  752. nodes.append(node);
  753. qualifierStack.append(""); // no qualifier for this segment.
  754. simpleQualifier.append(true); // not used
  755. if (!nextSep) break;
  756. path = nextSep+1;
  757. }
  758. if (!nextSep && !startQ)
  759. break;
  760. const char *endQ = strchr(startQ, ']'); // escaped '[]' chars??
  761. assertex(endQ);
  762. unsigned l=startQ-path;
  763. if (ignoreNameSpaces)
  764. {
  765. const char *colon = path;
  766. const char *end = path+l+1;
  767. do
  768. {
  769. if (':' == *colon++)
  770. {
  771. l -= colon-path;
  772. path = colon;
  773. break;
  774. }
  775. }
  776. while (colon != end);
  777. }
  778. StringAttr node(path, l);
  779. nodes.append(node);
  780. StringAttr qualifier(startQ, endQ-startQ+1);
  781. qualifierStack.append(qualifier);
  782. bool simple = testForSimpleQualifier(qualifier);
  783. simpleQualifier.append(simple);
  784. if (-1 == topQualifier && !simple) topQualifier = qualifierStack.ordinality()-1;
  785. path = nextSep+1;
  786. if (!nextSep) break;
  787. }
  788. }
  789. bool toQualify(unsigned which, bool simple)
  790. {
  791. return (which < queryDepth() && *qualifierStack.item(which) && simple==querySimpleQualifier(which));
  792. }
  793. inline unsigned queryDepth()
  794. {
  795. return nodes.ordinality();
  796. }
  797. inline const char *queryNode(unsigned which)
  798. {
  799. return nodes.item(which);
  800. }
  801. inline bool querySimpleQualifier(unsigned which)
  802. {
  803. return simpleQualifier.item(which);
  804. }
  805. bool match(unsigned level, const char *tag)
  806. {
  807. const char *nodeTag = queryNode(level);
  808. if (strchr(nodeTag, '*'))
  809. return WildMatch(tag, strlen(tag), nodeTag, strlen(nodeTag), false);
  810. else
  811. return (0 == strcmp(nodeTag, tag));
  812. }
  813. bool qualify(IPropertyTree &tree, unsigned depth)
  814. {
  815. const char *qualifier = qualifierStack.item(depth);
  816. if (qualifier && '\0' != *qualifier)
  817. {
  818. const char *q = qualifier;
  819. bool numeric = true;
  820. for (;;)
  821. {
  822. if ('\0' == *q) break;
  823. else if (!isdigit(*q)) { numeric = false; break; }
  824. else q++;
  825. }
  826. if (numeric) throw MakeStringException(0, "Unsupported index qualifier: %s", qualifier);
  827. Owned<IPropertyTreeIterator> matchIter = tree.getElements(qualifier);
  828. if (!matchIter->first())
  829. return false;
  830. }
  831. return true;
  832. }
  833. inline int queryHighestQualifier() { return topQualifier; }
  834. const char *queryXPathStr() { return xpathstr; }
  835. };
  836. class CProspectiveMatch : public CInterface
  837. {
  838. public:
  839. CProspectiveMatch(IPropertyTree *_parent, IPropertyTree *_node, MemoryBuffer *_content=NULL) : parent(_parent), node(_node), content(_content) { }
  840. ~CProspectiveMatch() { if (content) delete content; }
  841. IPropertyTree *parent, *node;
  842. MemoryBuffer *content;
  843. };
  844. typedef CIArrayOf<CProspectiveMatch> CProcespectiveMatchArray;
  845. class CParseStackInfo : public CInterface
  846. {
  847. public:
  848. CParseStackInfo() : keep(false), nodeMade(false), keptForQualifier(false), iPTMade(NULL), startOffset(0), prospectiveMatches(NULL) { }
  849. ~CParseStackInfo()
  850. {
  851. if (prospectiveMatches)
  852. delete prospectiveMatches;
  853. }
  854. inline void reset()
  855. {
  856. keep = nodeMade = keptForQualifier = false;
  857. startOffset = 0;
  858. if (prospectiveMatches)
  859. prospectiveMatches->kill();
  860. iPTMade = NULL;
  861. }
  862. bool keep, nodeMade, keptForQualifier;
  863. offset_t startOffset;
  864. IPropertyTree *iPTMade;
  865. CProcespectiveMatchArray *prospectiveMatches;
  866. };
  867. class CMarkReadBase : public CInterface
  868. {
  869. public:
  870. virtual void reset() = 0;
  871. virtual void mark(offset_t offset) = 0;
  872. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb) = 0;
  873. virtual void closeMark() = 0;
  874. };
  875. class CMarkRead : public CMarkReadBase
  876. {
  877. const void *buffer;
  878. offset_t startOffset;
  879. unsigned bufLen;
  880. bool marking;
  881. public:
  882. CMarkRead(const void *_buffer, unsigned _bufLen) : buffer(_buffer), bufLen(_bufLen)
  883. {
  884. reset();
  885. }
  886. virtual void reset()
  887. {
  888. marking = false;
  889. startOffset = 0;
  890. }
  891. virtual void mark(offset_t offset)
  892. {
  893. assertex(!marking);
  894. marking = true;
  895. if (offset >= bufLen)
  896. throw MakeStringException(0, "start offset past end of input string");
  897. startOffset = offset;
  898. }
  899. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb)
  900. {
  901. assertex(marking);
  902. marking = true;
  903. if (offset < startOffset)
  904. throw MakeStringException(0, "end offset proceeds start offset");
  905. if (offset > bufLen)
  906. throw MakeStringException(0, "end offset past end of input string");
  907. mb.append((size32_t)(offset-startOffset), ((char*)buffer)+startOffset);
  908. marking = false;
  909. }
  910. virtual void closeMark()
  911. {
  912. marking = false;
  913. }
  914. };
  // CMarkReadBase implementation that wraps an ISimpleReadStream.
  // Data read from the wrapped stream passes through one allocation of 2*bufSize
  // bytes that is used as two halves (bufLowerHalf / bufUpperHalf).  'buf' is the
  // half currently being filled and consumed, 'bufOther' the previously used half
  // (NULL until the first switch).  While a mark is active, bytes that are about
  // to be dropped from the buffer are accumulated in markBuffer.
  915. class CMarkReadStream : implements ISimpleReadStream, public CMarkReadBase
  916. {
  917. ISimpleReadStream &stream;
  // readOffset: total bytes handed to callers of read(); markingOffset: offset passed to mark().
  918. offset_t readOffset, markingOffset;
  919. byte *buf, *bufPtr, *bufOther, *bufLowerHalf, *bufUpperHalf;
  // remaining: bytes at bufPtr already read from the stream but not yet returned to a caller.
  920. size32_t remaining, bufSize;
  921. MemoryBuffer markBuffer;
  922. bool marking;
  923. public:
  924. IMPLEMENT_IINTERFACE;
  // NOTE(review): the malloc() result is not checked.
  925. CMarkReadStream(ISimpleReadStream &_stream) : stream(_stream), readOffset(0)
  926. {
  927. bufSize = 0x8000/2;
  928. bufLowerHalf = buf = (byte *)malloc(bufSize*2);
  929. bufUpperHalf = bufLowerHalf+bufSize;
  930. reset();
  931. }
  // Frees the buffer and releases the wrapped stream (one reference is dropped here).
  932. ~CMarkReadStream()
  933. {
  934. free(bufLowerHalf); // pointer to whole buf in fact
  935. stream.Release();
  936. }
  // Returns to the initial state: lower half current, nothing buffered, no mark.
  937. virtual void reset()
  938. {
  939. remaining = 0;
  940. buf = bufPtr = bufLowerHalf;
  941. bufOther = NULL;
  942. readOffset = markingOffset = 0;
  943. marking = false;
  944. markBuffer.resetBuffer();
  945. }
  // Starts a capture at 'offset'.  If that offset lies before the start of the
  // current half, the tail of the other half from 'offset' onwards is copied into
  // markBuffer; throws if the offset is older than what the other half holds.
  946. virtual void mark(offset_t offset)
  947. {
  948. assertex(!marking);
  949. marking=true;
  950. markingOffset = offset;
  // 'from' = stream offset corresponding to the start of the current half.
  951. offset_t from = readOffset-(bufPtr-buf);
  952. if (offset < from)
  953. {
  954. if (!bufOther)
  955. throw MakeStringException(0, "Not enough buffered to mark!");
  956. from -= bufSize;
  957. if (offset < from)
  958. throw MakeStringException(0, "Not enough buffered to mark!");
  959. size32_t a = (size32_t)(offset-from);
  960. markBuffer.append(bufSize-a, bufOther+a);
  961. }
  962. }
  // Completes the capture: markBuffer is trimmed or topped up from the current
  // half so that it holds (offset - markingOffset) bytes, then swapped into mb.
  963. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb)
  964. {
  965. assertex(marking);
  966. size32_t markSize = (size32_t)(offset-markingOffset);
  // d > 0: bytes still missing from markBuffer; d < 0: markBuffer holds too much.
  967. int d = markSize-markBuffer.length();
  968. if (d < 0)
  969. markBuffer.setLength(markSize);
  970. else if (d > 0)
  971. {
  972. offset_t from = readOffset-(bufPtr-buf);
  973. size32_t o = 0;
  // Start copying at the mark if it lies inside the current half, else at the half's start.
  974. if (markingOffset>from)
  975. o = (size32_t)(markingOffset-from);
  976. markBuffer.append(d, buf+o);
  977. }
  978. mb.clear();
  979. mb.swapWith(markBuffer);
  980. marking = false;
  981. }
  // Abandons an active capture and discards anything accumulated for it.
  982. virtual void closeMark()
  983. {
  984. if (marking)
  985. {
  986. markBuffer.clear();
  987. marking = false;
  988. }
  989. }
  990. // ISimpleReadStream
  // Copies up to 'len' bytes into 'data' and returns the number copied (0 at end
  // of stream).  Buffered bytes are served first; otherwise the current half is
  // topped up from the wrapped stream, switching halves once it is full and
  // fully consumed.
  991. virtual size32_t read(size32_t len, void * data)
  992. {
  993. unsigned r = 0;
  994. if (!remaining)
  995. {
  // Free space left in the current half after bufPtr.
  996. size32_t bufSpace = bufSize-(bufPtr-buf);
  997. if (bufSpace)
  998. {
  999. remaining = stream.read(bufSpace, bufPtr);
  1000. if (remaining)
  1001. {
  1002. bufSpace -= remaining;
  1003. r = std::min(len, remaining);
  1004. memcpy(data, bufPtr, r);
  1005. remaining -= r;
  1006. len -= r;
  1007. bufPtr += r;
  1008. data = (byte *)data + r;
  1009. readOffset += r;
  1010. }
  1011. else
  1012. return 0;
  1013. }
  // Current half is full and everything in it has been returned: switch halves.
  1014. if (!bufSpace && !remaining)
  1015. {
  // Preserve the marked part of the half being left behind.
  // NOTE(review): nothing is saved when bufOther is still NULL (the first
  // switch) - confirm a mark started in the first half is handled elsewhere.
  1016. if (marking && bufOther)
  1017. {
  1018. offset_t from = readOffset-(bufPtr-buf);
  1019. int d = (int)(markingOffset-from);
  1020. if (d>0)
  1021. markBuffer.append(bufSize-d, buf+d);
  1022. else
  1023. markBuffer.append(bufSize, buf);
  1024. }
  1025. if (buf==bufLowerHalf)
  1026. {
  1027. buf = bufUpperHalf;
  1028. bufOther = bufLowerHalf;
  1029. }
  1030. else
  1031. {
  1032. buf = bufLowerHalf;
  1033. bufOther = bufUpperHalf;
  1034. }
  1035. bufPtr = buf;
  1036. }
  1037. if (!len) return r;
  // NOTE(review): this read targets 'buf' while the copy below starts at
  // 'bufPtr'; the two are equal after a half switch, but differ if the earlier
  // stream.read() returned fewer bytes than requested without reaching end of
  // stream - confirm the wrapped stream only returns short at end of stream.
  1038. if (!remaining)
  1039. {
  1040. remaining = stream.read(bufSize, buf);
  1041. if (!remaining)
  1042. return r;
  1043. }
  1044. }
  // Serve (the rest of) the request from bytes already buffered.
  1045. unsigned r2 = std::min(len, remaining);
  1046. memcpy(data, bufPtr, r2);
  1047. remaining -= r2;
  1048. bufPtr += r2;
  1049. readOffset += r2;
  1050. return r + r2;
  1051. }
  1052. };
  1053. // could contain a IPT, but convenient and efficient to derive impl.
  1054. class CPTreeWithOffsets : public LocalPTree
  1055. {
  1056. public:
  1057. CPTreeWithOffsets(const char *name) : LocalPTree(name) { startOffset = endOffset = 0; }
  1058. offset_t startOffset, endOffset;
  1059. };
  1060. class COffsetNodeCreator : implements IPTreeNodeCreator, public CInterface
  1061. {
  1062. public:
  1063. IMPLEMENT_IINTERFACE;
  1064. COffsetNodeCreator() { }
  1065. virtual IPropertyTree *create(const char *tag) { return new CPTreeWithOffsets(tag); }
  1066. };
  1067. class thorhelper_decl CColumnIterator : implements IColumnProviderIterator, public CInterface
  1068. {
  1069. Linked<IColumnProvider> parent;
  1070. Linked<IPropertyTree> root, matchNode;
  1071. MemoryBuffer * contentMb;
  1072. offset_t contentStartOffset;
  1073. void *utf8Translator;
  1074. Linked<IPropertyTreeIterator> iter;
  1075. Owned<IColumnProvider> cur;
  1076. StringAttr xpath;
  1077. public:
  1078. CColumnIterator(IColumnProvider *_parent, void *_utf8Translator, IPropertyTree *_root, IPropertyTree *_matchNode, IPropertyTreeIterator * _iter, MemoryBuffer *_contentMb, offset_t _contentStartOffset, const char *_xpath) : parent(_parent), root(_root), matchNode(_matchNode), iter(_iter), utf8Translator(_utf8Translator), xpath(_xpath), contentStartOffset(_contentStartOffset) { contentMb = _contentMb; }
  1079. IMPLEMENT_IINTERFACE;
  1080. IColumnProvider * first()
  1081. {
  1082. if (!iter->first())
  1083. return NULL;
  1084. setCurrent();
  1085. return cur;
  1086. }
  1087. IColumnProvider * next()
  1088. {
  1089. if (!iter->next())
  1090. return NULL;
  1091. setCurrent();
  1092. return cur;
  1093. }
  1094. void setCurrent();
  1095. };
  1096. class CColumnProvider : implements IColumnProvider, public CInterface
  1097. {
  1098. Linked<IPropertyTree> root, node;
  1099. MemoryBuffer contentMb;
  1100. bool content;
  1101. offset_t contentStartOffset;
  1102. void *utf8Translator;
  1103. CriticalSection crit;
  1104. MemoryBuffer tmpMb;
  1105. MemoryBuffer sharedResult;
  1106. StringAttr xpath;
  1107. void cnv2Latin1(unsigned length, const void *data, MemoryBuffer &mb)
  1108. {
  1109. void *target = mb.reserveTruncate(length);
  1110. if (length == 0)
  1111. return;
  1112. bool f;
  1113. unsigned rl = rtlCodepageConvert(utf8Translator, length, (char *)target, length, (const char *)data, f);
  1114. if (f)
  1115. {
  1116. StringBuffer errMsg("Failure translating utf-8, matching element '");
  1117. errMsg.append(xpath).append("' data: '");
  1118. if (length>100)
  1119. {
  1120. appendDataAsHex(errMsg, 100, data);
  1121. errMsg.append("<TRUNCATED>");
  1122. }
  1123. else
  1124. appendDataAsHex(errMsg, length, data);
  1125. errMsg.append("'");
  1126. throw MakeStringExceptionDirect(0, errMsg.str());
  1127. } else if (length > rl)
  1128. mb.setLength(rl);
  1129. }
  1130. public:
  1131. IMPLEMENT_IINTERFACE;
  1132. CColumnProvider(void *_utf8Translator, IPropertyTree *_root, IPropertyTree *_node, MemoryBuffer *_contentMb, bool ownContent, offset_t _contentStartOffset, const char *_xpath) : root(_root), node(_node), utf8Translator(_utf8Translator), contentStartOffset(_contentStartOffset), xpath(_xpath)
  1133. {
  1134. if (_contentMb)
  1135. {
  1136. content = true;
  1137. if (ownContent)
  1138. contentMb.swapWith(*_contentMb);
  1139. else
  1140. contentMb.setBuffer(_contentMb->length(), (void *)_contentMb->toByteArray());
  1141. }
  1142. else
  1143. content = false;
  1144. }
  1145. bool contentRequest(const char *path, size32_t &offset, size32_t &length)
  1146. {
  1147. StringBuffer subPath;
  1148. if (isContentXPath(path, subPath))
  1149. {
  1150. assertex(content);
  1151. if (subPath.length())
  1152. {
  1153. if ('/' == *path && '/' != *(path+1))
  1154. throw MakeStringException(0, "Cannot extract xml text from absolute path specification: %s", path);
  1155. CPTreeWithOffsets *subTree = (CPTreeWithOffsets *)node->queryPropTree(subPath.str());
  1156. if (subTree)
  1157. {
  1158. offset = (size32_t)(subTree->startOffset-contentStartOffset);
  1159. length = (size32_t)(subTree->endOffset-subTree->startOffset);
  1160. }
  1161. else
  1162. {
  1163. offset = 0;
  1164. length = 0;
  1165. }
  1166. }
  1167. else
  1168. {
  1169. CPTreeWithOffsets *_node = (CPTreeWithOffsets *)node.get();
  1170. if (contentStartOffset != _node->startOffset)
  1171. { // must be child
  1172. offset = (size32_t)(_node->startOffset-contentStartOffset);
  1173. length = (size32_t)(_node->endOffset-_node->startOffset);
  1174. }
  1175. else
  1176. {
  1177. offset = 0;
  1178. length = contentMb.length();
  1179. }
  1180. }
  1181. return true;
  1182. }
  1183. return false;
  1184. }
  1185. inline bool hasProp(const char * path)
  1186. {
  1187. if (path && '/' == *path && '/' != *(path+1))
  1188. return root->hasProp(path+1);
  1189. else
  1190. return node->hasProp(path);
  1191. }
  1192. inline const char * queryProp(const char * path)
  1193. {
  1194. if (path && '/' == *path && '/' != *(path+1))
  1195. return root->queryProp(path+1);
  1196. else
  1197. return node->queryProp(path);
  1198. }
  1199. inline bool getPropBin(const char * path, MemoryBuffer & mb)
  1200. {
  1201. if (path && '/' == *path && '/' != *(path+1))
  1202. return root->getPropBin(path+1, mb);
  1203. else
  1204. return node->getPropBin(path, mb);
  1205. }
  1206. // IColumnProvider
  1207. void getData(size32_t len, void * data, const char * path)
  1208. {
  1209. readData(len, data, path, 0, NULL);
  1210. }
  1211. void getDataX(size32_t & len, void * & data, const char * path)
  1212. {
  1213. readDataX(len, data, path, 0, NULL);
  1214. }
  1215. void getDataRaw(size32_t len, void * data, const char * path)
  1216. {
  1217. readDataRaw(len, data, path, 0, NULL);
  1218. }
  1219. void getDataRawX(size32_t & len, void * & data, const char * path)
  1220. {
  1221. readDataRawX(len, data, path, 0, NULL);
  1222. }
  1223. bool getBool(const char * path)
  1224. {
  1225. return readBool(path, false);
  1226. }
  1227. __int64 getInt(const char * path)
  1228. {
  1229. return readInt(path, 0);
  1230. }
  1231. __uint64 getUInt(const char * path)
  1232. {
  1233. return readUInt(path, 0);
  1234. }
  1235. void getQString(size32_t len, char * text, const char * path)
  1236. {
  1237. readQString(len, text, path, 0, NULL);
  1238. }
  1239. void getString(size32_t len, char * text, const char * path)
  1240. {
  1241. readString(len, text, path, 0, NULL);
  1242. }
  1243. void getStringX(size32_t & len, char * & text, const char * path)
  1244. {
  1245. readStringX(len, text, path, 0, NULL);
  1246. }
  1247. void getUnicodeX(size32_t & len, UChar * & text, const char * path)
  1248. {
  1249. readUnicodeX(len, text, path, 0, NULL);
  1250. }
  1251. void getUtf8X(size32_t & len, char * & text, const char * path)
  1252. {
  1253. readUtf8X(len, text, path, 0, NULL);
  1254. }
  1255. bool getIsSetAll(const char * path)
  1256. {
  1257. return readIsSetAll(path, false);
  1258. }
  1259. IColumnProviderIterator * getChildIterator(const char * path)
  1260. {
  1261. Owned<IPropertyTreeIterator> iter;
  1262. if (path && '/' == *path && '/' != *(path+1))
  1263. iter.setown(root->getElements(path+1));
  1264. else
  1265. iter.setown(node->getElements(path));
  1266. return new CColumnIterator(this, utf8Translator, root, node, iter, content ? &contentMb : NULL, contentStartOffset, xpath);
  1267. }
  1268. //
  1269. virtual void readData(size32_t len, void * data, const char * path, size32_t _lenDefault, const void * _default)
  1270. {
  1271. CriticalBlock b(crit);
  1272. sharedResult.clear();
  1273. size32_t offset = 0;
  1274. size32_t length = 0;
  1275. if (contentRequest(path, offset, length))
  1276. {
  1277. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1278. }
  1279. else
  1280. {
  1281. if (!getPropBin(path, tmpMb.clear()))
  1282. {
  1283. rtlStrToData(len, data, _lenDefault, _default);
  1284. return;
  1285. }
  1286. cnv2Latin1(tmpMb.length(), tmpMb.toByteArray(), sharedResult);
  1287. }
  1288. decodeHexPairs((const char *)sharedResult.toByteArray(), sharedResult.length(), data, len);
  1289. }
  1290. virtual void readDataX(size32_t & len, void * & data, const char * path, size32_t _lenDefault, const void * _default)
  1291. {
  1292. CriticalBlock b(crit);
  1293. sharedResult.clear();
  1294. size32_t offset = 0;
  1295. size32_t length = 0;
  1296. if (contentRequest(path, offset, length))
  1297. {
  1298. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1299. }
  1300. else
  1301. {
  1302. if (!getPropBin(path, tmpMb.clear()))
  1303. {
  1304. rtlStrToDataX(len, data, _lenDefault, _default);
  1305. return;
  1306. }
  1307. cnv2Latin1(tmpMb.length(), tmpMb.toByteArray(), sharedResult);
  1308. }
  1309. decodeHexPairsX((const char *)sharedResult.toByteArray(), sharedResult.length(), data, len);
  1310. }
  1311. virtual void readDataRaw(size32_t len, void * data, const char * path, size32_t _lenDefault, const void * _default)
  1312. {
  1313. CriticalBlock b(crit);
  1314. sharedResult.clear();
  1315. size32_t offset = 0;
  1316. size32_t length = 0;
  1317. if (contentRequest(path, offset, length))
  1318. {
  1319. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1320. }
  1321. else
  1322. {
  1323. if (!getPropBin(path, tmpMb.clear()))
  1324. {
  1325. rtlStrToData(len, data, _lenDefault, _default);
  1326. return;
  1327. }
  1328. }
  1329. memcpy(data, sharedResult.toByteArray(), sharedResult.length());
  1330. if (len < sharedResult.length())
  1331. memset((byte*)data + sharedResult.length(), 0, len-sharedResult.length());
  1332. }
  1333. virtual void readDataRawX(size32_t & len, void * & data, const char * path, size32_t _lenDefault, const void * _default)
  1334. {
  1335. CriticalBlock b(crit);
  1336. sharedResult.clear();
  1337. size32_t offset = 0;
  1338. size32_t length = 0;
  1339. if (contentRequest(path, offset, length))
  1340. {
  1341. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1342. }
  1343. else
  1344. {
  1345. if (!getPropBin(path, tmpMb.clear()))
  1346. {
  1347. rtlStrToDataX(len, data, _lenDefault, _default);
  1348. return;
  1349. }
  1350. }
  1351. len = tmpMb.length();
  1352. if (len)
  1353. {
  1354. data = malloc(len);
  1355. memcpy(data, tmpMb.toByteArray(), len);
  1356. }
  1357. else
  1358. data = NULL;
  1359. }
  1360. virtual bool readBool(const char * path, bool _default)
  1361. {
  1362. size32_t offset = 0;
  1363. size32_t length = 0;
  1364. if (contentRequest(path, offset, length))
  1365. throw MakeStringException(0, "Attempting to extract xml content text as boolean");
  1366. const char *str = queryProp(path);
  1367. if (!str) return _default;
  1368. return strToBool(str);
  1369. }
  1370. virtual __int64 readInt(const char * path, __int64 _default)
  1371. {
  1372. size32_t offset = 0;
  1373. size32_t length = 0;
  1374. if (contentRequest(path, offset, length))
  1375. throw MakeStringException(0, "Attempting to extract xml content text as integer");
  1376. const char *str = queryProp(path);
  1377. if (!str) return _default;
  1378. return _atoi64(str);
  1379. }
  1380. virtual __uint64 readUInt(const char * path, __uint64 _default)
  1381. {
  1382. size32_t offset = 0;
  1383. size32_t length = 0;
  1384. if (contentRequest(path, offset, length))
  1385. throw MakeStringException(0, "Attempting to extract xml content text as integer");
  1386. const char *str = queryProp(path);
  1387. if (!str) return _default;
  1388. return strtoull(str, nullptr, 10);
  1389. }
  1390. virtual void readQString(size32_t len, char * text, const char * path, size32_t _lenDefault, const char * _default)
  1391. {
  1392. CriticalBlock b(crit);
  1393. sharedResult.clear();
  1394. size32_t offset = 0;
  1395. size32_t length = 0;
  1396. if (contentRequest(path, offset, length))
  1397. {
  1398. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1399. }
  1400. else
  1401. {
  1402. const char *str = queryProp(path);
  1403. if (str)
  1404. cnv2Latin1((size32_t)strlen(str), str, sharedResult);
  1405. else
  1406. {
  1407. rtlQStrToQStr(len, text, _lenDefault, _default);
  1408. return;
  1409. }
  1410. }
  1411. rtlStrToQStr(len, text, sharedResult.length(), sharedResult.toByteArray());
  1412. }
  1413. virtual void readString(size32_t len, char * text, const char * path, size32_t _lenDefault, const char * _default)
  1414. {
  1415. CriticalBlock b(crit);
  1416. sharedResult.clear();
  1417. size32_t offset = 0;
  1418. size32_t length = 0;
  1419. if (contentRequest(path, offset, length))
  1420. {
  1421. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1422. }
  1423. else
  1424. {
  1425. const char *str = queryProp(path);
  1426. if (str)
  1427. cnv2Latin1((size32_t)strlen(str), str, sharedResult);
  1428. else
  1429. {
  1430. rtlStrToStr(len, text, _lenDefault, _default);
  1431. return;
  1432. }
  1433. }
  1434. rtlStrToStr(len, text, sharedResult.length(), sharedResult.toByteArray());
  1435. }
  1436. virtual void readStringX(size32_t & len, char * & text, const char * path, size32_t _lenDefault, const char * _default)
  1437. {
  1438. MemoryBuffer result;
  1439. size32_t offset = 0;
  1440. size32_t length = 0;
  1441. if (contentRequest(path, offset, length))
  1442. {
  1443. if (length)
  1444. cnv2Latin1(length, contentMb.toByteArray()+offset, result);
  1445. }
  1446. else
  1447. {
  1448. const char *str = queryProp(path);
  1449. if (str)
  1450. cnv2Latin1((size32_t)strlen(str), str, result);
  1451. else
  1452. {
  1453. rtlStrToStrX(len, text, _lenDefault, _default);
  1454. return;
  1455. }
  1456. }
  1457. len = result.length();
  1458. text = (char *) result.detach();
  1459. }
  1460. virtual void readUnicodeX(size32_t & len, UChar * & text, const char * path, size32_t _lenDefault, const UChar * _default)
  1461. {
  1462. size32_t offset = 0;
  1463. size32_t length = 0;
  1464. if (contentRequest(path, offset, length))
  1465. {
  1466. rtlCodepageToUnicodeX(len, text, length, contentMb.toByteArray()+offset, "utf-8");
  1467. }
  1468. else
  1469. {
  1470. CriticalBlock b(crit);
  1471. const char *tmpPtr = queryProp(path);
  1472. if (tmpPtr)
  1473. rtlCodepageToUnicodeX(len, text, strlen(tmpPtr), tmpPtr, "utf-8");
  1474. else
  1475. rtlUnicodeToUnicodeX(len, text, _lenDefault, _default);
  1476. }
  1477. }
  1478. virtual void readUtf8X(size32_t & len, char * & text, const char * path, size32_t _lenDefault, const char * _default)
  1479. {
  1480. size32_t offset = 0;
  1481. size32_t length = 0;
  1482. size32_t size;
  1483. if (contentRequest(path, offset, length))
  1484. {
  1485. rtlStrToStrX(size, text, length, contentMb.toByteArray()+offset);
  1486. }
  1487. else
  1488. {
  1489. CriticalBlock b(crit);
  1490. const char *tmpPtr = queryProp(path);
  1491. if (tmpPtr)
  1492. {
  1493. rtlStrToStrX(size, text, strlen(tmpPtr), tmpPtr);
  1494. }
  1495. else
  1496. {
  1497. rtlUtf8ToUtf8X(len, text, _lenDefault, _default);
  1498. return;
  1499. }
  1500. }
  1501. len = rtlUtf8Length(size, text);
  1502. }
  1503. virtual bool readIsSetAll(const char * path, bool _default)
  1504. {
  1505. if (hasProp(path))
  1506. {
  1507. StringBuffer fullpath;
  1508. fullpath.append(path).append("/All");
  1509. if (path && '/' == *path && '/' != *(path+1))
  1510. return root->hasProp(fullpath.str()+1);
  1511. else
  1512. return node->hasProp(fullpath.str());
  1513. }
  1514. return _default;
  1515. }
  1516. virtual const char *readRaw(const char * path, size32_t &sz) const override
  1517. {
  1518. const char *value = node->queryProp(path);
  1519. sz = value ? strlen(value) : 0;
  1520. return value;
  1521. }
  1522. };
  1523. void CColumnIterator::setCurrent()
  1524. {
  1525. Owned<IPropertyTree> curTree = &iter->get();
  1526. if (contentMb)
  1527. cur.setown(new CColumnProvider(utf8Translator, root, curTree, contentMb, false, contentStartOffset, xpath));
  1528. else
  1529. cur.setown(new CColumnProvider(utf8Translator, root, curTree, NULL, false, 0, xpath));
  1530. }
  1531. class CXMLParse : implements IXMLParse, public CInterface
  1532. {
  1533. IPullPTreeReader *xmlReader;
  1534. StringAttr xpath;
  1535. IXMLSelect *iXMLSelect; // NOTE - not linked - creates circular links
  1536. PTreeReaderOptions xmlOptions;
  1537. bool step, contentRequired, isJson;
  1538. //to make json file handling intuitive an array opening at root level is just ignored
  1539. //but webservice calls map the entire response to a single row, and keeping the array works better
  1540. bool keepRootArray;
  // Tree maker handed to the pull readers. It wraps an inner root-less IPTreeMaker ('maker'),
  // keeps one CParseStackInfo per open node, forwards to the inner maker only the nodes the
  // xpath decides to keep, and reports each node that satisfies the xpath via iXMLSelect->match().
  // The declarator after the closing brace also declares the enclosing class's iXMLMaker member.
  1541. class CMakerBase : public CInterface, implements IPTreeMaker
  1542. {
  1543. protected:
  1544. CXPath xpath;
  1545. IXMLSelect *iXMLSelect; // NOTE - not linked - creates circular links
  // stack: entries for the nodes currently open; freeParseInfo: finished entries that beginNode() reuses.
  1546. CICopyArrayOf<CParseStackInfo> stack, freeParseInfo;
  // Inner maker; created in init(), released in the destructor.
  1547. IPTreeMaker *maker;
  // Set through setMarkingStream(); supplies the raw text of matching nodes when contentRequired is set.
  1548. Linked<CMarkReadBase> marking;
  1549. Owned<COffsetNodeCreator> nodeCreator;
  // Handle returned by rtlOpenCodepageConverter() in init(); passed to every CColumnProvider.
  1550. void *utf8Translator;
  // Depth counter: incremented in beginNodeContent(), decremented first thing in endNode().
  1551. unsigned level;
  1552. bool contentRequired;
  // Set at the end of endNode() for a node that was kept because of a match;
  // the next beginNode() reached at lastMatchKeptLevel removes that node again.
  1553. unsigned lastMatchKeptLevel;
  1554. IPropertyTree *lastMatchKeptNode, *lastMatchKeptNodeParent;
  1555. public:
  1556. IMPLEMENT_IINTERFACE;
  // NB: 'level', 'nodeCreator' and the real 'maker' are not set up here - init() does that.
  1557. CMakerBase(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces) : xpath(_xpath, ignoreNameSpaces), iXMLSelect(&_iXMLSelect), contentRequired(_contentRequired)
  1558. {
  1559. lastMatchKeptLevel = 0;
  1560. lastMatchKeptNode = lastMatchKeptNodeParent = NULL;
  1561. maker = NULL;
  1562. utf8Translator = NULL;
  1563. }
  // Entries in both arrays are deleted explicitly before the inner maker and converter are released.
  1564. ~CMakerBase()
  1565. {
  1566. ForEachItemIn(i, stack)
  1567. delete &stack.item(i);
  1568. ForEachItemIn(i2, freeParseInfo)
  1569. delete &freeParseInfo.item(i2);
  1570. ::Release(maker);
  1571. rtlCloseCodepageConverter(utf8Translator);
  1572. }
  // Second-stage initialisation: zeroes level, creates the node creator and the inner maker,
  // and opens the codepage converter. Throws if the converter reports failure.
  1573. void init()
  1574. {
  1575. level = 0;
  1576. nodeCreator.setown(new COffsetNodeCreator());
  1577. maker = createRootLessPTreeMaker(ipt_none, NULL, nodeCreator);
  1578. bool f;
  1579. utf8Translator = rtlOpenCodepageConverter("utf-8", "latin1", f);
  1580. if (f)
  1581. throw MakeStringException(0, "Failed to initialize unicode utf-8 translator");
  1582. }
  1583. void setMarkingStream(CMarkReadBase &_marking) { marking.set(&_marking); }
  1584. CXPath &queryXPath() { return xpath; }
  1585. // IPTreeMaker
  // Opens a node: decides whether it is worth keeping ('res'), pushes a stack entry recording
  // that decision, and forwards the node to the inner maker only when it is kept.
  1586. virtual void beginNode(const char *tag, bool arrayitem, offset_t startOffset)
  1587. {
  // A node kept after an earlier match is dropped once a new node starts at the same level.
  1588. if (lastMatchKeptNode && level == lastMatchKeptLevel)
  1589. {
  1590. // NB: could be passed to match objects for removal by match object,
  1591. // but dubious if useful for greater than one path to exist above match.
  1592. if (lastMatchKeptNodeParent)
  1593. lastMatchKeptNodeParent->removeTree(lastMatchKeptNode);
  1594. else
  1595. maker->reset();
  1596. lastMatchKeptNode = NULL;
  1597. }
  1598. bool res = false;
  // Reuse a pooled entry when one is available, otherwise allocate a new one.
  1599. CParseStackInfo *stackInfo;
  1600. if (freeParseInfo.ordinality())
  1601. {
  1602. stackInfo = &freeParseInfo.popGet();
  1603. stackInfo->reset();
  1604. }
  1605. else
  1606. stackInfo = new CParseStackInfo();
  1607. stackInfo->startOffset = startOffset;
  // No open nodes: this is the outermost node. Keep it if the xpath is empty or its first step matches.
  1608. if (!stack.ordinality())
  1609. {
  1610. if (0 == xpath.queryDepth() || xpath.match(0, tag))
  1611. {
  // With an xpath of depth 0 or 1 this node is itself the candidate match, so remember where its text starts.
  1612. if (1 >= xpath.queryDepth())
  1613. {
  1614. if (contentRequired)
  1615. {
  1616. assertex(marking);
  1617. marking->mark(startOffset); // mark stream at tag start offset
  1618. }
  1619. }
  1620. res = true;
  1621. }
  1622. }
  // Nested node with a non-empty xpath: only considered when the parent entry was kept.
  1623. else if (xpath.queryDepth())
  1624. {
  1625. if (stack.tos().keep)
  1626. {
  // At or beyond the xpath depth everything under a kept parent is kept.
  1627. if (level >= xpath.queryDepth())
  1628. res = true;
  1629. else if (xpath.match(level, tag))
  1630. {
  1631. res = true;
  // Last xpath step: candidate match, so mark the start of its text.
  1632. if (level == xpath.queryDepth()-1)
  1633. {
  1634. if (contentRequired)
  1635. {
  1636. assertex(marking);
  1637. marking->mark(startOffset); // mark stream at tag start offset
  1638. }
  1639. }
  1640. }
  1641. else if (level > ((unsigned)xpath.queryHighestQualifier()))
  1642. {
  1643. stackInfo->keptForQualifier = true;
  1644. res = true; // construct content below qualified tag (!=simple) needed to qualify when back at topQ.
  1645. }
  1646. }
  1647. }
  // Nested node with an empty xpath: always kept.
  1648. else
  1649. res = true;
  1650. stackInfo->keep = res;
  1651. stack.append(*stackInfo);
  1652. if (res)
  1653. {
  // NOTE(review): 'arrayitem' is not forwarded; the inner maker is always given false.
  1654. maker->beginNode(tag, false, startOffset);
  1655. CPTreeWithOffsets *current = (CPTreeWithOffsets *)maker->queryCurrentNode();
  1656. current->startOffset = startOffset;
  1657. stackInfo->nodeMade = res;
  1658. stackInfo->iPTMade = current;
  1659. }
  1660. }
  // Attributes are forwarded only for nodes that are being kept.
  1661. virtual void newAttribute(const char *tag, const char *value)
  1662. {
  1663. if (stack.tos().keep)
  1664. maker->newAttribute(tag, value);
  1665. }
  // Re-evaluates 'keep' for the open node when xpath.toQualify(level, true) asks for it,
  // then moves one level deeper whether or not the node is kept.
  1666. virtual void beginNodeContent(const char *tag)
  1667. {
  1668. // Can optimize qualifiers here that contain only attribute tests.
  1669. bool &keep = stack.tos().keep;
  1670. if (keep)
  1671. {
  1672. if (xpath.toQualify(level, true))
  1673. {
  1674. IPropertyTree *currentNode = maker->queryCurrentNode();
  1675. keep = xpath.qualify(*currentNode, level);
  1676. }
  1677. }
  1678. level++;
  1679. }
  // Closes a node: completes it in the inner maker, applies any remaining qualifier, reports a match
  // (or records/flushes prospective matches), recycles the stack entry and prunes nodes not kept.
  1680. virtual void endNode(const char *tag, unsigned length, const void *value, bool binary, offset_t endOffset)
  1681. {
  // After the decrement 'level' is the depth of the node being closed.
  1682. --level;
  1683. CParseStackInfo &stackInfo = stack.tos();
  1684. bool keep = stackInfo.keep;
  1685. bool nodeMade = stackInfo.nodeMade;
  // Captured before the inner maker closes the node; further down maker->queryCurrentNode() is read again as the parent.
  1686. IPropertyTree *currentNode = maker->queryCurrentNode();
  1687. if (nodeMade)
  1688. {
  1689. CPTreeWithOffsets *current = (CPTreeWithOffsets *)maker->queryCurrentNode();
  1690. current->endOffset = endOffset;
  1691. maker->endNode(tag, length, value, binary, endOffset);
  1692. }
  // Qualifiers flagged by toQualify(level, false) are checked now that the node is complete.
  1693. if (keep)
  1694. {
  1695. if (!stackInfo.keptForQualifier)
  1696. if (xpath.toQualify(level, false))
  1697. keep = xpath.qualify(*currentNode, level);
  1698. }
  1699. bool matched = false;
  1700. if (keep)
  1701. {
  1702. if (!stackInfo.keptForQualifier)
  1703. {
  // The node sits on the last xpath step (or is the outermost node of an empty xpath).
  1704. if ((0 == xpath.queryDepth() && 0 == level) || level == xpath.queryDepth()-1)
  1705. {
  1706. unsigned topQ = xpath.queryHighestQualifier();
  1707. unsigned noHigherQualifiers = -1 == topQ || topQ >= level;
  1708. IPropertyTree *parent = stack.ordinality()>=2?stack.item(stack.ordinality()-2).iPTMade:NULL;
  // Report the match immediately, handing over the marked text when content is required.
  1709. if (noHigherQualifiers)
  1710. {
  1711. MemoryBuffer mb;
  1712. MemoryBuffer *content;
  1713. if (contentRequired)
  1714. {
  1715. assertex(marking);
  1716. marking->getMarkTo(endOffset, mb);
  1717. content = &mb;
  1718. }
  1719. else
  1720. content = NULL;
  1721. CPTreeWithOffsets *currentNodeWO = (CPTreeWithOffsets *)currentNode;
  1722. Owned<CColumnProvider> provider = new CColumnProvider(utf8Translator, maker->queryRoot(), currentNode, content, true, currentNodeWO->startOffset, xpath.queryXPathStr());
  1723. iXMLSelect->match(*provider, stackInfo.startOffset, endOffset);
  1724. matched = true;
  1725. }
  1726. else
  1727. {
  1728. // only prospective match - depends on higher qualifiers being satisfied.
  1729. if (!stackInfo.prospectiveMatches)
  1730. stackInfo.prospectiveMatches = new CProcespectiveMatchArray;
  1731. MemoryBuffer *tagContent = NULL;
  // NOTE(review): unlike the branch above, 'marking' is not asserted before use here.
  1732. if (contentRequired)
  1733. {
  1734. tagContent = new MemoryBuffer;
  1735. marking->getMarkTo(endOffset, *tagContent);
  1736. }
  1737. stackInfo.prospectiveMatches->append(*new CProspectiveMatch(parent, currentNode, tagContent));
  1738. }
  1739. }
  // A node above the last xpath step that has collected prospective matches from below.
  1740. else if (stackInfo.prospectiveMatches && stackInfo.prospectiveMatches->ordinality() && level < xpath.queryDepth()-1)
  1741. {
  1742. unsigned topQ = xpath.queryHighestQualifier();
  1743. unsigned noHigherQualifiers = -1 == topQ || topQ >= level;
  // Report every recorded prospective match, then clear the list.
  1744. if (noHigherQualifiers)
  1745. {
  1746. ForEachItemIn(m, *stackInfo.prospectiveMatches)
  1747. {
  1748. CProspectiveMatch &prospectiveMatch = stackInfo.prospectiveMatches->item(m);
  1749. CPTreeWithOffsets *prospectiveNodeWO = (CPTreeWithOffsets *)prospectiveMatch.node;
  1750. Owned<CColumnProvider> provider = new CColumnProvider(utf8Translator, maker->queryRoot(), prospectiveMatch.node, prospectiveMatch.content, true, prospectiveNodeWO->startOffset, xpath.queryXPathStr());
  1751. // NB: caveat; if complex qualifiers on intermediate iterator nodes and fully qualified attributes
  1752. // are access from this match, there are potential ambiguities in the lookup.
  1753. iXMLSelect->match(*provider, stackInfo.startOffset, endOffset);
  1754. matched = true;
  1755. }
  1756. stackInfo.prospectiveMatches->kill();
  1757. stackInfo.prospectiveMatches = NULL;
  1758. }
  1759. }
  1760. else
  1761. {
  // Above the xpath depth with no kept match pending: nothing here needs to stay in the tree.
  1762. if (NULL == lastMatchKeptNode && level < xpath.queryDepth())
  1763. keep = false;
  1764. }
  1765. }
  1766. }
  1767. else
  1768. {
  // A candidate node that was rejected: close the mark opened for it in beginNode().
  1769. if (contentRequired && ((0==level && 0==xpath.queryDepth()) || level == xpath.queryDepth()-1))
  1770. {
  1771. assertex(marking);
  1772. marking->closeMark();
  1773. }
  1774. }
  // The entry goes back to the pool here; it is still read through 'stackInfo' below.
  1775. freeParseInfo.append(stackInfo);
  // Prospective matches not yet reported are passed up to the parent entry.
  1776. if (keep && stackInfo.prospectiveMatches && stackInfo.prospectiveMatches->ordinality())
  1777. {
  1778. Linked<CParseStackInfo> childStackInfo = &stackInfo;
  1779. stack.pop();
  1780. if (stack.ordinality())
  1781. {
  1782. CParseStackInfo &parentSI = stack.tos();
  1783. if (!parentSI.prospectiveMatches)
  1784. parentSI.prospectiveMatches = new CProcespectiveMatchArray;
  1785. ForEachItemIn(p, *stackInfo.prospectiveMatches)
  1786. parentSI.prospectiveMatches->append(*LINK(&stackInfo.prospectiveMatches->item(p)));
  1787. }
  1788. }
  1789. else
  1790. stack.pop();
  1791. // Track last level kept
  1792. if (lastMatchKeptNode || (keep && matched))
  1793. {
  1794. assertex(nodeMade);
  1795. lastMatchKeptLevel = level;
  1796. lastMatchKeptNode = currentNode;
  1797. lastMatchKeptNodeParent = maker->queryCurrentNode();
  1798. }
  // A node that was built but is not wanted is detached from its parent.
  1799. else if (!keep && nodeMade)
  1800. {
  1801. IPropertyTree *parent = maker->queryCurrentNode();
  1802. if (parent)
  1803. parent->removeTree(currentNode);
  1804. }
  1805. currentNode = NULL;
  1806. }
  1807. virtual IPropertyTree *queryRoot() { return maker->queryRoot(); }
  1808. virtual IPropertyTree *queryCurrentNode() { return maker->queryCurrentNode(); }
  // Discards the parse stack, the entry pool and the depth counter, and resets the marking stream if set.
  // NOTE(review): the inner maker and the lastMatchKept* fields are left untouched here.
  1809. virtual void reset()
  1810. {
  1811. level = 0;
  1812. ForEachItemIn(i, stack)
  1813. delete &stack.item(i);
  1814. ForEachItemIn(i2, freeParseInfo)
  1815. delete &freeParseInfo.item(i2);
  1816. stack.kill();
  1817. freeParseInfo.kill();
  1818. if (marking)
  1819. marking->reset();
  1820. }
  // Node creation is delegated to the COffsetNodeCreator made in init().
  1821. virtual IPropertyTree *create(const char *tag)
  1822. {
  1823. return nodeCreator->create(tag);
  1824. }
  1825. } *iXMLMaker;
  1826. class CXMLMaker : public CMakerBase
  1827. {
  1828. public:
  1829. CXMLMaker(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces) : CMakerBase(_xpath, _iXMLSelect, _contentRequired, ignoreNameSpaces)
  1830. {
  1831. }
  1832. };
  1833. class CJSONMaker : public CMakerBase
  1834. {
  1835. private:
  1836. bool keepRootArray;
  1837. bool inRootArray;
  1838. public:
  1839. CJSONMaker(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces, bool _keepRootArray) : CMakerBase(_xpath, _iXMLSelect, _contentRequired, ignoreNameSpaces), keepRootArray(_keepRootArray)
  1840. {
  1841. inRootArray = false;
  1842. }
  1843. bool checkRootArrayItem(const char *&tag)
  1844. {
  1845. if (!inRootArray)
  1846. return false;
  1847. if (stack.ordinality()!=1)
  1848. return false;
  1849. if (streq(tag, "__object__"))
  1850. tag = "Row"; //unamed json root array [{},{}] will generate "Row"s
  1851. return true;
  1852. }
  1853. bool checkSkipRoot(const char *&tag)
  1854. {
  1855. if (checkRootArrayItem(tag))
  1856. return false;
  1857. if (stack.ordinality()) //root level only
  1858. return false;
  1859. if (streq(tag, "__array__")) //xpath starts after root array
  1860. {
  1861. if (keepRootArray && !xpath.queryDepth())
  1862. {
  1863. inRootArray = true;
  1864. return false;
  1865. }
  1866. return true;
  1867. }
  1868. if (streq(tag, "__object__") && xpath.queryDepth()) //empty xpath matches start object, otherwise skip, xpath starts immediately after
  1869. return true;
  1870. return false;
  1871. }
  1872. virtual void beginNode(const char *tag, bool arrayitem, offset_t startOffset) override
  1873. {
  1874. if (!checkSkipRoot(tag))
  1875. CMakerBase::beginNode(tag, arrayitem, startOffset);
  1876. }
  1877. virtual void newAttribute(const char *tag, const char *value)
  1878. {
  1879. if (stack.ordinality() && stack.tos().keep)
  1880. maker->newAttribute(tag, value);
  1881. }
  1882. virtual void beginNodeContent(const char *tag)
  1883. {
  1884. if (!checkSkipRoot(tag))
  1885. CMakerBase::beginNodeContent(tag);
  1886. }
  1887. virtual void endNode(const char *tag, unsigned length, const void *value, bool binary, offset_t endOffset)
  1888. {
  1889. if (!checkSkipRoot(tag))
  1890. CMakerBase::endNode(tag, length, value, binary, endOffset);
  1891. }
  1892. };
  1893. public:
  1894. IMPLEMENT_IINTERFACE;
  1895. CXMLParse(const char *fName, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(fName); }
  1896. CXMLParse(IFile &ifile, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(ifile); }
  1897. CXMLParse(IFileIO &fileio, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(fileio); }
  1898. CXMLParse(ISimpleReadStream &stream, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(stream); }
  1899. CXMLParse(const void *buffer, unsigned bufLen, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(buffer, bufLen); }
  1900. CXMLParse(const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); }
  1901. ~CXMLParse()
  1902. {
  1903. ::Release(iXMLMaker);
  1904. ::Release(xmlReader);
  1905. }
  1906. CMakerBase *createMaker()
  1907. {
  1908. bool ignoreNameSpaces = 0 != ((unsigned)xmlOptions & (unsigned)ptr_ignoreNameSpaces);
  1909. if (isJson)
  1910. return new CJSONMaker(xpath, *iXMLSelect, contentRequired, ignoreNameSpaces, keepRootArray);
  1911. return new CXMLMaker(xpath, *iXMLSelect, contentRequired, ignoreNameSpaces);
  1912. }
  1913. void init()
  1914. {
  1915. xmlReader = NULL;
  1916. iXMLMaker = createMaker();
  1917. iXMLMaker->init();
  1918. }
  1919. void go(const char *fName)
  1920. {
  1921. OwnedIFile ifile = createIFile(fName);
  1922. go(*ifile);
  1923. }
  1924. void go(IFile &file)
  1925. {
  1926. OwnedIFileIO ifileio = file.open(IFOread);
  1927. if (!ifileio)
  1928. throw MakeStringException(0, "Failed to open: %s", file.queryFilename());
  1929. go(*ifileio);
  1930. }
  1931. void go(IFileIO &fileio)
  1932. {
  1933. Owned<IIOStream> stream = createIOStream(&fileio);
  1934. go(*stream);
  1935. }
  1936. void go(ISimpleReadStream &stream)
  1937. {
  1938. if (contentRequired)
  1939. {
  1940. // only need marking stream if fetching xml text content.
  1941. Owned<CMarkReadStream> markingStream = new CMarkReadStream(*LINK(&stream));
  1942. iXMLMaker->setMarkingStream(*markingStream);
  1943. if (isJson)
  1944. xmlReader = createPullJSONStreamReader(*markingStream, *iXMLMaker, xmlOptions);
  1945. else
  1946. xmlReader = createPullXMLStreamReader(*markingStream, *iXMLMaker, xmlOptions);
  1947. }
  1948. else if (isJson)
  1949. xmlReader = createPullJSONStreamReader(stream, *iXMLMaker, xmlOptions);
  1950. else
  1951. xmlReader = createPullXMLStreamReader(stream, *iXMLMaker, xmlOptions);
  1952. if (!step)
  1953. {
  1954. xmlReader->load();
  1955. xmlReader->Release();
  1956. xmlReader = NULL;
  1957. }
  1958. }
  1959. void go(const void *buffer, unsigned bufLen)
  1960. {
  1961. if (contentRequired)
  1962. {
  1963. Owned<CMarkReadBase> markingStream = new CMarkRead(buffer, bufLen);
  1964. iXMLMaker->setMarkingStream(*markingStream);
  1965. }
  1966. if (isJson)
  1967. xmlReader = createPullJSONBufferReader(buffer, bufLen, *iXMLMaker, xmlOptions);
  1968. else
  1969. xmlReader = createPullXMLBufferReader(buffer, bufLen, *iXMLMaker, xmlOptions);
  1970. if (!step)
  1971. {
  1972. xmlReader->load();
  1973. xmlReader->Release();
  1974. xmlReader = NULL;
  1975. }
  1976. }
  1977. void provideXML(const char *str)
  1978. {
  1979. if (contentRequired)
  1980. {
  1981. Owned<CMarkReadBase> markingStream = new CMarkRead(str, strlen(str));
  1982. iXMLMaker->setMarkingStream(*markingStream);
  1983. }
  1984. if (isJson)
  1985. xmlReader = createPullJSONStringReader(str, *iXMLMaker, xmlOptions);
  1986. else
  1987. xmlReader = createPullXMLStringReader(str, *iXMLMaker, xmlOptions);
  1988. if (!step)
  1989. {
  1990. xmlReader->load();
  1991. xmlReader->Release();
  1992. xmlReader = NULL;
  1993. }
  1994. }
  1995. // IXMLParse
  1996. virtual bool next()
  1997. {
  1998. return xmlReader->next();
  1999. }
  2000. virtual void reset()
  2001. {
  2002. iXMLMaker->reset();
  2003. xmlReader->reset();
  2004. }
  2005. };
  2006. IXMLParse *createXMLParse(const char *filename, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2007. {
  2008. return new CXMLParse(filename, xpath, iselect, xmlOptions, contentRequired);
  2009. }
  2010. IXMLParse *createXMLParse(ISimpleReadStream &stream, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2011. {
  2012. return new CXMLParse(stream, xpath, iselect, xmlOptions, contentRequired);
  2013. }
  2014. IXMLParse *createXMLParse(const void *buffer, unsigned bufLen, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2015. {
  2016. return new CXMLParse(buffer, bufLen, xpath, iselect, xmlOptions, contentRequired);
  2017. }
  2018. IXMLParse *createXMLParseString(const char *string, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2019. {
  2020. CXMLParse *parser = new CXMLParse(xpath, iselect, xmlOptions, contentRequired);
  2021. parser->provideXML(string);
  2022. return parser;
  2023. }
  2024. IXMLParse *createJSONParse(const char *filename, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2025. {
  2026. return new CXMLParse(filename, xpath, iselect, xmlOptions, contentRequired, true, true);
  2027. }
  2028. IXMLParse *createJSONParse(ISimpleReadStream &stream, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2029. {
  2030. return new CXMLParse(stream, xpath, iselect, xmlOptions, contentRequired, true, true);
  2031. }
  2032. IXMLParse *createJSONParse(const void *buffer, unsigned bufLen, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired, bool keepRootArray)
  2033. {
  2034. return new CXMLParse(buffer, bufLen, xpath, iselect, xmlOptions, contentRequired, true, true, keepRootArray);
  2035. }
  2036. IXMLParse *createJSONParseString(const char *string, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  2037. {
  2038. CXMLParse *parser = new CXMLParse(xpath, iselect, xmlOptions, contentRequired, true, true);
  2039. parser->provideXML(string);
  2040. return parser;
  2041. }