thorxmlread.cpp 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <algorithm>
  15. #include "jlib.hpp"
  16. #include "jexcept.hpp"
  17. #include "jfile.hpp"
  18. #include "jlog.hpp"
  19. #include "csvsplitter.hpp"
  20. #include "thorherror.h"
  21. #include "thorxmlread.hpp"
  22. #include "thorcommon.ipp"
  23. #include "eclrtl.hpp"
  24. #include "jptree.ipp"
  25. #define XMLTAG_CONTENT "<>"
  26. //=====================================================================================================
  27. XmlColumnIterator::XmlColumnIterator(IPropertyTreeIterator * _iter) : iter(_iter)
  28. {
  29. }
  30. IColumnProvider * XmlColumnIterator::first()
  31. {
  32. if (!iter->first())
  33. return NULL;
  34. setCurrent();
  35. return cur;
  36. }
  37. IColumnProvider * XmlColumnIterator::next()
  38. {
  39. if (!iter->next())
  40. return NULL;
  41. setCurrent();
  42. return cur;
  43. }
  44. void XmlColumnIterator::setCurrent()
  45. {
  46. Owned<IPropertyTree> curTree = &iter->get();
  47. cur.setown(new XmlDatasetColumnProvider);
  48. cur->setRow(curTree);
  49. }
  50. //=====================================================================================================
  51. static void decodeHexPairs(const char *input, unsigned inputLen, void * outData, unsigned outLen)
  52. {
  53. byte * tgt = (byte *)outData;
  54. while (inputLen >= 2)
  55. {
  56. if (outLen-- == 0)
  57. return;
  58. byte high = hex2num(*input++);
  59. *tgt++ = (high << 4) | hex2num(*input++);
  60. inputLen -= 2;
  61. }
  62. if (outLen)
  63. memset(outData, 0, outLen);
  64. }
  65. static void decodeHexPairsX(const char *input, unsigned inputLen, void *&outData, unsigned &outLen)
  66. {
  67. if (inputLen<2)
  68. {
  69. outLen = 0;
  70. outData = NULL;
  71. return;
  72. }
  73. outLen = inputLen/2;
  74. outData = malloc(outLen);
  75. char *tgt = (char *)outData;
  76. loop
  77. {
  78. byte high = hex2num(*input++);
  79. *tgt++ = (high << 4) | hex2num(*input++);
  80. inputLen -= 2;
  81. if (inputLen<2) break;
  82. }
  83. }
  84. //=====================================================================================================
  85. bool XmlDatasetColumnProvider::getBool(const char * name)
  86. {
  87. return row->getPropBool(name, 0);
  88. }
  89. __int64 XmlDatasetColumnProvider::getInt(const char * name)
  90. {
  91. return row->getPropInt64(name, 0);
  92. }
  93. void XmlDatasetColumnProvider::getData(size32_t len, void * target, const char * name)
  94. {
  95. const char *hexPairSequence = row->queryProp(name);
  96. if (!hexPairSequence)
  97. memset(target, 0, len);
  98. else
  99. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  100. }
  101. void XmlDatasetColumnProvider::getDataX(size32_t & len, void * & target, const char * name)
  102. {
  103. const char *hexPairSequence = row->queryProp(name);
  104. if (!hexPairSequence)
  105. {
  106. len = 0;
  107. target = NULL;
  108. return;
  109. }
  110. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  111. }
  112. void XmlDatasetColumnProvider::getDataRaw(size32_t len, void * target, const char * name)
  113. {
  114. const char *hexPairSequence = row->queryProp(name);
  115. if (!hexPairSequence)
  116. memset(target, 0, len);
  117. else
  118. {
  119. size32_t dLen = (size32_t)strlen(hexPairSequence);
  120. memcpy(target, hexPairSequence, dLen);
  121. if (dLen < len)
  122. memset((byte*)target+dLen, 0, len - dLen);
  123. }
  124. }
  125. void XmlDatasetColumnProvider::getDataRawX(size32_t & len, void * & target, const char * name)
  126. {
  127. const char *hexPairSequence = row->queryProp(name);
  128. if (!hexPairSequence)
  129. {
  130. len = 0;
  131. target = NULL;
  132. return;
  133. }
  134. len = (size32_t)strlen(hexPairSequence);
  135. target = malloc(len);
  136. memcpy(target, hexPairSequence, len);
  137. }
  138. void XmlDatasetColumnProvider::getQString(size32_t len, char * target, const char * name)
  139. {
  140. // You could argue that it should convert from UTF8 to ascii first but it's a no-op for any char that QString supports, and it's ok to be undefined for any char that it doesn't
  141. const char * value = row->queryProp(name);
  142. size32_t lenValue = value ? (size32_t)strlen(value) : 0;
  143. rtlStrToQStr(len, target, lenValue, value);
  144. }
  145. void XmlDatasetColumnProvider::getString(size32_t len, char * target, const char * name)
  146. {
  147. const char * value = row->queryProp(name);
  148. size32_t utf8bytes = value ? (size32_t)strlen(value) : 0;
  149. if (utf8bytes)
  150. rtlUtf8ToStr(len, target, rtlUtf8Length(utf8bytes, value), value);
  151. else
  152. memset(target, ' ', len);
  153. }
  154. void XmlDatasetColumnProvider::getStringX(size32_t & len, char * & target, const char * name)
  155. {
  156. const char * value = row->queryProp(name);
  157. size32_t utf8bytes = value ? (size32_t)strlen(value) : 0;
  158. if (utf8bytes)
  159. rtlUtf8ToStrX(len, target, rtlUtf8Length(utf8bytes, value), value);
  160. else
  161. {
  162. len = 0;
  163. target = NULL;
  164. }
  165. }
  166. void XmlDatasetColumnProvider::getUnicodeX(size32_t & len, UChar * & target, const char * name)
  167. {
  168. const char * text = row->queryProp(name);
  169. if (text)
  170. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  171. else
  172. {
  173. len = 0;
  174. target = NULL;
  175. }
  176. }
  177. void XmlDatasetColumnProvider::getUtf8X(size32_t & len, char * & target, const char * path)
  178. {
  179. const char * value = row->queryProp(path);
  180. size32_t size = value ? (size32_t)strlen(value) : 0;
  181. target = (char *)malloc(size);
  182. memcpy(target, value, size);
  183. len = rtlUtf8Length(size, target);
  184. }
  185. bool XmlDatasetColumnProvider::getIsSetAll(const char * path)
  186. {
  187. StringBuffer fullpath;
  188. fullpath.append(path).append("/All");
  189. return row->hasProp(fullpath.str());
  190. }
  191. IColumnProviderIterator * XmlDatasetColumnProvider::getChildIterator(const char * path)
  192. {
  193. return new XmlColumnIterator(row->getElements(path));
  194. }
  195. bool XmlDatasetColumnProvider::readBool(const char * path, bool _default)
  196. {
  197. return row->getPropBool(path, _default);
  198. }
  199. void XmlDatasetColumnProvider::readData(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  200. {
  201. const char *hexPairSequence = row->queryProp(path);
  202. if (hexPairSequence)
  203. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  204. else
  205. rtlDataToData(len, target, _lenDefault, _default);
  206. }
  207. void XmlDatasetColumnProvider::readDataX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  208. {
  209. const char *hexPairSequence = row->queryProp(path);
  210. if (hexPairSequence)
  211. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  212. else
  213. rtlStrToDataX(len, target, _lenDefault, _default);
  214. }
  215. void XmlDatasetColumnProvider::readDataRaw(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  216. {
  217. rtlDataToData(len, target, _lenDefault, _default);
  218. }
  219. void XmlDatasetColumnProvider::readDataRawX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  220. {
  221. rtlStrToDataX(len, target, _lenDefault, _default);
  222. }
  223. __int64 XmlDatasetColumnProvider::readInt(const char * path, __int64 _default)
  224. {
  225. return row->getPropInt64(path, _default);
  226. }
  227. void XmlDatasetColumnProvider::readQString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  228. {
  229. const char * value = row->queryProp(path);
  230. if (value)
  231. rtlStrToQStr(len, target, (size32_t)strlen(value), value); // more: could process utf8, but characters would be lost anyway. At worse will mean extra blanks.
  232. else
  233. rtlQStrToQStr(len, target, _lenDefault, _default);
  234. }
  235. void XmlDatasetColumnProvider::readString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  236. {
  237. const char * value = row->queryProp(path);
  238. if (value)
  239. rtlUtf8ToStr(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  240. else
  241. rtlStrToStr(len, target, _lenDefault, _default);
  242. }
  243. void XmlDatasetColumnProvider::readStringX(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  244. {
  245. const char * value = row->queryProp(path);
  246. if (value)
  247. rtlUtf8ToStrX(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  248. else
  249. rtlStrToStrX(len, target, _lenDefault, _default);
  250. }
  251. void XmlDatasetColumnProvider::readUnicodeX(size32_t & len, UChar * & target, const char * path, size32_t _lenDefault, const UChar * _default)
  252. {
  253. const char * text = row->queryProp(path);
  254. if (text)
  255. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  256. else
  257. rtlUnicodeToUnicodeX(len, target, _lenDefault, _default);
  258. }
  259. bool XmlDatasetColumnProvider::readIsSetAll(const char * path, bool _default)
  260. {
  261. if (row->hasProp(path))
  262. return getIsSetAll(path);
  263. return _default;
  264. }
  265. void XmlDatasetColumnProvider::readUtf8X(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  266. {
  267. const char * value = row->queryProp(path);
  268. if (value)
  269. rtlUtf8ToUtf8X(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  270. else
  271. rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
  272. }
  273. //=====================================================================================================
  274. bool XmlSetColumnProvider::getBool(const char * name)
  275. {
  276. #ifdef _DEBUG
  277. assertex(stricmp(name, "value")==0);
  278. #endif
  279. return row->getPropBool(NULL, 0);
  280. }
  281. __int64 XmlSetColumnProvider::getInt(const char * name)
  282. {
  283. #ifdef _DEBUG
  284. assertex(stricmp(name, "value")==0);
  285. #endif
  286. return row->getPropInt64(NULL, 0);
  287. }
  288. void XmlSetColumnProvider::getData(size32_t len, void * target, const char * name)
  289. {
  290. #ifdef _DEBUG
  291. assertex(stricmp(name, "value")==0);
  292. #endif
  293. const char *hexPairSequence = row->queryProp(NULL);
  294. if (!hexPairSequence)
  295. memset(target, 0, len);
  296. else
  297. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  298. }
  299. void XmlSetColumnProvider::getDataX(size32_t & len, void * & target, const char * name)
  300. {
  301. #ifdef _DEBUG
  302. assertex(stricmp(name, "value")==0);
  303. #endif
  304. const char *hexPairSequence = row->queryProp(NULL);
  305. if (!hexPairSequence)
  306. {
  307. len = 0;
  308. target = NULL;
  309. return;
  310. }
  311. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  312. }
  313. void XmlSetColumnProvider::getDataRaw(size32_t len, void * target, const char * name)
  314. {
  315. #ifdef _DEBUG
  316. assertex(stricmp(name, "value")==0);
  317. #endif
  318. const char *hexPairSequence = row->queryProp(NULL);
  319. if (!hexPairSequence)
  320. memset(target, 0, len);
  321. else
  322. {
  323. size32_t dLen = strlen(hexPairSequence);
  324. memcpy(target, hexPairSequence, dLen);
  325. if (dLen < len)
  326. memset((byte*)target+dLen, 0, len - dLen);
  327. }
  328. }
  329. void XmlSetColumnProvider::getDataRawX(size32_t & len, void * & target, const char * name)
  330. {
  331. #ifdef _DEBUG
  332. assertex(stricmp(name, "value")==0);
  333. #endif
  334. const char *hexPairSequence = row->queryProp(NULL);
  335. if (!hexPairSequence)
  336. {
  337. len = 0;
  338. target = NULL;
  339. return;
  340. }
  341. len = (size32_t)strlen(hexPairSequence);
  342. target = malloc(len);
  343. memcpy(target, hexPairSequence, len);
  344. }
  345. void XmlSetColumnProvider::getQString(size32_t len, char * target, const char * name)
  346. {
  347. #ifdef _DEBUG
  348. assertex(stricmp(name, "value")==0);
  349. #endif
  350. const char * value = row->queryProp(NULL);
  351. unsigned lenValue = value ? (size32_t)strlen(value) : 0;
  352. rtlStrToQStr(len, target, lenValue, value);
  353. }
  354. void XmlSetColumnProvider::getString(size32_t len, char * target, const char * name)
  355. {
  356. #ifdef _DEBUG
  357. assertex(stricmp(name, "value")==0);
  358. #endif
  359. const char * value = row->queryProp(NULL);
  360. if (value)
  361. rtlVStrToStr(len, target, value);
  362. else
  363. memset(target, ' ', len);
  364. }
  365. void XmlSetColumnProvider::getStringX(size32_t & len, char * & target, const char * name)
  366. {
  367. #ifdef _DEBUG
  368. assertex(stricmp(name, "value")==0);
  369. #endif
  370. const char * value = row->queryProp(NULL);
  371. len = value ? (size32_t)strlen(value) : 0;
  372. target = (char *)malloc(len);
  373. memcpy(target, value, len);
  374. //MORE: utf8->ascii?
  375. }
  376. void XmlSetColumnProvider::getUnicodeX(size32_t & len, UChar * & target, const char * name)
  377. {
  378. #ifdef _DEBUG
  379. assertex(stricmp(name, "value")==0);
  380. #endif
  381. const char * text = row->queryProp(NULL);
  382. if (text)
  383. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  384. else
  385. {
  386. len = 0;
  387. target = NULL;
  388. }
  389. }
  390. void XmlSetColumnProvider::getUtf8X(size32_t & len, char * & target, const char * name)
  391. {
  392. #ifdef _DEBUG
  393. assertex(stricmp(name, "value")==0);
  394. #endif
  395. const char * value = row->queryProp(NULL);
  396. size32_t size = value ? (size32_t)strlen(value) : 0;
  397. target = (char *)malloc(size);
  398. memcpy(target, value, size);
  399. len = rtlUtf8Length(size, value);
  400. }
  401. bool XmlSetColumnProvider::getIsSetAll(const char * path)
  402. {
  403. UNIMPLEMENTED;
  404. StringBuffer fullpath;
  405. fullpath.append(path).append("/All");
  406. return row->hasProp(fullpath.str());
  407. }
  408. IColumnProviderIterator * XmlSetColumnProvider::getChildIterator(const char * path)
  409. {
  410. UNIMPLEMENTED;
  411. return new XmlColumnIterator(row->getElements(path));
  412. }
  413. bool XmlSetColumnProvider::readBool(const char * path, bool _default)
  414. {
  415. return row->getPropBool(NULL, _default);
  416. }
  417. void XmlSetColumnProvider::readData(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  418. {
  419. const char *hexPairSequence = row->queryProp(NULL);
  420. if (hexPairSequence)
  421. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  422. else
  423. rtlDataToData(len, target, _lenDefault, _default);
  424. }
  425. void XmlSetColumnProvider::readDataX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  426. {
  427. const char *hexPairSequence = row->queryProp(NULL);
  428. if (hexPairSequence)
  429. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  430. else
  431. rtlStrToDataX(len, target, _lenDefault, _default);
  432. }
  433. void XmlSetColumnProvider::readDataRaw(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  434. {
  435. rtlDataToData(len, target, _lenDefault, _default);
  436. }
  437. void XmlSetColumnProvider::readDataRawX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  438. {
  439. rtlDataToData(len, target, _lenDefault, _default);
  440. }
  441. __int64 XmlSetColumnProvider::readInt(const char * path, __int64 _default)
  442. {
  443. return row->getPropInt64(NULL, _default);
  444. }
  445. void XmlSetColumnProvider::readQString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  446. {
  447. const char * value = row->queryProp(NULL);
  448. if (value)
  449. rtlStrToQStr(len, target, (size32_t)strlen(value), value); // more: could process utf8, but characters would be lost anyway. At worse will mean extra blanks.
  450. else
  451. rtlQStrToQStr(len, target, _lenDefault, _default);
  452. }
  453. void XmlSetColumnProvider::readString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  454. {
  455. const char * value = row->queryProp(NULL);
  456. if (value)
  457. rtlUtf8ToStr(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  458. else
  459. rtlStrToStr(len, target, _lenDefault, _default);
  460. }
  461. void XmlSetColumnProvider::readStringX(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  462. {
  463. const char * value = row->queryProp(NULL);
  464. if (value)
  465. rtlUtf8ToStrX(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  466. else
  467. rtlStrToStrX(len, target, _lenDefault, _default);
  468. }
  469. void XmlSetColumnProvider::readUnicodeX(size32_t & len, UChar * & target, const char * path, size32_t _lenDefault, const UChar * _default)
  470. {
  471. const char * text = row->queryProp(NULL);
  472. if (text)
  473. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  474. else
  475. rtlUnicodeToUnicodeX(len, target, _lenDefault, _default);
  476. }
  477. bool XmlSetColumnProvider::readIsSetAll(const char * path, bool _default)
  478. {
  479. throwUnexpected();
  480. if (row->hasProp(NULL))
  481. return getIsSetAll(path);
  482. return _default;
  483. }
  484. void XmlSetColumnProvider::readUtf8X(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  485. {
  486. const char * value = row->queryProp(NULL);
  487. if (value)
  488. rtlUtf8ToUtf8X(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  489. else
  490. rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
  491. }
  492. IDataVal & CXmlToRawTransformer::transform(IDataVal & result, size32_t len, const void * text, bool isDataSet)
  493. {
  494. // MORE - should redo using a pull parser sometime
  495. Owned<IPropertyTree> root = createPTreeFromXMLString(len, (const char *)text, ipt_none, xmlReadFlags);
  496. return transformTree(result, *root, isDataSet);
  497. }
  498. IDataVal & CXmlToRawTransformer::transformTree(IDataVal & result, IPropertyTree &root, bool isDataSet)
  499. {
  500. unsigned minRecordSize = rowTransformer->queryRecordSize()->getMinRecordSize();
  501. Owned <XmlColumnProvider> columns;
  502. Owned<IPropertyTreeIterator> rows;
  503. StringBuffer decodedXML;
  504. Owned<IPropertyTree> decodedTree;
  505. MemoryBuffer raw;
  506. size32_t curLength = 0;
  507. if (isDataSet)
  508. {
  509. columns.setown(new XmlDatasetColumnProvider);
  510. if (root.hasProp("Row"))
  511. rows.setown(root.getElements("Row"));
  512. else
  513. {
  514. // HACK for Gordon to work around WSDL issues
  515. const char *body = root.queryProp(NULL);
  516. if (body)
  517. {
  518. while(isspace(*body))
  519. body++;
  520. if (strncmp(body, "<Row", 4)==0)
  521. {
  522. try
  523. {
  524. decodedXML.append("<root>").append(body).append("</root>");
  525. decodedTree.setown(createPTreeFromXMLString(decodedXML.str(), ipt_caseInsensitive));
  526. rows.setown(decodedTree->getElements("Row"));
  527. }
  528. catch (IException *E)
  529. {
  530. EXCLOG(E);
  531. E->Release();
  532. }
  533. catch (...)
  534. {
  535. ERRLOG(0, "Unexpected exception decoding XML for dataset");
  536. }
  537. }
  538. }
  539. }
  540. }
  541. else
  542. {
  543. columns.setown(new XmlSetColumnProvider);
  544. rows.setown(root.getElements("string"));
  545. ForEach(*rows)
  546. {
  547. columns->setRow(&rows->query());
  548. NullDiskCallback dummyCallback;
  549. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  550. size32_t thisSize = rowTransformer->transform(rowBuilder, columns, &dummyCallback);
  551. curLength += thisSize;
  552. rowBuilder.finishRow(thisSize);
  553. }
  554. rows.setown(root.getElements("Item"));
  555. }
  556. if (rows)
  557. {
  558. ForEach(*rows)
  559. {
  560. columns->setRow(&rows->query());
  561. NullDiskCallback dummyCallback;
  562. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  563. size32_t thisSize = rowTransformer->transform(rowBuilder, columns, &dummyCallback);
  564. curLength += thisSize;
  565. rowBuilder.finishRow(thisSize);
  566. }
  567. }
  568. result.setLen(raw.toByteArray(), curLength);
  569. return result;
  570. }
  571. size32_t createRowFromXml(ARowBuilder & rowBuilder, size32_t size, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  572. {
  573. Owned<IPropertyTree> root = createPTreeFromXMLString(size, utf8, ipt_none, stripWhitespace ? ptr_ignoreWhiteSpace : ptr_none);
  574. if (!root)
  575. {
  576. throwError(THORCERR_InvalidXmlFromXml);
  577. return 0;
  578. }
  579. Owned <XmlColumnProvider> columns = new XmlDatasetColumnProvider;
  580. columns->setRow(root);
  581. NullDiskCallback dummyCallback;
  582. return xmlTransformer->transform(rowBuilder, columns, &dummyCallback);
  583. }
  584. const void * createRowFromXml(IEngineRowAllocator * rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  585. {
  586. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  587. size32_t newSize = createRowFromXml(rowBuilder, rtlUtf8Size(len, utf8), utf8, xmlTransformer, stripWhitespace);
  588. return rowBuilder.finalizeRowClear(newSize);
  589. }
  590. size32_t createRowFromJson(ARowBuilder & rowBuilder, size32_t size, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  591. {
  592. Owned<IPropertyTree> root = createPTreeFromJSONString(size, utf8, ipt_none, stripWhitespace ? ptr_ignoreWhiteSpace : ptr_none);
  593. if (!root)
  594. {
  595. throwError(THORCERR_InvalidJsonFromJson);
  596. return 0;
  597. }
  598. Owned <XmlColumnProvider> columns = new XmlDatasetColumnProvider;
  599. columns->setRow(root);
  600. NullDiskCallback dummyCallback;
  601. return xmlTransformer->transform(rowBuilder, columns, &dummyCallback);
  602. }
  603. const void * createRowFromJson(IEngineRowAllocator * rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  604. {
  605. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  606. size32_t newSize = createRowFromJson(rowBuilder, rtlUtf8Size(len, utf8), utf8, xmlTransformer, stripWhitespace);
  607. return rowBuilder.finalizeRowClear(newSize);
  608. }
  609. //=====================================================================================================
  610. IDataVal & CCsvToRawTransformer::transform(IDataVal & result, size32_t len, const void * text, bool isDataSet)
  611. {
  612. CSVSplitter csvSplitter;
  613. csvSplitter.init(rowTransformer->getMaxColumns(), rowTransformer->queryCsvParameters(), NULL, NULL, NULL, NULL);
  614. size32_t minRecordSize = rowTransformer->queryRecordSize()->getMinRecordSize();
  615. const byte *finger = (const byte *) text;
  616. MemoryBuffer raw;
  617. size32_t curLength = 0;
  618. while (len)
  619. {
  620. unsigned thisLineLength = csvSplitter.splitLine(len, finger);
  621. finger += thisLineLength;
  622. len -= thisLineLength;
  623. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  624. unsigned thisSize = rowTransformer->transform(rowBuilder, csvSplitter.queryLengths(), (const char * *)csvSplitter.queryData(), 0);
  625. curLength += thisSize;
  626. rowBuilder.finishRow(thisSize);
  627. }
  628. result.setLen(raw.toByteArray(), curLength);
  629. return result;
  630. }
  631. //=====================================================================================================
  632. extern thorhelper_decl IXmlToRawTransformer * createXmlRawTransformer(IXmlToRowTransformer * xmlTransformer, PTreeReaderOptions xmlReadFlags)
  633. {
  634. if (xmlTransformer)
  635. return new CXmlToRawTransformer(*xmlTransformer, xmlReadFlags);
  636. return NULL;
  637. }
  638. extern thorhelper_decl ICsvToRawTransformer * createCsvRawTransformer(ICsvToRowTransformer * csvTransformer)
  639. {
  640. if (csvTransformer)
  641. return new CCsvToRawTransformer(*csvTransformer);
  642. return NULL;
  643. }
  644. bool isContentXPath(const char *xpath, StringBuffer &head)
  645. {
  646. if (xpath)
  647. {
  648. unsigned l = (size32_t)strlen(xpath);
  649. if (l >= 2)
  650. {
  651. const char *x = xpath+l-2;
  652. if ((x[0] == '<') && (x[1] == '>'))
  653. {
  654. head.append((size32_t)(x-xpath), xpath);
  655. return true;
  656. }
  657. }
  658. }
  659. return false;
  660. }
  661. class CXPath
  662. {
  663. int topQualifier;
  664. BoolArray simpleQualifier;
  665. StringArray nodes, qualifierStack;
  666. StringAttr xpathstr;
  667. bool testForSimpleQualifier(const char *qualifier)
  668. {
  669. // first char always '['
  670. return ('@' == qualifier[1]);
  671. }
  672. public:
  673. CXPath(const char *path, bool ignoreNameSpaces)
  674. {
  675. topQualifier = -1;
  676. if (!path) return;
  677. xpathstr.set(path);
  678. if (path && '/'==*path)
  679. {
  680. if ('/' == *(path+1))
  681. throw MakeStringException(0, "// unsupported here");
  682. path++;
  683. }
  684. loop
  685. {
  686. const char *startQ = strchr(path, '[');
  687. const char *nextSep;
  688. loop
  689. {
  690. nextSep = strchr(path, '/');
  691. if (startQ && (!nextSep || startQ < nextSep))
  692. break;
  693. StringAttr node;
  694. unsigned l = nextSep ? (size32_t)(nextSep-path) : (size32_t)strlen(path);
  695. if (!l) break;
  696. if (ignoreNameSpaces)
  697. {
  698. const char *colon = path;
  699. const char *end = path+l+1;
  700. do
  701. {
  702. if (':' == *colon++)
  703. {
  704. l -= colon-path;
  705. path = colon;
  706. break;
  707. }
  708. }
  709. while (colon != end);
  710. }
  711. StringBuffer wildRemoved;
  712. node.set(path, l);
  713. const char *c = node.get();
  714. while (*c) { if ('*' != *c) wildRemoved.append(*c); c++; }
  715. if (wildRemoved.length() && !validateXMLTag(wildRemoved.str()))
  716. throw MakeStringException(0, "Invalid node syntax %s in path %s", node.get(), path);
  717. nodes.append(node);
  718. qualifierStack.append(""); // no qualifier for this segment.
  719. simpleQualifier.append(true); // not used
  720. if (!nextSep) break;
  721. path = nextSep+1;
  722. }
  723. if (!nextSep && !startQ)
  724. break;
  725. const char *endQ = strchr(startQ, ']'); // escaped '[]' chars??
  726. assertex(endQ);
  727. unsigned l=startQ-path;
  728. if (ignoreNameSpaces)
  729. {
  730. const char *colon = path;
  731. const char *end = path+l+1;
  732. do
  733. {
  734. if (':' == *colon++)
  735. {
  736. l -= colon-path;
  737. path = colon;
  738. break;
  739. }
  740. }
  741. while (colon != end);
  742. }
  743. StringAttr node(path, l);
  744. nodes.append(node);
  745. StringAttr qualifier(startQ, endQ-startQ+1);
  746. qualifierStack.append(qualifier);
  747. bool simple = testForSimpleQualifier(qualifier);
  748. simpleQualifier.append(simple);
  749. if (-1 == topQualifier && !simple) topQualifier = qualifierStack.ordinality()-1;
  750. path = nextSep+1;
  751. if (!nextSep) break;
  752. }
  753. }
  754. bool toQualify(unsigned which, bool simple)
  755. {
  756. return (which < queryDepth() && *qualifierStack.item(which) && simple==querySimpleQualifier(which));
  757. }
  758. inline unsigned queryDepth()
  759. {
  760. return nodes.ordinality();
  761. }
  762. inline const char *queryNode(unsigned which)
  763. {
  764. return nodes.item(which);
  765. }
  766. inline bool querySimpleQualifier(unsigned which)
  767. {
  768. return simpleQualifier.item(which);
  769. }
  770. bool match(unsigned level, const char *tag)
  771. {
  772. const char *nodeTag = queryNode(level);
  773. if (strchr(nodeTag, '*'))
  774. return WildMatch(tag, strlen(tag), nodeTag, strlen(nodeTag), false);
  775. else
  776. return (0 == strcmp(nodeTag, tag));
  777. }
  778. bool qualify(IPropertyTree &tree, unsigned depth)
  779. {
  780. const char *qualifier = qualifierStack.item(depth);
  781. if (qualifier && '\0' != *qualifier)
  782. {
  783. const char *q = qualifier;
  784. bool numeric = true;
  785. loop
  786. {
  787. if ('\0' == *q) break;
  788. else if (!isdigit(*q)) { numeric = false; break; }
  789. else q++;
  790. }
  791. if (numeric) throw MakeStringException(0, "Unsupported index qualifier: %s", qualifier);
  792. Owned<IPropertyTreeIterator> matchIter = tree.getElements(qualifier);
  793. if (!matchIter->first())
  794. return false;
  795. }
  796. return true;
  797. }
  798. inline int queryHighestQualifier() { return topQualifier; }
  799. const char *queryXPathStr() { return xpathstr; }
  800. };
  801. class CProspectiveMatch : public CInterface
  802. {
  803. public:
  804. CProspectiveMatch(IPropertyTree *_parent, IPropertyTree *_node, MemoryBuffer *_content=NULL) : parent(_parent), node(_node), content(_content) { }
  805. ~CProspectiveMatch() { if (content) delete content; }
  806. IPropertyTree *parent, *node;
  807. MemoryBuffer *content;
  808. };
  809. typedef CIArrayOf<CProspectiveMatch> CProcespectiveMatchArray;
  810. class CParseStackInfo : public CInterface
  811. {
  812. public:
  813. CParseStackInfo() : keep(false), nodeMade(false), keptForQualifier(false), iPTMade(NULL), startOffset(0), prospectiveMatches(NULL) { }
  814. ~CParseStackInfo()
  815. {
  816. if (prospectiveMatches)
  817. delete prospectiveMatches;
  818. }
  819. inline void reset()
  820. {
  821. keep = nodeMade = keptForQualifier = false;
  822. startOffset = 0;
  823. if (prospectiveMatches)
  824. prospectiveMatches->kill();
  825. iPTMade = NULL;
  826. }
  827. bool keep, nodeMade, keptForQualifier;
  828. offset_t startOffset;
  829. IPropertyTree *iPTMade;
  830. CProcespectiveMatchArray *prospectiveMatches;
  831. };
// Abstract interface for capturing a byte range of the input between a
// mark() and a later getMarkTo().
class CMarkReadBase : public CInterface
{
public:
    // Return to the initial, non-marking state.
    virtual void reset() = 0;
    // Start a mark at the given offset.
    virtual void mark(offset_t offset) = 0;
    // Deliver the bytes from the marked offset up to 'offset' into mb and end the mark.
    virtual void getMarkTo(offset_t offset, MemoryBuffer &mb) = 0;
    // Abandon the current mark without retrieving any data.
    virtual void closeMark() = 0;
};
  840. class CMarkRead : public CMarkReadBase
  841. {
  842. const void *buffer;
  843. offset_t startOffset;
  844. unsigned bufLen;
  845. bool marking;
  846. public:
  847. CMarkRead(const void *_buffer, unsigned _bufLen) : buffer(_buffer), bufLen(_bufLen)
  848. {
  849. reset();
  850. }
  851. virtual void reset()
  852. {
  853. marking = false;
  854. startOffset = 0;
  855. }
  856. virtual void mark(offset_t offset)
  857. {
  858. assertex(!marking);
  859. marking = true;
  860. if (offset >= bufLen)
  861. throw MakeStringException(0, "start offset past end of input string");
  862. startOffset = offset;
  863. }
  864. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb)
  865. {
  866. assertex(marking);
  867. marking = true;
  868. if (offset < startOffset)
  869. throw MakeStringException(0, "end offset proceeds start offset");
  870. if (offset > bufLen)
  871. throw MakeStringException(0, "end offset past end of input string");
  872. mb.append((size32_t)(offset-startOffset), ((char*)buffer)+startOffset);
  873. marking = false;
  874. }
  875. virtual void closeMark()
  876. {
  877. marking = false;
  878. }
  879. };
// ISimpleReadStream wrapper that buffers the underlying stream in a single
// allocation split into two halves of bufSize bytes each, and supports
// CMarkReadBase marking so that a byte range of already-read data can be
// recovered later.
//   buf      - the half currently being filled / consumed
//   bufOther - the other half (NULL until the first switch of halves)
//   bufPtr   - next unread byte within buf
//   remaining- bytes read from the stream into buf but not yet returned by read()
//   readOffset    - running total of bytes returned by read()
//   markingOffset - offset passed to the most recent mark()
//   markBuffer    - marked bytes accumulated so far
class CMarkReadStream : implements ISimpleReadStream, public CMarkReadBase
{
    ISimpleReadStream &stream;
    offset_t readOffset, markingOffset;
    byte *buf, *bufPtr, *bufOther, *bufLowerHalf, *bufUpperHalf;
    size32_t remaining, bufSize;
    MemoryBuffer markBuffer;
    bool marking;
public:
    IMPLEMENT_IINTERFACE;
    // NOTE(review): the malloc result is not checked.
    CMarkReadStream(ISimpleReadStream &_stream) : stream(_stream), readOffset(0)
    {
        bufSize = 0x8000/2;
        bufLowerHalf = buf = (byte *)malloc(bufSize*2);
        bufUpperHalf = bufLowerHalf+bufSize;
        reset();
    }
    // Frees the buffer and releases a reference on the wrapped stream.
    // NOTE(review): the constructor does not Link() the stream, so the caller
    // presumably hands over its reference - confirm.
    ~CMarkReadStream()
    {
        free(bufLowerHalf); // pointer to whole buf in fact
        stream.Release();
    }
    // Discard all buffered data and any open mark; offsets restart from 0.
    virtual void reset()
    {
        remaining = 0;
        buf = bufPtr = bufLowerHalf;
        bufOther = NULL;
        readOffset = markingOffset = 0;
        marking = false;
        markBuffer.resetBuffer();
    }
    // Open a mark at 'offset'. If the offset lies before the start of the
    // current half, the tail of the other half from 'offset' onwards is copied
    // into markBuffer; throws if the offset is older than what is still buffered.
    virtual void mark(offset_t offset)
    {
        assertex(!marking);
        marking=true;
        markingOffset = offset;
        offset_t from = readOffset-(bufPtr-buf); // stream offset corresponding to buf[0]
        if (offset < from)
        {
            if (!bufOther)
                throw MakeStringException(0, "Not enough buffered to mark!");
            from -= bufSize; // now the stream offset of bufOther[0]
            if (offset < from)
                throw MakeStringException(0, "Not enough buffered to mark!");
            size32_t a = (size32_t)(offset-from);
            markBuffer.append(bufSize-a, bufOther+a);
        }
    }
    // Complete the mark at 'offset': markBuffer is trimmed or topped up from
    // the current half so that it holds offset-markingOffset bytes, then its
    // contents are swapped into mb (mb's previous contents are cleared).
    virtual void getMarkTo(offset_t offset, MemoryBuffer &mb)
    {
        assertex(marking);
        size32_t markSize = (size32_t)(offset-markingOffset);
        int d = markSize-markBuffer.length(); // bytes still missing (>0) or surplus (<0)
        if (d < 0)
            markBuffer.setLength(markSize);
        else if (d > 0)
        {
            offset_t from = readOffset-(bufPtr-buf);
            size32_t o = 0;
            if (markingOffset>from)
                o = (size32_t)(markingOffset-from); // mark began inside the current half
            markBuffer.append(d, buf+o);
        }
        mb.clear();
        mb.swapWith(markBuffer);
        marking = false;
    }
    // Abandon an open mark and drop anything accumulated for it.
    virtual void closeMark()
    {
        if (marking)
        {
            markBuffer.clear();
            marking = false;
        }
    }
    // ISimpleReadStream
    // Copies up to 'len' bytes into 'data' and returns the number copied
    // (0 when the underlying stream returns nothing).
    virtual size32_t read(size32_t len, void * data)
    {
        unsigned r = 0;
        if (!remaining)
        {
            // nothing buffered: first try to top up the unused part of the current half
            size32_t bufSpace = bufSize-(bufPtr-buf);
            if (bufSpace)
            {
                remaining = stream.read(bufSpace, bufPtr);
                if (remaining)
                {
                    bufSpace -= remaining;
                    r = std::min(len, remaining);
                    memcpy(data, bufPtr, r);
                    remaining -= r;
                    len -= r;
                    bufPtr += r;
                    data = (byte *)data + r;
                    readOffset += r;
                }
                else
                    return 0;
            }
            if (!bufSpace && !remaining)
            {
                // current half is full and fully consumed: preserve any marked
                // bytes it holds, then switch to the other half
                if (marking && bufOther)
                {
                    offset_t from = readOffset-(bufPtr-buf);
                    int d = (int)(markingOffset-from);
                    if (d>0)
                        markBuffer.append(bufSize-d, buf+d);
                    else
                        markBuffer.append(bufSize, buf);
                }
                if (buf==bufLowerHalf)
                {
                    buf = bufUpperHalf;
                    bufOther = bufLowerHalf;
                }
                else
                {
                    buf = bufLowerHalf;
                    bufOther = bufUpperHalf;
                }
                bufPtr = buf;
            }
            if (!len) return r;
            if (!remaining)
            {
                remaining = stream.read(bufSize, buf);
                if (!remaining)
                    return r;
            }
        }
        // serve (the rest of) the request from what is buffered
        unsigned r2 = std::min(len, remaining);
        memcpy(data, bufPtr, r2);
        remaining -= r2;
        bufPtr += r2;
        readOffset += r2;
        return r + r2;
    }
};
  1018. // could contain a IPT, but convenient and efficient to derive impl.
  1019. class CPTreeWithOffsets : public LocalPTree
  1020. {
  1021. public:
  1022. CPTreeWithOffsets(const char *name) : LocalPTree(name) { startOffset = endOffset = 0; }
  1023. offset_t startOffset, endOffset;
  1024. };
  1025. class COffsetNodeCreator : implements IPTreeNodeCreator, public CInterface
  1026. {
  1027. public:
  1028. IMPLEMENT_IINTERFACE;
  1029. COffsetNodeCreator() { }
  1030. virtual IPropertyTree *create(const char *tag) { return new CPTreeWithOffsets(tag); }
  1031. };
  1032. class thorhelper_decl CColumnIterator : implements IColumnProviderIterator, public CInterface
  1033. {
  1034. Linked<IColumnProvider> parent;
  1035. Linked<IPropertyTree> root, matchNode;
  1036. MemoryBuffer * contentMb;
  1037. offset_t contentStartOffset;
  1038. void *utf8Translator;
  1039. Linked<IPropertyTreeIterator> iter;
  1040. Owned<IColumnProvider> cur;
  1041. StringAttr xpath;
  1042. public:
  1043. CColumnIterator(IColumnProvider *_parent, void *_utf8Translator, IPropertyTree *_root, IPropertyTree *_matchNode, IPropertyTreeIterator * _iter, MemoryBuffer *_contentMb, offset_t _contentStartOffset, const char *_xpath) : parent(_parent), root(_root), matchNode(_matchNode), iter(_iter), utf8Translator(_utf8Translator), xpath(_xpath), contentStartOffset(_contentStartOffset) { contentMb = _contentMb; }
  1044. IMPLEMENT_IINTERFACE;
  1045. IColumnProvider * first()
  1046. {
  1047. if (!iter->first())
  1048. return NULL;
  1049. setCurrent();
  1050. return cur;
  1051. }
  1052. IColumnProvider * next()
  1053. {
  1054. if (!iter->next())
  1055. return NULL;
  1056. setCurrent();
  1057. return cur;
  1058. }
  1059. void setCurrent();
  1060. };
  1061. class CColumnProvider : implements IColumnProvider, public CInterface
  1062. {
  1063. Linked<IPropertyTree> root, node;
  1064. MemoryBuffer contentMb;
  1065. bool content;
  1066. offset_t contentStartOffset;
  1067. void *utf8Translator;
  1068. CriticalSection crit;
  1069. MemoryBuffer tmpMb;
  1070. MemoryBuffer sharedResult;
  1071. StringAttr xpath;
  1072. void cnv2Latin1(unsigned length, const void *data, MemoryBuffer &mb)
  1073. {
  1074. void *target = mb.reserveTruncate(length);
  1075. if (length == 0)
  1076. return;
  1077. bool f;
  1078. unsigned rl = rtlCodepageConvert(utf8Translator, length, (char *)target, length, (const char *)data, f);
  1079. if (f)
  1080. {
  1081. StringBuffer errMsg("Failure translating utf-8, matching element '");
  1082. errMsg.append(xpath).append("' data: '");
  1083. if (length>100)
  1084. {
  1085. appendDataAsHex(errMsg, 100, data);
  1086. errMsg.append("<TRUNCATED>");
  1087. }
  1088. else
  1089. appendDataAsHex(errMsg, length, data);
  1090. errMsg.append("'");
  1091. throw MakeStringExceptionDirect(0, errMsg.str());
  1092. } else if (length > rl)
  1093. mb.setLength(rl);
  1094. }
  1095. public:
  1096. IMPLEMENT_IINTERFACE;
  1097. CColumnProvider(void *_utf8Translator, IPropertyTree *_root, IPropertyTree *_node, MemoryBuffer *_contentMb, bool ownContent, offset_t _contentStartOffset, const char *_xpath) : root(_root), node(_node), utf8Translator(_utf8Translator), contentStartOffset(_contentStartOffset), xpath(_xpath)
  1098. {
  1099. if (_contentMb)
  1100. {
  1101. content = true;
  1102. if (ownContent)
  1103. contentMb.swapWith(*_contentMb);
  1104. else
  1105. contentMb.setBuffer(_contentMb->length(), (void *)_contentMb->toByteArray());
  1106. }
  1107. else
  1108. content = false;
  1109. }
  1110. bool contentRequest(const char *path, size32_t &offset, size32_t &length)
  1111. {
  1112. StringBuffer subPath;
  1113. if (isContentXPath(path, subPath))
  1114. {
  1115. assertex(content);
  1116. if (subPath.length())
  1117. {
  1118. if ('/' == *path && '/' != *(path+1))
  1119. throw MakeStringException(0, "Cannot extract xml text from absolute path specification: %s", path);
  1120. CPTreeWithOffsets *subTree = (CPTreeWithOffsets *)node->queryPropTree(subPath.str());
  1121. if (subTree)
  1122. {
  1123. offset = (size32_t)(subTree->startOffset-contentStartOffset);
  1124. length = (size32_t)(subTree->endOffset-subTree->startOffset);
  1125. }
  1126. else
  1127. {
  1128. offset = 0;
  1129. length = 0;
  1130. }
  1131. }
  1132. else
  1133. {
  1134. CPTreeWithOffsets *_node = (CPTreeWithOffsets *)node.get();
  1135. if (contentStartOffset != _node->startOffset)
  1136. { // must be child
  1137. offset = (size32_t)(_node->startOffset-contentStartOffset);
  1138. length = (size32_t)(_node->endOffset-_node->startOffset);
  1139. }
  1140. else
  1141. {
  1142. offset = 0;
  1143. length = contentMb.length();
  1144. }
  1145. }
  1146. return true;
  1147. }
  1148. return false;
  1149. }
  1150. inline bool hasProp(const char * path)
  1151. {
  1152. if (path && '/' == *path && '/' != *(path+1))
  1153. return root->hasProp(path+1);
  1154. else
  1155. return node->hasProp(path);
  1156. }
  1157. inline const char * queryProp(const char * path)
  1158. {
  1159. if (path && '/' == *path && '/' != *(path+1))
  1160. return root->queryProp(path+1);
  1161. else
  1162. return node->queryProp(path);
  1163. }
  1164. inline bool getPropBin(const char * path, MemoryBuffer & mb)
  1165. {
  1166. if (path && '/' == *path && '/' != *(path+1))
  1167. return root->getPropBin(path+1, mb);
  1168. else
  1169. return node->getPropBin(path, mb);
  1170. }
  1171. // IColumnProvider
  1172. void getData(size32_t len, void * data, const char * path)
  1173. {
  1174. readData(len, data, path, 0, NULL);
  1175. }
  1176. void getDataX(size32_t & len, void * & data, const char * path)
  1177. {
  1178. readDataX(len, data, path, 0, NULL);
  1179. }
  1180. void getDataRaw(size32_t len, void * data, const char * path)
  1181. {
  1182. readDataRaw(len, data, path, 0, NULL);
  1183. }
  1184. void getDataRawX(size32_t & len, void * & data, const char * path)
  1185. {
  1186. readDataRawX(len, data, path, 0, NULL);
  1187. }
  1188. bool getBool(const char * path)
  1189. {
  1190. return readBool(path, false);
  1191. }
  1192. __int64 getInt(const char * path)
  1193. {
  1194. return readInt(path, 0);
  1195. }
  1196. void getQString(size32_t len, char * text, const char * path)
  1197. {
  1198. readQString(len, text, path, 0, NULL);
  1199. }
  1200. void getString(size32_t len, char * text, const char * path)
  1201. {
  1202. readString(len, text, path, 0, NULL);
  1203. }
  1204. void getStringX(size32_t & len, char * & text, const char * path)
  1205. {
  1206. readStringX(len, text, path, 0, NULL);
  1207. }
  1208. void getUnicodeX(size32_t & len, UChar * & text, const char * path)
  1209. {
  1210. readUnicodeX(len, text, path, 0, NULL);
  1211. }
  1212. void getUtf8X(size32_t & len, char * & text, const char * path)
  1213. {
  1214. readUtf8X(len, text, path, 0, NULL);
  1215. }
  1216. bool getIsSetAll(const char * path)
  1217. {
  1218. return readIsSetAll(path, false);
  1219. }
  1220. IColumnProviderIterator * getChildIterator(const char * path)
  1221. {
  1222. Owned<IPropertyTreeIterator> iter;
  1223. if (path && '/' == *path && '/' != *(path+1))
  1224. iter.setown(root->getElements(path+1));
  1225. else
  1226. iter.setown(node->getElements(path));
  1227. return new CColumnIterator(this, utf8Translator, root, node, iter, content ? &contentMb : NULL, contentStartOffset, xpath);
  1228. }
  1229. //
  1230. virtual void readData(size32_t len, void * data, const char * path, size32_t _lenDefault, const void * _default)
  1231. {
  1232. CriticalBlock b(crit);
  1233. sharedResult.clear();
  1234. size32_t offset = 0;
  1235. size32_t length = 0;
  1236. if (contentRequest(path, offset, length))
  1237. {
  1238. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1239. }
  1240. else
  1241. {
  1242. if (!getPropBin(path, tmpMb.clear()))
  1243. {
  1244. rtlStrToData(len, data, _lenDefault, _default);
  1245. return;
  1246. }
  1247. cnv2Latin1(tmpMb.length(), tmpMb.toByteArray(), sharedResult);
  1248. }
  1249. decodeHexPairs((const char *)sharedResult.toByteArray(), sharedResult.length(), data, len);
  1250. }
  1251. virtual void readDataX(size32_t & len, void * & data, const char * path, size32_t _lenDefault, const void * _default)
  1252. {
  1253. CriticalBlock b(crit);
  1254. sharedResult.clear();
  1255. size32_t offset = 0;
  1256. size32_t length = 0;
  1257. if (contentRequest(path, offset, length))
  1258. {
  1259. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1260. }
  1261. else
  1262. {
  1263. if (!getPropBin(path, tmpMb.clear()))
  1264. {
  1265. rtlStrToDataX(len, data, _lenDefault, _default);
  1266. return;
  1267. }
  1268. cnv2Latin1(tmpMb.length(), tmpMb.toByteArray(), sharedResult);
  1269. }
  1270. decodeHexPairsX((const char *)sharedResult.toByteArray(), sharedResult.length(), data, len);
  1271. }
  1272. virtual void readDataRaw(size32_t len, void * data, const char * path, size32_t _lenDefault, const void * _default)
  1273. {
  1274. CriticalBlock b(crit);
  1275. sharedResult.clear();
  1276. size32_t offset = 0;
  1277. size32_t length = 0;
  1278. if (contentRequest(path, offset, length))
  1279. {
  1280. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1281. }
  1282. else
  1283. {
  1284. if (!getPropBin(path, tmpMb.clear()))
  1285. {
  1286. rtlStrToData(len, data, _lenDefault, _default);
  1287. return;
  1288. }
  1289. }
  1290. memcpy(data, sharedResult.toByteArray(), sharedResult.length());
  1291. if (len < sharedResult.length())
  1292. memset((byte*)data + sharedResult.length(), 0, len-sharedResult.length());
  1293. }
  1294. virtual void readDataRawX(size32_t & len, void * & data, const char * path, size32_t _lenDefault, const void * _default)
  1295. {
  1296. CriticalBlock b(crit);
  1297. sharedResult.clear();
  1298. size32_t offset = 0;
  1299. size32_t length = 0;
  1300. if (contentRequest(path, offset, length))
  1301. {
  1302. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1303. }
  1304. else
  1305. {
  1306. if (!getPropBin(path, tmpMb.clear()))
  1307. {
  1308. rtlStrToDataX(len, data, _lenDefault, _default);
  1309. return;
  1310. }
  1311. }
  1312. len = tmpMb.length();
  1313. if (len)
  1314. {
  1315. data = malloc(len);
  1316. memcpy(data, tmpMb.toByteArray(), len);
  1317. }
  1318. else
  1319. data = NULL;
  1320. }
  1321. virtual bool readBool(const char * path, bool _default)
  1322. {
  1323. size32_t offset = 0;
  1324. size32_t length = 0;
  1325. if (contentRequest(path, offset, length))
  1326. throw MakeStringException(0, "Attempting to extract xml content text as boolean");
  1327. const char *str = queryProp(path);
  1328. if (!str) return _default;
  1329. return strToBool(str);
  1330. }
  1331. virtual __int64 readInt(const char * path, __int64 _default)
  1332. {
  1333. size32_t offset = 0;
  1334. size32_t length = 0;
  1335. if (contentRequest(path, offset, length))
  1336. throw MakeStringException(0, "Attempting to extract xml content text as integer");
  1337. const char *str = queryProp(path);
  1338. if (!str) return _default;
  1339. return _atoi64(str);
  1340. }
  1341. virtual void readQString(size32_t len, char * text, const char * path, size32_t _lenDefault, const char * _default)
  1342. {
  1343. CriticalBlock b(crit);
  1344. sharedResult.clear();
  1345. size32_t offset = 0;
  1346. size32_t length = 0;
  1347. if (contentRequest(path, offset, length))
  1348. {
  1349. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1350. }
  1351. else
  1352. {
  1353. const char *str = queryProp(path);
  1354. if (str)
  1355. cnv2Latin1((size32_t)strlen(str), str, sharedResult);
  1356. else
  1357. {
  1358. rtlQStrToQStr(len, text, _lenDefault, _default);
  1359. return;
  1360. }
  1361. }
  1362. rtlStrToQStr(len, text, sharedResult.length(), sharedResult.toByteArray());
  1363. }
  1364. virtual void readString(size32_t len, char * text, const char * path, size32_t _lenDefault, const char * _default)
  1365. {
  1366. CriticalBlock b(crit);
  1367. sharedResult.clear();
  1368. size32_t offset = 0;
  1369. size32_t length = 0;
  1370. if (contentRequest(path, offset, length))
  1371. {
  1372. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1373. }
  1374. else
  1375. {
  1376. const char *str = queryProp(path);
  1377. if (str)
  1378. cnv2Latin1((size32_t)strlen(str), str, sharedResult);
  1379. else
  1380. {
  1381. rtlStrToStr(len, text, _lenDefault, _default);
  1382. return;
  1383. }
  1384. }
  1385. rtlStrToStr(len, text, sharedResult.length(), sharedResult.toByteArray());
  1386. }
  1387. virtual void readStringX(size32_t & len, char * & text, const char * path, size32_t _lenDefault, const char * _default)
  1388. {
  1389. MemoryBuffer result;
  1390. size32_t offset = 0;
  1391. size32_t length = 0;
  1392. if (contentRequest(path, offset, length))
  1393. {
  1394. if (length)
  1395. cnv2Latin1(length, contentMb.toByteArray()+offset, result);
  1396. }
  1397. else
  1398. {
  1399. const char *str = queryProp(path);
  1400. if (str)
  1401. cnv2Latin1((size32_t)strlen(str), str, result);
  1402. else
  1403. {
  1404. rtlStrToStrX(len, text, _lenDefault, _default);
  1405. return;
  1406. }
  1407. }
  1408. len = result.length();
  1409. text = (char *) result.detach();
  1410. }
  1411. virtual void readUnicodeX(size32_t & len, UChar * & text, const char * path, size32_t _lenDefault, const UChar * _default)
  1412. {
  1413. size32_t offset = 0;
  1414. size32_t length = 0;
  1415. if (contentRequest(path, offset, length))
  1416. {
  1417. rtlCodepageToUnicodeX(len, text, length, contentMb.toByteArray()+offset, "utf-8");
  1418. }
  1419. else
  1420. {
  1421. CriticalBlock b(crit);
  1422. const char *tmpPtr = queryProp(path);
  1423. if (tmpPtr)
  1424. rtlCodepageToUnicodeX(len, text, strlen(tmpPtr), tmpPtr, "utf-8");
  1425. else
  1426. rtlUnicodeToUnicodeX(len, text, _lenDefault, _default);
  1427. }
  1428. }
  1429. virtual void readUtf8X(size32_t & len, char * & text, const char * path, size32_t _lenDefault, const char * _default)
  1430. {
  1431. size32_t offset = 0;
  1432. size32_t length = 0;
  1433. size32_t size;
  1434. if (contentRequest(path, offset, length))
  1435. {
  1436. rtlStrToStrX(size, text, length, contentMb.toByteArray()+offset);
  1437. }
  1438. else
  1439. {
  1440. CriticalBlock b(crit);
  1441. const char *tmpPtr = queryProp(path);
  1442. if (tmpPtr)
  1443. {
  1444. rtlStrToStrX(size, text, strlen(tmpPtr), tmpPtr);
  1445. }
  1446. else
  1447. {
  1448. rtlUtf8ToUtf8X(len, text, _lenDefault, _default);
  1449. return;
  1450. }
  1451. }
  1452. len = rtlUtf8Length(size, text);
  1453. }
  1454. virtual bool readIsSetAll(const char * path, bool _default)
  1455. {
  1456. if (hasProp(path))
  1457. {
  1458. StringBuffer fullpath;
  1459. fullpath.append(path).append("/All");
  1460. if (path && '/' == *path && '/' != *(path+1))
  1461. return root->hasProp(fullpath.str()+1);
  1462. else
  1463. return node->hasProp(fullpath.str());
  1464. }
  1465. return _default;
  1466. }
  1467. };
  1468. void CColumnIterator::setCurrent()
  1469. {
  1470. Owned<IPropertyTree> curTree = &iter->get();
  1471. if (contentMb)
  1472. cur.setown(new CColumnProvider(utf8Translator, root, curTree, contentMb, false, contentStartOffset, xpath));
  1473. else
  1474. cur.setown(new CColumnProvider(utf8Translator, root, curTree, NULL, false, 0, xpath));
  1475. }
  1476. class CXMLParse : implements IXMLParse, public CInterface
  1477. {
  1478. IPullPTreeReader *xmlReader;
  1479. StringAttr xpath;
  1480. IXMLSelect *iXMLSelect; // NOTE - not linked - creates circular links
  1481. PTreeReaderOptions xmlOptions;
  1482. bool step, contentRequired, isJson;
  1483. //to make json file handling intuitive an array opening at root level is just ignored
  1484. //but webservice calls map the entire response to a single row, and keeping the array works better
  1485. bool keepRootArray;
  1486. class CMakerBase : public CInterface, implements IPTreeMaker
  1487. {
  1488. protected:
  1489. CXPath xpath;
  1490. IXMLSelect *iXMLSelect; // NOTE - not linked - creates circular links
  1491. CICopyArrayOf<CParseStackInfo> stack, freeParseInfo;
  1492. IPTreeMaker *maker;
  1493. Linked<CMarkReadBase> marking;
  1494. Owned<COffsetNodeCreator> nodeCreator;
  1495. void *utf8Translator;
  1496. unsigned level;
  1497. bool contentRequired;
  1498. unsigned lastMatchKeptLevel;
  1499. IPropertyTree *lastMatchKeptNode, *lastMatchKeptNodeParent;
  1500. public:
  1501. IMPLEMENT_IINTERFACE;
  1502. CMakerBase(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces) : xpath(_xpath, ignoreNameSpaces), iXMLSelect(&_iXMLSelect), contentRequired(_contentRequired)
  1503. {
  1504. lastMatchKeptLevel = 0;
  1505. lastMatchKeptNode = lastMatchKeptNodeParent = NULL;
  1506. maker = NULL;
  1507. utf8Translator = NULL;
  1508. }
  1509. ~CMakerBase()
  1510. {
  1511. ForEachItemIn(i, stack)
  1512. delete &stack.item(i);
  1513. ForEachItemIn(i2, freeParseInfo)
  1514. delete &freeParseInfo.item(i2);
  1515. ::Release(maker);
  1516. rtlCloseCodepageConverter(utf8Translator);
  1517. }
  1518. void init()
  1519. {
  1520. level = 0;
  1521. nodeCreator.setown(new COffsetNodeCreator());
  1522. maker = createRootLessPTreeMaker(ipt_none, NULL, nodeCreator);
  1523. bool f;
  1524. utf8Translator = rtlOpenCodepageConverter("utf-8", "latin1", f);
  1525. if (f)
  1526. throw MakeStringException(0, "Failed to initialize unicode utf-8 translator");
  1527. }
  1528. void setMarkingStream(CMarkReadBase &_marking) { marking.set(&_marking); }
  1529. CXPath &queryXPath() { return xpath; }
  1530. // IPTreeMaker
  1531. virtual void beginNode(const char *tag, offset_t startOffset)
  1532. {
  1533. if (lastMatchKeptNode && level == lastMatchKeptLevel)
  1534. {
  1535. // NB: could be passed to match objects for removal by match object,
  1536. // but dubious if useful for greater than one path to exist above match.
  1537. if (lastMatchKeptNodeParent)
  1538. lastMatchKeptNodeParent->removeTree(lastMatchKeptNode);
  1539. else
  1540. maker->reset();
  1541. lastMatchKeptNode = NULL;
  1542. }
  1543. bool res = false;
  1544. CParseStackInfo *stackInfo;
  1545. if (freeParseInfo.ordinality())
  1546. {
  1547. stackInfo = &freeParseInfo.popGet();
  1548. stackInfo->reset();
  1549. }
  1550. else
  1551. stackInfo = new CParseStackInfo();
  1552. stackInfo->startOffset = startOffset;
  1553. if (!stack.ordinality())
  1554. {
  1555. if (0 == xpath.queryDepth() || xpath.match(0, tag))
  1556. {
  1557. if (1 >= xpath.queryDepth())
  1558. {
  1559. if (contentRequired)
  1560. {
  1561. assertex(marking);
  1562. marking->mark(startOffset); // mark stream at tag start offset
  1563. }
  1564. }
  1565. res = true;
  1566. }
  1567. }
  1568. else if (xpath.queryDepth())
  1569. {
  1570. if (stack.tos().keep)
  1571. {
  1572. if (level >= xpath.queryDepth())
  1573. res = true;
  1574. else if (xpath.match(level, tag))
  1575. {
  1576. res = true;
  1577. if (level == xpath.queryDepth()-1)
  1578. {
  1579. if (contentRequired)
  1580. {
  1581. assertex(marking);
  1582. marking->mark(startOffset); // mark stream at tag start offset
  1583. }
  1584. }
  1585. }
  1586. else if (level > ((unsigned)xpath.queryHighestQualifier()))
  1587. {
  1588. stackInfo->keptForQualifier = true;
  1589. res = true; // construct content below qualified tag (!=simple) needed to qualify when back at topQ.
  1590. }
  1591. }
  1592. }
  1593. else
  1594. res = true;
  1595. stackInfo->keep = res;
  1596. stack.append(*stackInfo);
  1597. if (res)
  1598. {
  1599. maker->beginNode(tag, startOffset);
  1600. CPTreeWithOffsets *current = (CPTreeWithOffsets *)maker->queryCurrentNode();
  1601. current->startOffset = startOffset;
  1602. stackInfo->nodeMade = res;
  1603. stackInfo->iPTMade = current;
  1604. }
  1605. }
  1606. virtual void newAttribute(const char *tag, const char *value)
  1607. {
  1608. if (stack.tos().keep)
  1609. maker->newAttribute(tag, value);
  1610. }
  1611. virtual void beginNodeContent(const char *tag)
  1612. {
  1613. // Can optimize qualifiers here that contain only attribute tests.
  1614. bool &keep = stack.tos().keep;
  1615. if (keep)
  1616. {
  1617. if (xpath.toQualify(level, true))
  1618. {
  1619. IPropertyTree *currentNode = maker->queryCurrentNode();
  1620. keep = xpath.qualify(*currentNode, level);
  1621. }
  1622. }
  1623. level++;
  1624. }
  1625. virtual void endNode(const char *tag, unsigned length, const void *value, bool binary, offset_t endOffset)
  1626. {
  1627. --level;
  1628. CParseStackInfo &stackInfo = stack.tos();
  1629. bool keep = stackInfo.keep;
  1630. bool nodeMade = stackInfo.nodeMade;
  1631. IPropertyTree *currentNode = maker->queryCurrentNode();
  1632. if (nodeMade)
  1633. {
  1634. CPTreeWithOffsets *current = (CPTreeWithOffsets *)maker->queryCurrentNode();
  1635. current->endOffset = endOffset;
  1636. maker->endNode(tag, length, value, binary, endOffset);
  1637. }
  1638. if (keep)
  1639. {
  1640. if (!stackInfo.keptForQualifier)
  1641. if (xpath.toQualify(level, false))
  1642. keep = xpath.qualify(*currentNode, level);
  1643. }
  1644. bool matched = false;
  1645. if (keep)
  1646. {
  1647. if (!stackInfo.keptForQualifier)
  1648. {
  1649. if ((0 == xpath.queryDepth() && 0 == level) || level == xpath.queryDepth()-1)
  1650. {
  1651. unsigned topQ = xpath.queryHighestQualifier();
  1652. unsigned noHigherQualifiers = -1 == topQ || topQ >= level;
  1653. IPropertyTree *parent = stack.ordinality()>=2?stack.item(stack.ordinality()-2).iPTMade:NULL;
  1654. if (noHigherQualifiers)
  1655. {
  1656. MemoryBuffer mb;
  1657. MemoryBuffer *content;
  1658. if (contentRequired)
  1659. {
  1660. assertex(marking);
  1661. marking->getMarkTo(endOffset, mb);
  1662. content = &mb;
  1663. }
  1664. else
  1665. content = NULL;
  1666. CPTreeWithOffsets *currentNodeWO = (CPTreeWithOffsets *)currentNode;
  1667. Owned<CColumnProvider> provider = new CColumnProvider(utf8Translator, maker->queryRoot(), currentNode, content, true, currentNodeWO->startOffset, xpath.queryXPathStr());
  1668. iXMLSelect->match(*provider, stackInfo.startOffset, endOffset);
  1669. matched = true;
  1670. }
  1671. else
  1672. {
  1673. // only prospective match - depends on higher qualifiers being satisfied.
  1674. if (!stackInfo.prospectiveMatches)
  1675. stackInfo.prospectiveMatches = new CProcespectiveMatchArray;
  1676. MemoryBuffer *tagContent = NULL;
  1677. if (contentRequired)
  1678. {
  1679. tagContent = new MemoryBuffer;
  1680. marking->getMarkTo(endOffset, *tagContent);
  1681. }
  1682. stackInfo.prospectiveMatches->append(*new CProspectiveMatch(parent, currentNode, tagContent));
  1683. }
  1684. }
  1685. else if (stackInfo.prospectiveMatches && stackInfo.prospectiveMatches->ordinality() && level < xpath.queryDepth()-1)
  1686. {
  1687. unsigned topQ = xpath.queryHighestQualifier();
  1688. unsigned noHigherQualifiers = -1 == topQ || topQ >= level;
  1689. if (noHigherQualifiers)
  1690. {
  1691. ForEachItemIn(m, *stackInfo.prospectiveMatches)
  1692. {
  1693. CProspectiveMatch &prospectiveMatch = stackInfo.prospectiveMatches->item(m);
  1694. CPTreeWithOffsets *prospectiveNodeWO = (CPTreeWithOffsets *)prospectiveMatch.node;
  1695. Owned<CColumnProvider> provider = new CColumnProvider(utf8Translator, maker->queryRoot(), prospectiveMatch.node, prospectiveMatch.content, true, prospectiveNodeWO->startOffset, xpath.queryXPathStr());
  1696. // NB: caveat; if complex qualifiers on intermediate iterator nodes and fully qualified attributes
  1697. // are access from this match, there are potential ambiguities in the lookup.
  1698. iXMLSelect->match(*provider, stackInfo.startOffset, endOffset);
  1699. matched = true;
  1700. }
  1701. stackInfo.prospectiveMatches->kill();
  1702. stackInfo.prospectiveMatches = NULL;
  1703. }
  1704. }
  1705. else
  1706. {
  1707. if (NULL == lastMatchKeptNode && level < xpath.queryDepth())
  1708. keep = false;
  1709. }
  1710. }
  1711. }
  1712. else
  1713. {
  1714. if (contentRequired && ((0==level && 0==xpath.queryDepth()) || level == xpath.queryDepth()-1))
  1715. {
  1716. assertex(marking);
  1717. marking->closeMark();
  1718. }
  1719. }
  1720. freeParseInfo.append(stackInfo);
  1721. if (keep && stackInfo.prospectiveMatches && stackInfo.prospectiveMatches->ordinality())
  1722. {
  1723. Linked<CParseStackInfo> childStackInfo = &stackInfo;
  1724. stack.pop();
  1725. if (stack.ordinality())
  1726. {
  1727. CParseStackInfo &parentSI = stack.tos();
  1728. if (!parentSI.prospectiveMatches)
  1729. parentSI.prospectiveMatches = new CProcespectiveMatchArray;
  1730. ForEachItemIn(p, *stackInfo.prospectiveMatches)
  1731. parentSI.prospectiveMatches->append(*LINK(&stackInfo.prospectiveMatches->item(p)));
  1732. }
  1733. }
  1734. else
  1735. stack.pop();
  1736. // Track last level kept
  1737. if (lastMatchKeptNode || (keep && matched))
  1738. {
  1739. assertex(nodeMade);
  1740. lastMatchKeptLevel = level;
  1741. lastMatchKeptNode = currentNode;
  1742. lastMatchKeptNodeParent = maker->queryCurrentNode();
  1743. }
  1744. else if (!keep && nodeMade)
  1745. {
  1746. IPropertyTree *parent = maker->queryCurrentNode();
  1747. if (parent)
  1748. parent->removeTree(currentNode);
  1749. }
  1750. currentNode = NULL;
  1751. }
  1752. virtual IPropertyTree *queryRoot() { return maker->queryRoot(); }
  1753. virtual IPropertyTree *queryCurrentNode() { return maker->queryCurrentNode(); }
  1754. virtual void reset()
  1755. {
  1756. level = 0;
  1757. ForEachItemIn(i, stack)
  1758. delete &stack.item(i);
  1759. ForEachItemIn(i2, freeParseInfo)
  1760. delete &freeParseInfo.item(i2);
  1761. stack.kill();
  1762. freeParseInfo.kill();
  1763. if (marking)
  1764. marking->reset();
  1765. }
  1766. virtual IPropertyTree *create(const char *tag)
  1767. {
  1768. return nodeCreator->create(tag);
  1769. }
  1770. } *iXMLMaker;
  1771. class CXMLMaker : public CMakerBase
  1772. {
  1773. public:
  1774. CXMLMaker(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces) : CMakerBase(_xpath, _iXMLSelect, _contentRequired, ignoreNameSpaces)
  1775. {
  1776. }
  1777. };
  1778. class CJSONMaker : public CMakerBase
  1779. {
  1780. private:
  1781. bool keepRootArray;
  1782. bool inRootArray;
  1783. public:
  1784. CJSONMaker(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces, bool _keepRootArray) : CMakerBase(_xpath, _iXMLSelect, _contentRequired, ignoreNameSpaces), keepRootArray(_keepRootArray)
  1785. {
  1786. inRootArray = false;
  1787. }
  1788. bool checkRootArrayItem(const char *&tag)
  1789. {
  1790. if (!inRootArray)
  1791. return false;
  1792. if (stack.ordinality()!=1)
  1793. return false;
  1794. if (streq(tag, "__object__"))
  1795. tag = "Row"; //unamed json root array [{},{}] will generate "Row"s
  1796. return true;
  1797. }
  1798. bool checkSkipRoot(const char *&tag)
  1799. {
  1800. if (checkRootArrayItem(tag))
  1801. return false;
  1802. if (stack.ordinality()) //root level only
  1803. return false;
  1804. if (streq(tag, "__array__")) //xpath starts after root array
  1805. {
  1806. if (keepRootArray && !xpath.queryDepth())
  1807. {
  1808. inRootArray = true;
  1809. return false;
  1810. }
  1811. return true;
  1812. }
  1813. if (streq(tag, "__object__") && xpath.queryDepth()) //empty xpath matches start object, otherwise skip, xpath starts immediately after
  1814. return true;
  1815. return false;
  1816. }
  1817. virtual void beginNode(const char *tag, offset_t startOffset)
  1818. {
  1819. if (!checkSkipRoot(tag))
  1820. CMakerBase::beginNode(tag, startOffset);
  1821. }
  1822. virtual void newAttribute(const char *tag, const char *value)
  1823. {
  1824. if (stack.ordinality() && stack.tos().keep)
  1825. maker->newAttribute(tag, value);
  1826. }
  1827. virtual void beginNodeContent(const char *tag)
  1828. {
  1829. if (!checkSkipRoot(tag))
  1830. CMakerBase::beginNodeContent(tag);
  1831. }
  1832. virtual void endNode(const char *tag, unsigned length, const void *value, bool binary, offset_t endOffset)
  1833. {
  1834. if (!checkSkipRoot(tag))
  1835. CMakerBase::endNode(tag, length, value, binary, endOffset);
  1836. }
  1837. };
  1838. public:
  1839. IMPLEMENT_IINTERFACE;
  1840. CXMLParse(const char *fName, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(fName); }
  1841. CXMLParse(IFile &ifile, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(ifile); }
  1842. CXMLParse(IFileIO &fileio, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(fileio); }
  1843. CXMLParse(ISimpleReadStream &stream, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(stream); }
  1844. CXMLParse(const void *buffer, unsigned bufLen, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); go(buffer, bufLen); }
  1845. CXMLParse(const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true, bool _isJson=false, bool _keepRootArray=false) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step), isJson(_isJson), keepRootArray(_keepRootArray) { init(); }
  1846. ~CXMLParse()
  1847. {
  1848. ::Release(iXMLMaker);
  1849. ::Release(xmlReader);
  1850. }
  1851. CMakerBase *createMaker()
  1852. {
  1853. bool ignoreNameSpaces = 0 != ((unsigned)xmlOptions & (unsigned)ptr_ignoreNameSpaces);
  1854. if (isJson)
  1855. return new CJSONMaker(xpath, *iXMLSelect, contentRequired, ignoreNameSpaces, keepRootArray);
  1856. return new CXMLMaker(xpath, *iXMLSelect, contentRequired, ignoreNameSpaces);
  1857. }
  1858. void init()
  1859. {
  1860. xmlReader = NULL;
  1861. iXMLMaker = createMaker();
  1862. iXMLMaker->init();
  1863. }
  1864. void go(const char *fName)
  1865. {
  1866. OwnedIFile ifile = createIFile(fName);
  1867. go(*ifile);
  1868. }
  1869. void go(IFile &file)
  1870. {
  1871. OwnedIFileIO ifileio = file.open(IFOread);
  1872. if (!ifileio)
  1873. throw MakeStringException(0, "Failed to open: %s", file.queryFilename());
  1874. go(*ifileio);
  1875. }
  1876. void go(IFileIO &fileio)
  1877. {
  1878. Owned<IIOStream> stream = createIOStream(&fileio);
  1879. go(*stream);
  1880. }
  1881. void go(ISimpleReadStream &stream)
  1882. {
  1883. if (contentRequired)
  1884. {
  1885. // only need marking stream if fetching xml text content.
  1886. Owned<CMarkReadStream> markingStream = new CMarkReadStream(*LINK(&stream));
  1887. iXMLMaker->setMarkingStream(*markingStream);
  1888. if (isJson)
  1889. xmlReader = createPullJSONStreamReader(*markingStream, *iXMLMaker, xmlOptions);
  1890. else
  1891. xmlReader = createPullXMLStreamReader(*markingStream, *iXMLMaker, xmlOptions);
  1892. }
  1893. else if (isJson)
  1894. xmlReader = createPullJSONStreamReader(stream, *iXMLMaker, xmlOptions);
  1895. else
  1896. xmlReader = createPullXMLStreamReader(stream, *iXMLMaker, xmlOptions);
  1897. if (!step)
  1898. {
  1899. xmlReader->load();
  1900. xmlReader->Release();
  1901. xmlReader = NULL;
  1902. }
  1903. }
  1904. void go(const void *buffer, unsigned bufLen)
  1905. {
  1906. if (contentRequired)
  1907. {
  1908. Owned<CMarkReadBase> markingStream = new CMarkRead(buffer, bufLen);
  1909. iXMLMaker->setMarkingStream(*markingStream);
  1910. }
  1911. if (isJson)
  1912. xmlReader = createPullJSONBufferReader(buffer, bufLen, *iXMLMaker, xmlOptions);
  1913. else
  1914. xmlReader = createPullXMLBufferReader(buffer, bufLen, *iXMLMaker, xmlOptions);
  1915. if (!step)
  1916. {
  1917. xmlReader->load();
  1918. xmlReader->Release();
  1919. xmlReader = NULL;
  1920. }
  1921. }
  1922. void provideXML(const char *str)
  1923. {
  1924. if (contentRequired)
  1925. {
  1926. Owned<CMarkReadBase> markingStream = new CMarkRead(str, strlen(str));
  1927. iXMLMaker->setMarkingStream(*markingStream);
  1928. }
  1929. if (isJson)
  1930. xmlReader = createPullJSONStringReader(str, *iXMLMaker, xmlOptions);
  1931. else
  1932. xmlReader = createPullXMLStringReader(str, *iXMLMaker, xmlOptions);
  1933. if (!step)
  1934. {
  1935. xmlReader->load();
  1936. xmlReader->Release();
  1937. xmlReader = NULL;
  1938. }
  1939. }
  1940. // IXMLParse
  1941. virtual bool next()
  1942. {
  1943. return xmlReader->next();
  1944. }
  1945. virtual void reset()
  1946. {
  1947. iXMLMaker->reset();
  1948. xmlReader->reset();
  1949. }
  1950. };
  1951. IXMLParse *createXMLParse(const char *filename, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1952. {
  1953. return new CXMLParse(filename, xpath, iselect, xmlOptions, contentRequired);
  1954. }
  1955. IXMLParse *createXMLParse(ISimpleReadStream &stream, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1956. {
  1957. return new CXMLParse(stream, xpath, iselect, xmlOptions, contentRequired);
  1958. }
  1959. IXMLParse *createXMLParse(const void *buffer, unsigned bufLen, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1960. {
  1961. return new CXMLParse(buffer, bufLen, xpath, iselect, xmlOptions, contentRequired);
  1962. }
  1963. IXMLParse *createXMLParseString(const char *string, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1964. {
  1965. CXMLParse *parser = new CXMLParse(xpath, iselect, xmlOptions, contentRequired);
  1966. parser->provideXML(string);
  1967. return parser;
  1968. }
  1969. IXMLParse *createJSONParse(const char *filename, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1970. {
  1971. return new CXMLParse(filename, xpath, iselect, xmlOptions, contentRequired, true, true);
  1972. }
  1973. IXMLParse *createJSONParse(ISimpleReadStream &stream, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1974. {
  1975. return new CXMLParse(stream, xpath, iselect, xmlOptions, contentRequired, true, true);
  1976. }
  1977. IXMLParse *createJSONParse(const void *buffer, unsigned bufLen, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired, bool keepRootArray)
  1978. {
  1979. return new CXMLParse(buffer, bufLen, xpath, iselect, xmlOptions, contentRequired, true, true, keepRootArray);
  1980. }
  1981. IXMLParse *createJSONParseString(const char *string, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1982. {
  1983. CXMLParse *parser = new CXMLParse(xpath, iselect, xmlOptions, contentRequired, true, true);
  1984. parser->provideXML(string);
  1985. return parser;
  1986. }