thorxmlread.cpp 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #include "platform.h"
  14. #include <algorithm>
  15. #include "jlib.hpp"
  16. #include "jexcept.hpp"
  17. #include "jfile.hpp"
  18. #include "jlog.hpp"
  19. #include "csvsplitter.hpp"
  20. #include "thorherror.h"
  21. #include "thorxmlread.hpp"
  22. #include "thorcommon.ipp"
  23. #include "eclrtl.hpp"
  24. #include "jptree.ipp"
  25. #define XMLTAG_CONTENT "<>"
  26. //=====================================================================================================
  27. XmlColumnIterator::XmlColumnIterator(IPropertyTreeIterator * _iter) : iter(_iter)
  28. {
  29. }
  30. IColumnProvider * XmlColumnIterator::first()
  31. {
  32. if (!iter->first())
  33. return NULL;
  34. setCurrent();
  35. return cur;
  36. }
  37. IColumnProvider * XmlColumnIterator::next()
  38. {
  39. if (!iter->next())
  40. return NULL;
  41. setCurrent();
  42. return cur;
  43. }
  44. void XmlColumnIterator::setCurrent()
  45. {
  46. Owned<IPropertyTree> curTree = &iter->get();
  47. cur.setown(new XmlDatasetColumnProvider);
  48. cur->setRow(curTree);
  49. }
  50. //=====================================================================================================
  51. static void decodeHexPairs(const char *input, unsigned inputLen, void * outData, unsigned outLen)
  52. {
  53. byte * tgt = (byte *)outData;
  54. while (inputLen >= 2)
  55. {
  56. if (outLen-- == 0)
  57. return;
  58. byte high = hex2num(*input++);
  59. *tgt++ = (high << 4) | hex2num(*input++);
  60. inputLen -= 2;
  61. }
  62. if (outLen)
  63. memset(outData, 0, outLen);
  64. }
  65. static void decodeHexPairsX(const char *input, unsigned inputLen, void *&outData, unsigned &outLen)
  66. {
  67. if (inputLen<2)
  68. {
  69. outLen = 0;
  70. outData = NULL;
  71. return;
  72. }
  73. outLen = inputLen/2;
  74. outData = malloc(outLen);
  75. char *tgt = (char *)outData;
  76. loop
  77. {
  78. byte high = hex2num(*input++);
  79. *tgt++ = (high << 4) | hex2num(*input++);
  80. inputLen -= 2;
  81. if (inputLen<2) break;
  82. }
  83. }
  84. //=====================================================================================================
  85. bool XmlDatasetColumnProvider::getBool(const char * name)
  86. {
  87. return row->getPropBool(name, 0);
  88. }
  89. __int64 XmlDatasetColumnProvider::getInt(const char * name)
  90. {
  91. return row->getPropInt64(name, 0);
  92. }
  93. void XmlDatasetColumnProvider::getData(size32_t len, void * target, const char * name)
  94. {
  95. const char *hexPairSequence = row->queryProp(name);
  96. if (!hexPairSequence)
  97. memset(target, 0, len);
  98. else
  99. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  100. }
  101. void XmlDatasetColumnProvider::getDataX(size32_t & len, void * & target, const char * name)
  102. {
  103. const char *hexPairSequence = row->queryProp(name);
  104. if (!hexPairSequence)
  105. {
  106. len = 0;
  107. target = NULL;
  108. return;
  109. }
  110. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  111. }
  112. void XmlDatasetColumnProvider::getDataRaw(size32_t len, void * target, const char * name)
  113. {
  114. const char *hexPairSequence = row->queryProp(name);
  115. if (!hexPairSequence)
  116. memset(target, 0, len);
  117. else
  118. {
  119. size32_t dLen = (size32_t)strlen(hexPairSequence);
  120. memcpy(target, hexPairSequence, dLen);
  121. if (dLen < len)
  122. memset((byte*)target+dLen, 0, len - dLen);
  123. }
  124. }
  125. void XmlDatasetColumnProvider::getDataRawX(size32_t & len, void * & target, const char * name)
  126. {
  127. const char *hexPairSequence = row->queryProp(name);
  128. if (!hexPairSequence)
  129. {
  130. len = 0;
  131. target = NULL;
  132. return;
  133. }
  134. len = (size32_t)strlen(hexPairSequence);
  135. target = malloc(len);
  136. memcpy(target, hexPairSequence, len);
  137. }
  138. void XmlDatasetColumnProvider::getQString(size32_t len, char * target, const char * name)
  139. {
  140. // You could argue that it should convert from UTF8 to ascii first but it's a no-op for any char that QString supports, and it's ok to be undefined for any char that it doesn't
  141. const char * value = row->queryProp(name);
  142. size32_t lenValue = value ? (size32_t)strlen(value) : 0;
  143. rtlStrToQStr(len, target, lenValue, value);
  144. }
  145. void XmlDatasetColumnProvider::getString(size32_t len, char * target, const char * name)
  146. {
  147. const char * value = row->queryProp(name);
  148. size32_t utf8bytes = value ? (size32_t)strlen(value) : 0;
  149. if (utf8bytes)
  150. rtlUtf8ToStr(len, target, rtlUtf8Length(utf8bytes, value), value);
  151. else
  152. memset(target, ' ', len);
  153. }
  154. void XmlDatasetColumnProvider::getStringX(size32_t & len, char * & target, const char * name)
  155. {
  156. const char * value = row->queryProp(name);
  157. size32_t utf8bytes = value ? (size32_t)strlen(value) : 0;
  158. if (utf8bytes)
  159. rtlUtf8ToStrX(len, target, rtlUtf8Length(utf8bytes, value), value);
  160. else
  161. {
  162. len = 0;
  163. target = NULL;
  164. }
  165. }
  166. void XmlDatasetColumnProvider::getUnicodeX(size32_t & len, UChar * & target, const char * name)
  167. {
  168. const char * text = row->queryProp(name);
  169. if (text)
  170. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  171. else
  172. {
  173. len = 0;
  174. target = NULL;
  175. }
  176. }
  177. void XmlDatasetColumnProvider::getUtf8X(size32_t & len, char * & target, const char * path)
  178. {
  179. const char * value = row->queryProp(path);
  180. size32_t size = value ? (size32_t)strlen(value) : 0;
  181. target = (char *)malloc(size);
  182. memcpy(target, value, size);
  183. len = rtlUtf8Length(size, target);
  184. }
  185. bool XmlDatasetColumnProvider::getIsSetAll(const char * path)
  186. {
  187. StringBuffer fullpath;
  188. fullpath.append(path).append("/All");
  189. return row->hasProp(fullpath.str());
  190. }
  191. IColumnProviderIterator * XmlDatasetColumnProvider::getChildIterator(const char * path)
  192. {
  193. return new XmlColumnIterator(row->getElements(path));
  194. }
  195. bool XmlDatasetColumnProvider::readBool(const char * path, bool _default)
  196. {
  197. return row->getPropBool(path, _default);
  198. }
  199. void XmlDatasetColumnProvider::readData(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  200. {
  201. const char *hexPairSequence = row->queryProp(path);
  202. if (hexPairSequence)
  203. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  204. else
  205. rtlDataToData(len, target, _lenDefault, _default);
  206. }
  207. void XmlDatasetColumnProvider::readDataX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  208. {
  209. const char *hexPairSequence = row->queryProp(path);
  210. if (hexPairSequence)
  211. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  212. else
  213. rtlStrToDataX(len, target, _lenDefault, _default);
  214. }
  215. void XmlDatasetColumnProvider::readDataRaw(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  216. {
  217. rtlDataToData(len, target, _lenDefault, _default);
  218. }
  219. void XmlDatasetColumnProvider::readDataRawX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  220. {
  221. rtlStrToDataX(len, target, _lenDefault, _default);
  222. }
  223. __int64 XmlDatasetColumnProvider::readInt(const char * path, __int64 _default)
  224. {
  225. return row->getPropInt64(path, _default);
  226. }
  227. void XmlDatasetColumnProvider::readQString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  228. {
  229. const char * value = row->queryProp(path);
  230. if (value)
  231. rtlStrToQStr(len, target, (size32_t)strlen(value), value); // more: could process utf8, but characters would be lost anyway. At worse will mean extra blanks.
  232. else
  233. rtlQStrToQStr(len, target, _lenDefault, _default);
  234. }
  235. void XmlDatasetColumnProvider::readString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  236. {
  237. const char * value = row->queryProp(path);
  238. if (value)
  239. rtlUtf8ToStr(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  240. else
  241. rtlStrToStr(len, target, _lenDefault, _default);
  242. }
  243. void XmlDatasetColumnProvider::readStringX(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  244. {
  245. const char * value = row->queryProp(path);
  246. if (value)
  247. rtlUtf8ToStrX(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  248. else
  249. rtlStrToStrX(len, target, _lenDefault, _default);
  250. }
  251. void XmlDatasetColumnProvider::readUnicodeX(size32_t & len, UChar * & target, const char * path, size32_t _lenDefault, const UChar * _default)
  252. {
  253. const char * text = row->queryProp(path);
  254. if (text)
  255. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  256. else
  257. rtlUnicodeToUnicodeX(len, target, _lenDefault, _default);
  258. }
  259. bool XmlDatasetColumnProvider::readIsSetAll(const char * path, bool _default)
  260. {
  261. if (row->hasProp(path))
  262. return getIsSetAll(path);
  263. return _default;
  264. }
  265. void XmlDatasetColumnProvider::readUtf8X(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  266. {
  267. const char * value = row->queryProp(path);
  268. if (value)
  269. rtlUtf8ToUtf8X(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  270. else
  271. rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
  272. }
  273. //=====================================================================================================
  274. bool XmlSetColumnProvider::getBool(const char * name)
  275. {
  276. #ifdef _DEBUG
  277. assertex(stricmp(name, "value")==0);
  278. #endif
  279. return row->getPropBool(NULL, 0);
  280. }
  281. __int64 XmlSetColumnProvider::getInt(const char * name)
  282. {
  283. #ifdef _DEBUG
  284. assertex(stricmp(name, "value")==0);
  285. #endif
  286. return row->getPropInt64(NULL, 0);
  287. }
  288. void XmlSetColumnProvider::getData(size32_t len, void * target, const char * name)
  289. {
  290. #ifdef _DEBUG
  291. assertex(stricmp(name, "value")==0);
  292. #endif
  293. const char *hexPairSequence = row->queryProp(NULL);
  294. if (!hexPairSequence)
  295. memset(target, 0, len);
  296. else
  297. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  298. }
  299. void XmlSetColumnProvider::getDataX(size32_t & len, void * & target, const char * name)
  300. {
  301. #ifdef _DEBUG
  302. assertex(stricmp(name, "value")==0);
  303. #endif
  304. const char *hexPairSequence = row->queryProp(NULL);
  305. if (!hexPairSequence)
  306. {
  307. len = 0;
  308. target = NULL;
  309. return;
  310. }
  311. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  312. }
  313. void XmlSetColumnProvider::getDataRaw(size32_t len, void * target, const char * name)
  314. {
  315. #ifdef _DEBUG
  316. assertex(stricmp(name, "value")==0);
  317. #endif
  318. const char *hexPairSequence = row->queryProp(NULL);
  319. if (!hexPairSequence)
  320. memset(target, 0, len);
  321. else
  322. {
  323. size32_t dLen = strlen(hexPairSequence);
  324. memcpy(target, hexPairSequence, dLen);
  325. if (dLen < len)
  326. memset((byte*)target+dLen, 0, len - dLen);
  327. }
  328. }
  329. void XmlSetColumnProvider::getDataRawX(size32_t & len, void * & target, const char * name)
  330. {
  331. #ifdef _DEBUG
  332. assertex(stricmp(name, "value")==0);
  333. #endif
  334. const char *hexPairSequence = row->queryProp(NULL);
  335. if (!hexPairSequence)
  336. {
  337. len = 0;
  338. target = NULL;
  339. return;
  340. }
  341. len = (size32_t)strlen(hexPairSequence);
  342. target = malloc(len);
  343. memcpy(target, hexPairSequence, len);
  344. }
  345. void XmlSetColumnProvider::getQString(size32_t len, char * target, const char * name)
  346. {
  347. #ifdef _DEBUG
  348. assertex(stricmp(name, "value")==0);
  349. #endif
  350. const char * value = row->queryProp(NULL);
  351. unsigned lenValue = value ? (size32_t)strlen(value) : 0;
  352. rtlStrToQStr(len, target, lenValue, value);
  353. }
  354. void XmlSetColumnProvider::getString(size32_t len, char * target, const char * name)
  355. {
  356. #ifdef _DEBUG
  357. assertex(stricmp(name, "value")==0);
  358. #endif
  359. const char * value = row->queryProp(NULL);
  360. if (value)
  361. rtlVStrToStr(len, target, value);
  362. else
  363. memset(target, ' ', len);
  364. }
  365. void XmlSetColumnProvider::getStringX(size32_t & len, char * & target, const char * name)
  366. {
  367. #ifdef _DEBUG
  368. assertex(stricmp(name, "value")==0);
  369. #endif
  370. const char * value = row->queryProp(NULL);
  371. len = value ? (size32_t)strlen(value) : 0;
  372. target = (char *)malloc(len);
  373. memcpy(target, value, len);
  374. //MORE: utf8->ascii?
  375. }
  376. void XmlSetColumnProvider::getUnicodeX(size32_t & len, UChar * & target, const char * name)
  377. {
  378. #ifdef _DEBUG
  379. assertex(stricmp(name, "value")==0);
  380. #endif
  381. const char * text = row->queryProp(NULL);
  382. if (text)
  383. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  384. else
  385. {
  386. len = 0;
  387. target = NULL;
  388. }
  389. }
  390. void XmlSetColumnProvider::getUtf8X(size32_t & len, char * & target, const char * name)
  391. {
  392. #ifdef _DEBUG
  393. assertex(stricmp(name, "value")==0);
  394. #endif
  395. const char * value = row->queryProp(NULL);
  396. size32_t size = value ? (size32_t)strlen(value) : 0;
  397. target = (char *)malloc(size);
  398. memcpy(target, value, size);
  399. len = rtlUtf8Length(size, value);
  400. }
  401. bool XmlSetColumnProvider::getIsSetAll(const char * path)
  402. {
  403. UNIMPLEMENTED;
  404. StringBuffer fullpath;
  405. fullpath.append(path).append("/All");
  406. return row->hasProp(fullpath.str());
  407. }
  408. IColumnProviderIterator * XmlSetColumnProvider::getChildIterator(const char * path)
  409. {
  410. UNIMPLEMENTED;
  411. return new XmlColumnIterator(row->getElements(path));
  412. }
  413. bool XmlSetColumnProvider::readBool(const char * path, bool _default)
  414. {
  415. return row->getPropBool(NULL, _default);
  416. }
  417. void XmlSetColumnProvider::readData(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  418. {
  419. const char *hexPairSequence = row->queryProp(NULL);
  420. if (hexPairSequence)
  421. decodeHexPairs(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  422. else
  423. rtlDataToData(len, target, _lenDefault, _default);
  424. }
  425. void XmlSetColumnProvider::readDataX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  426. {
  427. const char *hexPairSequence = row->queryProp(NULL);
  428. if (hexPairSequence)
  429. decodeHexPairsX(hexPairSequence, (size32_t)strlen(hexPairSequence), target, len);
  430. else
  431. rtlStrToDataX(len, target, _lenDefault, _default);
  432. }
  433. void XmlSetColumnProvider::readDataRaw(size32_t len, void * target, const char * path, size32_t _lenDefault, const void * _default)
  434. {
  435. rtlDataToData(len, target, _lenDefault, _default);
  436. }
  437. void XmlSetColumnProvider::readDataRawX(size32_t & len, void * & target, const char * path, size32_t _lenDefault, const void * _default)
  438. {
  439. rtlDataToData(len, target, _lenDefault, _default);
  440. }
  441. __int64 XmlSetColumnProvider::readInt(const char * path, __int64 _default)
  442. {
  443. return row->getPropInt64(NULL, _default);
  444. }
  445. void XmlSetColumnProvider::readQString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  446. {
  447. const char * value = row->queryProp(NULL);
  448. if (value)
  449. rtlStrToQStr(len, target, (size32_t)strlen(value), value); // more: could process utf8, but characters would be lost anyway. At worse will mean extra blanks.
  450. else
  451. rtlQStrToQStr(len, target, _lenDefault, _default);
  452. }
  453. void XmlSetColumnProvider::readString(size32_t len, char * target, const char * path, size32_t _lenDefault, const char * _default)
  454. {
  455. const char * value = row->queryProp(NULL);
  456. if (value)
  457. rtlUtf8ToStr(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  458. else
  459. rtlStrToStr(len, target, _lenDefault, _default);
  460. }
  461. void XmlSetColumnProvider::readStringX(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  462. {
  463. const char * value = row->queryProp(NULL);
  464. if (value)
  465. rtlUtf8ToStrX(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  466. else
  467. rtlStrToStrX(len, target, _lenDefault, _default);
  468. }
  469. void XmlSetColumnProvider::readUnicodeX(size32_t & len, UChar * & target, const char * path, size32_t _lenDefault, const UChar * _default)
  470. {
  471. const char * text = row->queryProp(NULL);
  472. if (text)
  473. rtlCodepageToUnicodeX(len, target, (size32_t)strlen(text), text, "utf-8");
  474. else
  475. rtlUnicodeToUnicodeX(len, target, _lenDefault, _default);
  476. }
  477. bool XmlSetColumnProvider::readIsSetAll(const char * path, bool _default)
  478. {
  479. throwUnexpected();
  480. if (row->hasProp(NULL))
  481. return getIsSetAll(path);
  482. return _default;
  483. }
  484. void XmlSetColumnProvider::readUtf8X(size32_t & len, char * & target, const char * path, size32_t _lenDefault, const char * _default)
  485. {
  486. const char * value = row->queryProp(NULL);
  487. if (value)
  488. rtlUtf8ToUtf8X(len, target, rtlUtf8Length((size32_t)strlen(value), value), value);
  489. else
  490. rtlUtf8ToUtf8X(len, target, _lenDefault, _default);
  491. }
  492. IDataVal & CXmlToRawTransformer::transform(IDataVal & result, size32_t len, const void * text, bool isDataSet)
  493. {
  494. // MORE - should redo using a pull parser sometime
  495. Owned<IPropertyTree> root = createPTreeFromXMLString(len, (const char *)text, ipt_none, xmlReadFlags);
  496. return transformTree(result, *root, isDataSet);
  497. }
  498. IDataVal & CXmlToRawTransformer::transformTree(IDataVal & result, IPropertyTree &root, bool isDataSet)
  499. {
  500. unsigned minRecordSize = rowTransformer->queryRecordSize()->getMinRecordSize();
  501. Owned <XmlColumnProvider> columns;
  502. Owned<IPropertyTreeIterator> rows;
  503. StringBuffer decodedXML;
  504. Owned<IPropertyTree> decodedTree;
  505. MemoryBuffer raw;
  506. size32_t curLength = 0;
  507. if (isDataSet)
  508. {
  509. columns.setown(new XmlDatasetColumnProvider);
  510. if (root.hasProp("Row"))
  511. rows.setown(root.getElements("Row"));
  512. else
  513. {
  514. // HACK for Gordon to work around WSDL issues
  515. const char *body = root.queryProp(NULL);
  516. if (body)
  517. {
  518. while(isspace(*body))
  519. body++;
  520. if (strncmp(body, "<Row", 4)==0)
  521. {
  522. try
  523. {
  524. decodedXML.append("<root>").append(body).append("</root>");
  525. decodedTree.setown(createPTreeFromXMLString(decodedXML.str(), ipt_caseInsensitive));
  526. rows.setown(decodedTree->getElements("Row"));
  527. }
  528. catch (IException *E)
  529. {
  530. EXCLOG(E);
  531. E->Release();
  532. }
  533. catch (...)
  534. {
  535. ERRLOG(0, "Unexpected exception decoding XML for dataset");
  536. }
  537. }
  538. }
  539. }
  540. }
  541. else
  542. {
  543. columns.setown(new XmlSetColumnProvider);
  544. rows.setown(root.getElements("string"));
  545. ForEach(*rows)
  546. {
  547. columns->setRow(&rows->query());
  548. NullDiskCallback dummyCallback;
  549. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  550. size32_t thisSize = rowTransformer->transform(rowBuilder, columns, &dummyCallback);
  551. curLength += thisSize;
  552. rowBuilder.finishRow(thisSize);
  553. }
  554. rows.setown(root.getElements("Item"));
  555. }
  556. if (rows)
  557. {
  558. ForEach(*rows)
  559. {
  560. columns->setRow(&rows->query());
  561. NullDiskCallback dummyCallback;
  562. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  563. size32_t thisSize = rowTransformer->transform(rowBuilder, columns, &dummyCallback);
  564. curLength += thisSize;
  565. rowBuilder.finishRow(thisSize);
  566. }
  567. }
  568. result.setLen(raw.toByteArray(), curLength);
  569. return result;
  570. }
  571. size32_t createRowFromXml(ARowBuilder & rowBuilder, size32_t size, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  572. {
  573. Owned<IPropertyTree> root = createPTreeFromXMLString(size, utf8, ipt_none, stripWhitespace ? ptr_ignoreWhiteSpace : ptr_none);
  574. if (!root)
  575. {
  576. throwError(THORCERR_InvalidXmlFromXml);
  577. return 0;
  578. }
  579. Owned <XmlColumnProvider> columns = new XmlDatasetColumnProvider;
  580. columns->setRow(root);
  581. NullDiskCallback dummyCallback;
  582. return xmlTransformer->transform(rowBuilder, columns, &dummyCallback);
  583. }
  584. const void * createRowFromXml(IEngineRowAllocator * rowAllocator, size32_t len, const char * utf8, IXmlToRowTransformer * xmlTransformer, bool stripWhitespace)
  585. {
  586. RtlDynamicRowBuilder rowBuilder(rowAllocator);
  587. size32_t newSize = createRowFromXml(rowBuilder, rtlUtf8Size(len, utf8), utf8, xmlTransformer, stripWhitespace);
  588. return rowBuilder.finalizeRowClear(newSize);
  589. }
  590. //=====================================================================================================
  591. IDataVal & CCsvToRawTransformer::transform(IDataVal & result, size32_t len, const void * text, bool isDataSet)
  592. {
  593. CSVSplitter csvSplitter;
  594. csvSplitter.init(rowTransformer->getMaxColumns(), rowTransformer->queryCsvParameters(), NULL, NULL, NULL, NULL);
  595. size32_t minRecordSize = rowTransformer->queryRecordSize()->getMinRecordSize();
  596. const byte *finger = (const byte *) text;
  597. MemoryBuffer raw;
  598. size32_t curLength = 0;
  599. while (len)
  600. {
  601. unsigned thisLineLength = csvSplitter.splitLine(len, finger);
  602. finger += thisLineLength;
  603. len -= thisLineLength;
  604. MemoryBufferBuilder rowBuilder(raw, minRecordSize);
  605. unsigned thisSize = rowTransformer->transform(rowBuilder, csvSplitter.queryLengths(), (const char * *)csvSplitter.queryData(), 0);
  606. curLength += thisSize;
  607. rowBuilder.finishRow(thisSize);
  608. }
  609. result.setLen(raw.toByteArray(), curLength);
  610. return result;
  611. }
  612. //=====================================================================================================
  613. extern thorhelper_decl IXmlToRawTransformer * createXmlRawTransformer(IXmlToRowTransformer * xmlTransformer, PTreeReaderOptions xmlReadFlags)
  614. {
  615. if (xmlTransformer)
  616. return new CXmlToRawTransformer(*xmlTransformer, xmlReadFlags);
  617. return NULL;
  618. }
  619. extern thorhelper_decl ICsvToRawTransformer * createCsvRawTransformer(ICsvToRowTransformer * csvTransformer)
  620. {
  621. if (csvTransformer)
  622. return new CCsvToRawTransformer(*csvTransformer);
  623. return NULL;
  624. }
  625. bool isContentXPath(const char *xpath, StringBuffer &head)
  626. {
  627. if (xpath)
  628. {
  629. unsigned l = (size32_t)strlen(xpath);
  630. if (l >= 2)
  631. {
  632. const char *x = xpath+l-2;
  633. if ((x[0] == '<') && (x[1] == '>'))
  634. {
  635. head.append((size32_t)(x-xpath), xpath);
  636. return true;
  637. }
  638. }
  639. }
  640. return false;
  641. }
  642. class CXPath
  643. {
  644. int topQualifier;
  645. BoolArray simpleQualifier;
  646. StringArray nodes, qualifierStack;
  647. StringAttr xpathstr;
  648. bool testForSimpleQualifier(const char *qualifier)
  649. {
  650. // first char always '['
  651. return ('@' == qualifier[1]);
  652. }
  653. public:
  654. CXPath(const char *path, bool ignoreNameSpaces)
  655. {
  656. topQualifier = -1;
  657. if (!path) return;
  658. xpathstr.set(path);
  659. if (path && '/'==*path)
  660. {
  661. if ('/' == *(path+1))
  662. throw MakeStringException(0, "// unsupported here");
  663. path++;
  664. }
  665. loop
  666. {
  667. const char *startQ = strchr(path, '[');
  668. const char *nextSep;
  669. loop
  670. {
  671. nextSep = strchr(path, '/');
  672. if (startQ && (!nextSep || startQ < nextSep))
  673. break;
  674. StringAttr node;
  675. unsigned l = nextSep ? (size32_t)(nextSep-path) : (size32_t)strlen(path);
  676. if (!l) break;
  677. if (ignoreNameSpaces)
  678. {
  679. const char *colon = path;
  680. const char *end = path+l+1;
  681. do
  682. {
  683. if (':' == *colon++)
  684. {
  685. l -= colon-path;
  686. path = colon;
  687. break;
  688. }
  689. }
  690. while (colon != end);
  691. }
  692. StringBuffer wildRemoved;
  693. node.set(path, l);
  694. const char *c = node.get();
  695. while (*c) { if ('*' != *c) wildRemoved.append(*c); c++; }
  696. if (wildRemoved.length() && !validateXMLTag(wildRemoved.str()))
  697. throw MakeStringException(0, "Invalid node syntax %s in path %s", node.get(), path);
  698. nodes.append(node);
  699. qualifierStack.append(""); // no qualifier for this segment.
  700. simpleQualifier.append(true); // not used
  701. if (!nextSep) break;
  702. path = nextSep+1;
  703. }
  704. if (!nextSep && !startQ)
  705. break;
  706. const char *endQ = strchr(startQ, ']'); // escaped '[]' chars??
  707. assertex(endQ);
  708. unsigned l=startQ-path;
  709. if (ignoreNameSpaces)
  710. {
  711. const char *colon = path;
  712. const char *end = path+l+1;
  713. do
  714. {
  715. if (':' == *colon++)
  716. {
  717. l -= colon-path;
  718. path = colon;
  719. break;
  720. }
  721. }
  722. while (colon != end);
  723. }
  724. StringAttr node(path, l);
  725. nodes.append(node);
  726. StringAttr qualifier(startQ, endQ-startQ+1);
  727. qualifierStack.append(qualifier);
  728. bool simple = testForSimpleQualifier(qualifier);
  729. simpleQualifier.append(simple);
  730. if (-1 == topQualifier && !simple) topQualifier = qualifierStack.ordinality()-1;
  731. path = nextSep+1;
  732. if (!nextSep) break;
  733. }
  734. }
  735. bool toQualify(unsigned which, bool simple)
  736. {
  737. return (which < queryDepth() && *qualifierStack.item(which) && simple==querySimpleQualifier(which));
  738. }
  739. inline unsigned queryDepth()
  740. {
  741. return nodes.ordinality();
  742. }
  743. inline const char *queryNode(unsigned which)
  744. {
  745. return nodes.item(which);
  746. }
  747. inline bool querySimpleQualifier(unsigned which)
  748. {
  749. return simpleQualifier.item(which);
  750. }
  751. bool match(unsigned level, const char *tag)
  752. {
  753. const char *nodeTag = queryNode(level);
  754. if (strchr(nodeTag, '*'))
  755. return WildMatch(tag, strlen(tag), nodeTag, strlen(nodeTag), false);
  756. else
  757. return (0 == strcmp(nodeTag, tag));
  758. }
  759. bool qualify(IPropertyTree &tree, unsigned depth)
  760. {
  761. const char *qualifier = qualifierStack.item(depth);
  762. if (qualifier && '\0' != *qualifier)
  763. {
  764. const char *q = qualifier;
  765. bool numeric = true;
  766. loop
  767. {
  768. if ('\0' == *q) break;
  769. else if (!isdigit(*q)) { numeric = false; break; }
  770. else q++;
  771. }
  772. if (numeric) throw MakeStringException(0, "Unsupported index qualifier: %s", qualifier);
  773. Owned<IPropertyTreeIterator> matchIter = tree.getElements(qualifier);
  774. if (!matchIter->first())
  775. return false;
  776. }
  777. return true;
  778. }
  779. inline int queryHighestQualifier() { return topQualifier; }
  780. const char *queryXPathStr() { return xpathstr; }
  781. };
  782. class CProspectiveMatch : public CInterface
  783. {
  784. public:
  785. CProspectiveMatch(IPropertyTree *_parent, IPropertyTree *_node, MemoryBuffer *_content=NULL) : parent(_parent), node(_node), content(_content) { }
  786. ~CProspectiveMatch() { if (content) delete content; }
  787. IPropertyTree *parent, *node;
  788. MemoryBuffer *content;
  789. };
  790. typedef CIArrayOf<CProspectiveMatch> CProcespectiveMatchArray;
  791. class CParseStackInfo : public CInterface
  792. {
  793. public:
  794. CParseStackInfo() : keep(false), nodeMade(false), keptForQualifier(false), iPTMade(NULL), startOffset(0), prospectiveMatches(NULL) { }
  795. ~CParseStackInfo()
  796. {
  797. if (prospectiveMatches)
  798. delete prospectiveMatches;
  799. }
  800. inline void reset()
  801. {
  802. keep = nodeMade = keptForQualifier = false;
  803. startOffset = 0;
  804. if (prospectiveMatches)
  805. prospectiveMatches->kill();
  806. iPTMade = NULL;
  807. }
  808. bool keep, nodeMade, keptForQualifier;
  809. offset_t startOffset;
  810. IPropertyTree *iPTMade;
  811. CProcespectiveMatchArray *prospectiveMatches;
  812. };
  813. class CMarkReadBase : public CInterface
  814. {
  815. public:
  816. virtual void reset() = 0;
  817. virtual void mark(offset_t offset) = 0;
  818. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb) = 0;
  819. virtual void closeMark() = 0;
  820. };
  821. class CMarkRead : public CMarkReadBase
  822. {
  823. const void *buffer;
  824. offset_t startOffset;
  825. unsigned bufLen;
  826. bool marking;
  827. public:
  828. CMarkRead(const void *_buffer, unsigned _bufLen) : buffer(_buffer), bufLen(_bufLen)
  829. {
  830. reset();
  831. }
  832. virtual void reset()
  833. {
  834. marking = false;
  835. startOffset = 0;
  836. }
  837. virtual void mark(offset_t offset)
  838. {
  839. assertex(!marking);
  840. marking = true;
  841. if (offset >= bufLen)
  842. throw MakeStringException(0, "start offset past end of input string");
  843. startOffset = offset;
  844. }
  845. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb)
  846. {
  847. assertex(marking);
  848. marking = true;
  849. if (offset < startOffset)
  850. throw MakeStringException(0, "end offset proceeds start offset");
  851. if (offset > bufLen)
  852. throw MakeStringException(0, "end offset past end of input string");
  853. mb.append((size32_t)(offset-startOffset), ((char*)buffer)+startOffset);
  854. marking = false;
  855. }
  856. virtual void closeMark()
  857. {
  858. marking = false;
  859. }
  860. };
  861. class CMarkReadStream : public CMarkReadBase, implements ISimpleReadStream
  862. {
  863. ISimpleReadStream &stream;
  864. offset_t readOffset, markingOffset;
  865. byte *buf, *bufPtr, *bufOther, *bufLowerHalf, *bufUpperHalf;
  866. size32_t remaining, bufSize;
  867. MemoryBuffer markBuffer;
  868. bool marking;
  869. public:
  870. IMPLEMENT_IINTERFACE;
  871. CMarkReadStream(ISimpleReadStream &_stream) : stream(_stream), readOffset(0)
  872. {
  873. bufSize = 0x8000/2;
  874. bufLowerHalf = buf = (byte *)malloc(bufSize*2);
  875. bufUpperHalf = bufLowerHalf+bufSize;
  876. reset();
  877. }
  878. ~CMarkReadStream()
  879. {
  880. free(bufLowerHalf); // pointer to whole buf in fact
  881. stream.Release();
  882. }
  883. virtual void reset()
  884. {
  885. remaining = 0;
  886. buf = bufPtr = bufLowerHalf;
  887. bufOther = NULL;
  888. readOffset = markingOffset = 0;
  889. marking = false;
  890. markBuffer.resetBuffer();
  891. }
  892. virtual void mark(offset_t offset)
  893. {
  894. assertex(!marking);
  895. marking=true;
  896. markingOffset = offset;
  897. offset_t from = readOffset-(bufPtr-buf);
  898. if (offset < from)
  899. {
  900. if (!bufOther)
  901. throw MakeStringException(0, "Not enough buffered to mark!");
  902. from -= bufSize;
  903. if (offset < from)
  904. throw MakeStringException(0, "Not enough buffered to mark!");
  905. size32_t a = (size32_t)(offset-from);
  906. markBuffer.append(bufSize-a, bufOther+a);
  907. }
  908. }
  909. virtual void getMarkTo(offset_t offset, MemoryBuffer &mb)
  910. {
  911. assertex(marking);
  912. size32_t markSize = (size32_t)(offset-markingOffset);
  913. int d = markSize-markBuffer.length();
  914. if (d < 0)
  915. markBuffer.setLength(markSize);
  916. else if (d > 0)
  917. {
  918. offset_t from = readOffset-(bufPtr-buf);
  919. size32_t o = 0;
  920. if (markingOffset>from)
  921. o = (size32_t)(markingOffset-from);
  922. markBuffer.append(d, buf+o);
  923. }
  924. mb.clear();
  925. mb.swapWith(markBuffer);
  926. marking = false;
  927. }
  928. virtual void closeMark()
  929. {
  930. if (marking)
  931. {
  932. markBuffer.clear();
  933. marking = false;
  934. }
  935. }
  936. // ISimpleReadStream
  937. virtual size32_t read(size32_t len, void * data)
  938. {
  939. unsigned r = 0;
  940. if (!remaining)
  941. {
  942. size32_t bufSpace = bufSize-(bufPtr-buf);
  943. if (bufSpace)
  944. {
  945. remaining = stream.read(bufSpace, bufPtr);
  946. if (remaining)
  947. {
  948. bufSpace -= remaining;
  949. r = std::min(len, remaining);
  950. memcpy(data, bufPtr, r);
  951. remaining -= r;
  952. len -= r;
  953. bufPtr += r;
  954. data = (byte *)data + r;
  955. readOffset += r;
  956. }
  957. else
  958. return 0;
  959. }
  960. if (!bufSpace && !remaining)
  961. {
  962. if (marking && bufOther)
  963. {
  964. offset_t from = readOffset-(bufPtr-buf);
  965. int d = (int)(markingOffset-from);
  966. if (d>0)
  967. markBuffer.append(bufSize-d, buf+d);
  968. else
  969. markBuffer.append(bufSize, buf);
  970. }
  971. if (buf==bufLowerHalf)
  972. {
  973. buf = bufUpperHalf;
  974. bufOther = bufLowerHalf;
  975. }
  976. else
  977. {
  978. buf = bufLowerHalf;
  979. bufOther = bufUpperHalf;
  980. }
  981. bufPtr = buf;
  982. }
  983. if (!len) return r;
  984. if (!remaining)
  985. {
  986. remaining = stream.read(bufSize, buf);
  987. if (!remaining)
  988. return r;
  989. }
  990. }
  991. unsigned r2 = std::min(len, remaining);
  992. memcpy(data, bufPtr, r2);
  993. remaining -= r2;
  994. bufPtr += r2;
  995. readOffset += r2;
  996. return r + r2;
  997. }
  998. };
  // This could hold an IPropertyTree as a member instead, but deriving from the implementation is more convenient and efficient.
  1000. class CPTreeWithOffsets : public LocalPTree
  1001. {
  1002. public:
  1003. CPTreeWithOffsets(const char *name) : LocalPTree(name) { startOffset = endOffset = 0; }
  1004. offset_t startOffset, endOffset;
  1005. };
  1006. class COffsetNodeCreator : public CInterface, implements IPTreeNodeCreator
  1007. {
  1008. public:
  1009. IMPLEMENT_IINTERFACE;
  1010. COffsetNodeCreator() { }
  1011. virtual IPropertyTree *create(const char *tag) { return new CPTreeWithOffsets(tag); }
  1012. };
  1013. class thorhelper_decl CColumnIterator : public CInterface, implements IColumnProviderIterator
  1014. {
  1015. Linked<IColumnProvider> parent;
  1016. Linked<IPropertyTree> root, matchNode;
  1017. MemoryBuffer * contentMb;
  1018. offset_t contentStartOffset;
  1019. void *utf8Translator;
  1020. Linked<IPropertyTreeIterator> iter;
  1021. Owned<IColumnProvider> cur;
  1022. StringAttr xpath;
  1023. public:
  1024. CColumnIterator(IColumnProvider *_parent, void *_utf8Translator, IPropertyTree *_root, IPropertyTree *_matchNode, IPropertyTreeIterator * _iter, MemoryBuffer *_contentMb, offset_t _contentStartOffset, const char *_xpath) : parent(_parent), root(_root), matchNode(_matchNode), iter(_iter), utf8Translator(_utf8Translator), xpath(_xpath), contentStartOffset(_contentStartOffset) { contentMb = _contentMb; }
  1025. IMPLEMENT_IINTERFACE;
  1026. IColumnProvider * first()
  1027. {
  1028. if (!iter->first())
  1029. return NULL;
  1030. setCurrent();
  1031. return cur;
  1032. }
  1033. IColumnProvider * next()
  1034. {
  1035. if (!iter->next())
  1036. return NULL;
  1037. setCurrent();
  1038. return cur;
  1039. }
  1040. void setCurrent();
  1041. };
  1042. class CColumnProvider : public CInterface, implements IColumnProvider
  1043. {
  1044. Linked<IPropertyTree> root, node;
  1045. MemoryBuffer contentMb;
  1046. bool content;
  1047. offset_t contentStartOffset;
  1048. void *utf8Translator;
  1049. CriticalSection crit;
  1050. MemoryBuffer tmpMb;
  1051. MemoryBuffer sharedResult;
  1052. StringAttr xpath;
  1053. void cnv2Latin1(unsigned length, const void *data, MemoryBuffer &mb)
  1054. {
  1055. void *target = mb.reserveTruncate(length);
  1056. if (length == 0)
  1057. return;
  1058. bool f;
  1059. unsigned rl = rtlCodepageConvert(utf8Translator, length, (char *)target, length, (const char *)data, f);
  1060. if (f)
  1061. {
  1062. StringBuffer errMsg("Failure translating utf-8, matching element '");
  1063. errMsg.append(xpath).append("' data: '");
  1064. if (length>100)
  1065. {
  1066. appendDataAsHex(errMsg, 100, data);
  1067. errMsg.append("<TRUNCATED>");
  1068. }
  1069. else
  1070. appendDataAsHex(errMsg, length, data);
  1071. errMsg.append("'");
  1072. throw MakeStringExceptionDirect(0, errMsg.str());
  1073. } else if (length > rl)
  1074. mb.setLength(rl);
  1075. }
  1076. public:
  1077. IMPLEMENT_IINTERFACE;
  1078. CColumnProvider(void *_utf8Translator, IPropertyTree *_root, IPropertyTree *_node, MemoryBuffer *_contentMb, bool ownContent, offset_t _contentStartOffset, const char *_xpath) : root(_root), node(_node), utf8Translator(_utf8Translator), contentStartOffset(_contentStartOffset), xpath(_xpath)
  1079. {
  1080. if (_contentMb)
  1081. {
  1082. content = true;
  1083. if (ownContent)
  1084. contentMb.swapWith(*_contentMb);
  1085. else
  1086. contentMb.setBuffer(_contentMb->length(), (void *)_contentMb->toByteArray());
  1087. }
  1088. else
  1089. content = false;
  1090. }
  1091. bool contentRequest(const char *path, size32_t &offset, size32_t &length)
  1092. {
  1093. StringBuffer subPath;
  1094. if (isContentXPath(path, subPath))
  1095. {
  1096. assertex(content);
  1097. if (subPath.length())
  1098. {
  1099. if ('/' == *path && '/' != *(path+1))
  1100. throw MakeStringException(0, "Cannot extract xml text from absolute path specification: %s", path);
  1101. CPTreeWithOffsets *subTree = (CPTreeWithOffsets *)node->queryPropTree(subPath.str());
  1102. if (subTree)
  1103. {
  1104. offset = (size32_t)(subTree->startOffset-contentStartOffset);
  1105. length = (size32_t)(subTree->endOffset-subTree->startOffset);
  1106. }
  1107. else
  1108. {
  1109. offset = 0;
  1110. length = 0;
  1111. }
  1112. }
  1113. else
  1114. {
  1115. CPTreeWithOffsets *_node = (CPTreeWithOffsets *)node.get();
  1116. if (contentStartOffset != _node->startOffset)
  1117. { // must be child
  1118. offset = (size32_t)(_node->startOffset-contentStartOffset);
  1119. length = (size32_t)(_node->endOffset-_node->startOffset);
  1120. }
  1121. else
  1122. {
  1123. offset = 0;
  1124. length = contentMb.length();
  1125. }
  1126. }
  1127. return true;
  1128. }
  1129. return false;
  1130. }
  1131. inline bool hasProp(const char * path)
  1132. {
  1133. if (path && '/' == *path && '/' != *(path+1))
  1134. return root->hasProp(path+1);
  1135. else
  1136. return node->hasProp(path);
  1137. }
  1138. inline const char * queryProp(const char * path)
  1139. {
  1140. if (path && '/' == *path && '/' != *(path+1))
  1141. return root->queryProp(path+1);
  1142. else
  1143. return node->queryProp(path);
  1144. }
  1145. inline bool getPropBin(const char * path, MemoryBuffer & mb)
  1146. {
  1147. if (path && '/' == *path && '/' != *(path+1))
  1148. return root->getPropBin(path+1, mb);
  1149. else
  1150. return node->getPropBin(path, mb);
  1151. }
  1152. // IColumnProvider
  1153. void getData(size32_t len, void * data, const char * path)
  1154. {
  1155. readData(len, data, path, 0, NULL);
  1156. }
  1157. void getDataX(size32_t & len, void * & data, const char * path)
  1158. {
  1159. readDataX(len, data, path, 0, NULL);
  1160. }
  1161. void getDataRaw(size32_t len, void * data, const char * path)
  1162. {
  1163. readDataRaw(len, data, path, 0, NULL);
  1164. }
  1165. void getDataRawX(size32_t & len, void * & data, const char * path)
  1166. {
  1167. readDataRawX(len, data, path, 0, NULL);
  1168. }
  1169. bool getBool(const char * path)
  1170. {
  1171. return readBool(path, false);
  1172. }
  1173. __int64 getInt(const char * path)
  1174. {
  1175. return readInt(path, 0);
  1176. }
  1177. void getQString(size32_t len, char * text, const char * path)
  1178. {
  1179. readQString(len, text, path, 0, NULL);
  1180. }
  1181. void getString(size32_t len, char * text, const char * path)
  1182. {
  1183. readString(len, text, path, 0, NULL);
  1184. }
  1185. void getStringX(size32_t & len, char * & text, const char * path)
  1186. {
  1187. readStringX(len, text, path, 0, NULL);
  1188. }
  1189. void getUnicodeX(size32_t & len, UChar * & text, const char * path)
  1190. {
  1191. readUnicodeX(len, text, path, 0, NULL);
  1192. }
  1193. void getUtf8X(size32_t & len, char * & text, const char * path)
  1194. {
  1195. readUtf8X(len, text, path, 0, NULL);
  1196. }
  1197. bool getIsSetAll(const char * path)
  1198. {
  1199. return readIsSetAll(path, false);
  1200. }
  1201. IColumnProviderIterator * getChildIterator(const char * path)
  1202. {
  1203. Owned<IPropertyTreeIterator> iter;
  1204. if (path && '/' == *path && '/' != *(path+1))
  1205. iter.setown(root->getElements(path+1));
  1206. else
  1207. iter.setown(node->getElements(path));
  1208. return new CColumnIterator(this, utf8Translator, root, node, iter, content ? &contentMb : NULL, contentStartOffset, xpath);
  1209. }
  1210. //
  1211. virtual void readData(size32_t len, void * data, const char * path, size32_t _lenDefault, const void * _default)
  1212. {
  1213. CriticalBlock b(crit);
  1214. sharedResult.clear();
  1215. size32_t offset, length;
  1216. if (contentRequest(path, offset, length))
  1217. {
  1218. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1219. }
  1220. else
  1221. {
  1222. if (!getPropBin(path, tmpMb.clear()))
  1223. {
  1224. rtlStrToData(len, data, _lenDefault, _default);
  1225. return;
  1226. }
  1227. cnv2Latin1(tmpMb.length(), tmpMb.toByteArray(), sharedResult);
  1228. }
  1229. decodeHexPairs((const char *)sharedResult.toByteArray(), sharedResult.length(), data, len);
  1230. }
  1231. virtual void readDataX(size32_t & len, void * & data, const char * path, size32_t _lenDefault, const void * _default)
  1232. {
  1233. CriticalBlock b(crit);
  1234. sharedResult.clear();
  1235. size32_t offset, length;
  1236. if (contentRequest(path, offset, length))
  1237. {
  1238. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1239. }
  1240. else
  1241. {
  1242. if (!getPropBin(path, tmpMb.clear()))
  1243. {
  1244. rtlStrToDataX(len, data, _lenDefault, _default);
  1245. return;
  1246. }
  1247. cnv2Latin1(tmpMb.length(), tmpMb.toByteArray(), sharedResult);
  1248. }
  1249. decodeHexPairsX((const char *)sharedResult.toByteArray(), sharedResult.length(), data, len);
  1250. }
  1251. virtual void readDataRaw(size32_t len, void * data, const char * path, size32_t _lenDefault, const void * _default)
  1252. {
  1253. CriticalBlock b(crit);
  1254. sharedResult.clear();
  1255. size32_t offset, length;
  1256. if (contentRequest(path, offset, length))
  1257. {
  1258. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1259. }
  1260. else
  1261. {
  1262. if (!getPropBin(path, tmpMb.clear()))
  1263. {
  1264. rtlStrToData(len, data, _lenDefault, _default);
  1265. return;
  1266. }
  1267. }
  1268. memcpy(data, sharedResult.toByteArray(), sharedResult.length());
  1269. if (len < sharedResult.length())
  1270. memset((byte*)data + sharedResult.length(), 0, len-sharedResult.length());
  1271. }
  1272. virtual void readDataRawX(size32_t & len, void * & data, const char * path, size32_t _lenDefault, const void * _default)
  1273. {
  1274. CriticalBlock b(crit);
  1275. sharedResult.clear();
  1276. size32_t offset, length;
  1277. if (contentRequest(path, offset, length))
  1278. {
  1279. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1280. }
  1281. else
  1282. {
  1283. if (!getPropBin(path, tmpMb.clear()))
  1284. {
  1285. rtlStrToDataX(len, data, _lenDefault, _default);
  1286. return;
  1287. }
  1288. }
  1289. len = tmpMb.length();
  1290. if (len)
  1291. {
  1292. data = malloc(len);
  1293. memcpy(data, tmpMb.toByteArray(), len);
  1294. }
  1295. else
  1296. data = NULL;
  1297. }
  1298. virtual bool readBool(const char * path, bool _default)
  1299. {
  1300. size32_t offset, length;
  1301. if (contentRequest(path, offset, length))
  1302. throw MakeStringException(0, "Attempting to extract xml content text as boolean");
  1303. const char *str = queryProp(path);
  1304. if (!str) return _default;
  1305. CriticalBlock b(crit);
  1306. cnv2Latin1((size32_t)strlen(str), str, sharedResult.clear());
  1307. size32_t resultLen = sharedResult.length();
  1308. const char * resultText = (const char *)sharedResult.toByteArray();
  1309. return strToBool(resultLen, resultText);
  1310. }
  1311. virtual __int64 readInt(const char * path, __int64 _default)
  1312. {
  1313. size32_t offset, length;
  1314. if (contentRequest(path, offset, length))
  1315. throw MakeStringException(0, "Attempting to extract xml content text as integer");
  1316. const char *str = queryProp(path);
  1317. if (!str) return _default;
  1318. CriticalBlock b(crit);
  1319. cnv2Latin1((size32_t)strlen(str), str, sharedResult.clear());
  1320. return atoi64_l((const char *)sharedResult.toByteArray(), sharedResult.length());
  1321. }
  1322. virtual void readQString(size32_t len, char * text, const char * path, size32_t _lenDefault, const char * _default)
  1323. {
  1324. CriticalBlock b(crit);
  1325. sharedResult.clear();
  1326. size32_t offset, length;
  1327. if (contentRequest(path, offset, length))
  1328. {
  1329. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1330. }
  1331. else
  1332. {
  1333. const char *str = queryProp(path);
  1334. if (str)
  1335. cnv2Latin1((size32_t)strlen(str), str, sharedResult);
  1336. else
  1337. {
  1338. rtlQStrToQStr(len, text, _lenDefault, _default);
  1339. return;
  1340. }
  1341. }
  1342. rtlStrToQStr(len, text, sharedResult.length(), sharedResult.toByteArray());
  1343. }
  1344. virtual void readString(size32_t len, char * text, const char * path, size32_t _lenDefault, const char * _default)
  1345. {
  1346. CriticalBlock b(crit);
  1347. sharedResult.clear();
  1348. size32_t offset, length;
  1349. if (contentRequest(path, offset, length))
  1350. {
  1351. cnv2Latin1(length, contentMb.toByteArray()+offset, sharedResult);
  1352. }
  1353. else
  1354. {
  1355. const char *str = queryProp(path);
  1356. if (str)
  1357. cnv2Latin1((size32_t)strlen(str), str, sharedResult);
  1358. else
  1359. {
  1360. rtlStrToStr(len, text, _lenDefault, _default);
  1361. return;
  1362. }
  1363. }
  1364. rtlStrToStr(len, text, sharedResult.length(), sharedResult.toByteArray());
  1365. }
  1366. virtual void readStringX(size32_t & len, char * & text, const char * path, size32_t _lenDefault, const char * _default)
  1367. {
  1368. MemoryBuffer result;
  1369. size32_t offset, length;
  1370. if (contentRequest(path, offset, length))
  1371. {
  1372. if (length)
  1373. cnv2Latin1(length, contentMb.toByteArray()+offset, result);
  1374. }
  1375. else
  1376. {
  1377. const char *str = queryProp(path);
  1378. if (str)
  1379. cnv2Latin1((size32_t)strlen(str), str, result);
  1380. else
  1381. {
  1382. rtlStrToStrX(len, text, _lenDefault, _default);
  1383. return;
  1384. }
  1385. }
  1386. len = result.length();
  1387. text = (char *) result.detach();
  1388. }
  1389. virtual void readUnicodeX(size32_t & len, UChar * & text, const char * path, size32_t _lenDefault, const UChar * _default)
  1390. {
  1391. size32_t offset, length;
  1392. if (contentRequest(path, offset, length))
  1393. {
  1394. rtlCodepageToUnicodeX(len, text, length, contentMb.toByteArray()+offset, "utf-8");
  1395. }
  1396. else
  1397. {
  1398. CriticalBlock b(crit);
  1399. const char *tmpPtr = queryProp(path);
  1400. if (tmpPtr)
  1401. rtlCodepageToUnicodeX(len, text, strlen(tmpPtr), tmpPtr, "utf-8");
  1402. else
  1403. rtlUnicodeToUnicodeX(len, text, _lenDefault, _default);
  1404. }
  1405. }
  1406. virtual void readUtf8X(size32_t & len, char * & text, const char * path, size32_t _lenDefault, const char * _default)
  1407. {
  1408. size32_t offset, length;
  1409. size32_t size;
  1410. if (contentRequest(path, offset, length))
  1411. {
  1412. rtlStrToStrX(size, text, length, contentMb.toByteArray()+offset);
  1413. }
  1414. else
  1415. {
  1416. CriticalBlock b(crit);
  1417. const char *tmpPtr = queryProp(path);
  1418. if (tmpPtr)
  1419. {
  1420. rtlStrToStrX(size, text, strlen(tmpPtr), tmpPtr);
  1421. }
  1422. else
  1423. {
  1424. rtlUtf8ToUtf8X(len, text, _lenDefault, _default);
  1425. return;
  1426. }
  1427. }
  1428. len = rtlUtf8Length(size, text);
  1429. }
  1430. virtual bool readIsSetAll(const char * path, bool _default)
  1431. {
  1432. if (hasProp(path))
  1433. {
  1434. StringBuffer fullpath;
  1435. fullpath.append(path).append("/All");
  1436. if (path && '/' == *path && '/' != *(path+1))
  1437. return root->hasProp(fullpath.str()+1);
  1438. else
  1439. return node->hasProp(fullpath.str());
  1440. }
  1441. return _default;
  1442. }
  1443. };
  1444. void CColumnIterator::setCurrent()
  1445. {
  1446. Owned<IPropertyTree> curTree = &iter->get();
  1447. if (contentMb)
  1448. cur.setown(new CColumnProvider(utf8Translator, root, curTree, contentMb, false, contentStartOffset, xpath));
  1449. else
  1450. cur.setown(new CColumnProvider(utf8Translator, root, curTree, NULL, false, 0, xpath));
  1451. }
  1452. class CXMLParse : public CInterface, implements IXMLParse
  1453. {
  1454. IPullPTreeReader *xmlReader;
  1455. StringAttr xpath;
  1456. IXMLSelect *iXMLSelect; // NOTE - not linked - creates circular links
  1457. PTreeReaderOptions xmlOptions;
  1458. bool step, contentRequired;
  1459. class CXMLMaker : public CInterface, implements IPTreeMaker
  1460. {
  1461. CXPath xpath;
  1462. IXMLSelect *iXMLSelect; // NOTE - not linked - creates circular links
  1463. CopyCIArrayOf<CParseStackInfo> stack, freeParseInfo;
  1464. IPTreeMaker *maker;
  1465. Linked<CMarkReadBase> marking;
  1466. Owned<COffsetNodeCreator> nodeCreator;
  1467. void *utf8Translator;
  1468. unsigned level;
  1469. bool contentRequired;
  1470. unsigned lastMatchKeptLevel;
  1471. IPropertyTree *lastMatchKeptNode, *lastMatchKeptNodeParent;
  1472. public:
  1473. IMPLEMENT_IINTERFACE;
  1474. CXMLMaker(const char *_xpath, IXMLSelect &_iXMLSelect, bool _contentRequired, bool ignoreNameSpaces) : xpath(_xpath, ignoreNameSpaces), iXMLSelect(&_iXMLSelect), contentRequired(_contentRequired)
  1475. {
  1476. lastMatchKeptLevel = 0;
  1477. lastMatchKeptNode = lastMatchKeptNodeParent = NULL;
  1478. maker = NULL;
  1479. utf8Translator = NULL;
  1480. }
  1481. ~CXMLMaker()
  1482. {
  1483. ForEachItemIn(i, stack)
  1484. delete &stack.item(i);
  1485. ForEachItemIn(i2, freeParseInfo)
  1486. delete &freeParseInfo.item(i2);
  1487. ::Release(maker);
  1488. rtlCloseCodepageConverter(utf8Translator);
  1489. }
  1490. void init()
  1491. {
  1492. level = 0;
  1493. nodeCreator.setown(new COffsetNodeCreator());
  1494. maker = createRootLessPTreeMaker(ipt_none, NULL, nodeCreator);
  1495. bool f;
  1496. utf8Translator = rtlOpenCodepageConverter("utf-8", "latin1", f);
  1497. if (f)
  1498. throw MakeStringException(0, "Failed to initialize unicode utf-8 translator");
  1499. }
  1500. void setMarkingStream(CMarkReadBase &_marking) { marking.set(&_marking); }
  1501. CXPath &queryXPath() { return xpath; }
  1502. // IPTreeMaker
  1503. virtual void beginNode(const char *tag, offset_t startOffset)
  1504. {
  1505. if (lastMatchKeptNode && level == lastMatchKeptLevel)
  1506. {
  1507. // NB: could be passed to match objects for removal by match object,
  1508. // but dubious if useful for greater than one path to exist above match.
  1509. if (lastMatchKeptNodeParent)
  1510. lastMatchKeptNodeParent->removeTree(lastMatchKeptNode);
  1511. else
  1512. maker->reset();
  1513. lastMatchKeptNode = NULL;
  1514. }
  1515. bool res = false;
  1516. CParseStackInfo *stackInfo;
  1517. if (freeParseInfo.ordinality())
  1518. {
  1519. stackInfo = &freeParseInfo.pop();
  1520. stackInfo->reset();
  1521. }
  1522. else
  1523. stackInfo = new CParseStackInfo();
  1524. stackInfo->startOffset = startOffset;
  1525. if (!stack.ordinality())
  1526. {
  1527. if (0 == xpath.queryDepth() || xpath.match(0, tag))
  1528. {
  1529. if (1 >= xpath.queryDepth())
  1530. {
  1531. if (contentRequired)
  1532. {
  1533. assertex(marking);
  1534. marking->mark(startOffset); // mark stream at tag start offset
  1535. }
  1536. }
  1537. res = true;
  1538. }
  1539. }
  1540. else if (xpath.queryDepth())
  1541. {
  1542. if (stack.tos().keep)
  1543. {
  1544. if (level >= xpath.queryDepth())
  1545. res = true;
  1546. else if (xpath.match(level, tag))
  1547. {
  1548. res = true;
  1549. if (level == xpath.queryDepth()-1)
  1550. {
  1551. if (contentRequired)
  1552. {
  1553. assertex(marking);
  1554. marking->mark(startOffset); // mark stream at tag start offset
  1555. }
  1556. }
  1557. }
  1558. else if (level > ((unsigned)xpath.queryHighestQualifier()))
  1559. {
  1560. stackInfo->keptForQualifier = true;
  1561. res = true; // construct content below qualified tag (!=simple) needed to qualify when back at topQ.
  1562. }
  1563. }
  1564. }
  1565. else
  1566. res = true;
  1567. stackInfo->keep = res;
  1568. stack.append(*stackInfo);
  1569. if (res)
  1570. {
  1571. maker->beginNode(tag, startOffset);
  1572. CPTreeWithOffsets *current = (CPTreeWithOffsets *)maker->queryCurrentNode();
  1573. current->startOffset = startOffset;
  1574. stackInfo->nodeMade = res;
  1575. stackInfo->iPTMade = current;
  1576. }
  1577. }
  1578. virtual void newAttribute(const char *tag, const char *value)
  1579. {
  1580. if (stack.tos().keep)
  1581. maker->newAttribute(tag, value);
  1582. }
  1583. virtual void beginNodeContent(const char *tag)
  1584. {
  1585. // Can optimize qualifiers here that contain only attribute tests.
  1586. bool &keep = stack.tos().keep;
  1587. if (keep)
  1588. {
  1589. if (xpath.toQualify(level, true))
  1590. {
  1591. IPropertyTree *currentNode = maker->queryCurrentNode();
  1592. keep = xpath.qualify(*currentNode, level);
  1593. }
  1594. }
  1595. level++;
  1596. }
  1597. virtual void endNode(const char *tag, unsigned length, const void *value, bool binary, offset_t endOffset)
  1598. {
  1599. --level;
  1600. CParseStackInfo &stackInfo = stack.tos();
  1601. bool keep = stackInfo.keep;
  1602. bool nodeMade = stackInfo.nodeMade;
  1603. IPropertyTree *currentNode = maker->queryCurrentNode();
  1604. if (nodeMade)
  1605. {
  1606. CPTreeWithOffsets *current = (CPTreeWithOffsets *)maker->queryCurrentNode();
  1607. current->endOffset = endOffset;
  1608. maker->endNode(tag, length, value, binary, endOffset);
  1609. }
  1610. if (keep)
  1611. {
  1612. if (!stackInfo.keptForQualifier)
  1613. if (xpath.toQualify(level, false))
  1614. keep = xpath.qualify(*currentNode, level);
  1615. }
  1616. bool matched = false;
  1617. if (keep)
  1618. {
  1619. if (!stackInfo.keptForQualifier)
  1620. {
  1621. if ((0 == xpath.queryDepth() && 0 == level) || level == xpath.queryDepth()-1)
  1622. {
  1623. unsigned topQ = xpath.queryHighestQualifier();
  1624. unsigned noHigherQualifiers = -1 == topQ || topQ >= level;
  1625. IPropertyTree *parent = stack.ordinality()>=2?stack.item(stack.ordinality()-2).iPTMade:NULL;
  1626. if (noHigherQualifiers)
  1627. {
  1628. MemoryBuffer mb;
  1629. MemoryBuffer *content;
  1630. if (contentRequired)
  1631. {
  1632. assertex(marking);
  1633. marking->getMarkTo(endOffset, mb);
  1634. content = &mb;
  1635. }
  1636. else
  1637. content = NULL;
  1638. CPTreeWithOffsets *currentNodeWO = (CPTreeWithOffsets *)currentNode;
  1639. Owned<CColumnProvider> provider = new CColumnProvider(utf8Translator, maker->queryRoot(), currentNode, content, true, currentNodeWO->startOffset, xpath.queryXPathStr());
  1640. iXMLSelect->match(*provider, stackInfo.startOffset, endOffset);
  1641. matched = true;
  1642. }
  1643. else
  1644. {
  1645. // only prospective match - depends on higher qualifiers being satisfied.
  1646. if (!stackInfo.prospectiveMatches)
  1647. stackInfo.prospectiveMatches = new CProcespectiveMatchArray;
  1648. MemoryBuffer *tagContent = NULL;
  1649. if (contentRequired)
  1650. {
  1651. tagContent = new MemoryBuffer;
  1652. marking->getMarkTo(endOffset, *tagContent);
  1653. }
  1654. stackInfo.prospectiveMatches->append(*new CProspectiveMatch(parent, currentNode, tagContent));
  1655. }
  1656. }
  1657. else if (stackInfo.prospectiveMatches && stackInfo.prospectiveMatches->ordinality() && level < xpath.queryDepth()-1)
  1658. {
  1659. unsigned topQ = xpath.queryHighestQualifier();
  1660. unsigned noHigherQualifiers = -1 == topQ || topQ >= level;
  1661. if (noHigherQualifiers)
  1662. {
  1663. ForEachItemIn(m, *stackInfo.prospectiveMatches)
  1664. {
  1665. CProspectiveMatch &prospectiveMatch = stackInfo.prospectiveMatches->item(m);
  1666. CPTreeWithOffsets *prospectiveNodeWO = (CPTreeWithOffsets *)prospectiveMatch.node;
  1667. Owned<CColumnProvider> provider = new CColumnProvider(utf8Translator, maker->queryRoot(), prospectiveMatch.node, prospectiveMatch.content, true, prospectiveNodeWO->startOffset, xpath.queryXPathStr());
  1668. // NB: caveat; if complex qualifiers on intermediate iterator nodes and fully qualified attributes
  1669. // are access from this match, there are potential ambiguities in the lookup.
  1670. iXMLSelect->match(*provider, stackInfo.startOffset, endOffset);
  1671. matched = true;
  1672. }
  1673. stackInfo.prospectiveMatches->kill();
  1674. stackInfo.prospectiveMatches = NULL;
  1675. }
  1676. }
  1677. else
  1678. {
  1679. if (NULL == lastMatchKeptNode && level < xpath.queryDepth())
  1680. keep = false;
  1681. }
  1682. }
  1683. }
  1684. else
  1685. {
  1686. if (contentRequired && ((0==level && 0==xpath.queryDepth()) || level == xpath.queryDepth()-1))
  1687. {
  1688. assertex(marking);
  1689. marking->closeMark();
  1690. }
  1691. }
  1692. freeParseInfo.append(stackInfo);
  1693. if (keep && stackInfo.prospectiveMatches && stackInfo.prospectiveMatches->ordinality())
  1694. {
  1695. Linked<CParseStackInfo> childStackInfo = &stackInfo;
  1696. stack.pop();
  1697. if (stack.ordinality())
  1698. {
  1699. CParseStackInfo &parentSI = stack.tos();
  1700. if (!parentSI.prospectiveMatches)
  1701. parentSI.prospectiveMatches = new CProcespectiveMatchArray;
  1702. ForEachItemIn(p, *stackInfo.prospectiveMatches)
  1703. parentSI.prospectiveMatches->append(*LINK(&stackInfo.prospectiveMatches->item(p)));
  1704. }
  1705. }
  1706. else
  1707. stack.pop();
  1708. // Track last level kept
  1709. if (lastMatchKeptNode || (keep && matched))
  1710. {
  1711. assertex(nodeMade);
  1712. lastMatchKeptLevel = level;
  1713. lastMatchKeptNode = currentNode;
  1714. lastMatchKeptNodeParent = maker->queryCurrentNode();
  1715. }
  1716. else if (!keep && nodeMade)
  1717. {
  1718. IPropertyTree *parent = maker->queryCurrentNode();
  1719. if (parent)
  1720. parent->removeTree(currentNode);
  1721. }
  1722. currentNode = NULL;
  1723. }
  1724. virtual IPropertyTree *queryRoot() { return maker->queryRoot(); }
  1725. virtual IPropertyTree *queryCurrentNode() { return maker->queryCurrentNode(); }
  1726. virtual void reset()
  1727. {
  1728. level = 0;
  1729. ForEachItemIn(i, stack)
  1730. delete &stack.item(i);
  1731. ForEachItemIn(i2, freeParseInfo)
  1732. delete &freeParseInfo.item(i2);
  1733. stack.kill();
  1734. freeParseInfo.kill();
  1735. if (marking)
  1736. marking->reset();
  1737. }
  1738. virtual IPropertyTree *create(const char *tag)
  1739. {
  1740. return nodeCreator->create(tag);
  1741. }
  1742. } *iXMLMaker;
  1743. public:
  1744. IMPLEMENT_IINTERFACE;
  1745. CXMLParse(const char *fName, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step) { init(); go(fName); }
  1746. CXMLParse(IFile &ifile, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step) { init(); go(ifile); }
  1747. CXMLParse(IFileIO &fileio, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step) { init(); go(fileio); }
  1748. CXMLParse(ISimpleReadStream &stream, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step) { init(); go(stream); }
  1749. CXMLParse(const void *buffer, unsigned bufLen, const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step) { init(); go(buffer, bufLen); }
  1750. CXMLParse(const char *_xpath, IXMLSelect &_iXMLSelect, PTreeReaderOptions _xmlOptions=ptr_none, bool _contentRequired=true, bool _step=true) : xpath(_xpath), iXMLSelect(&_iXMLSelect), xmlOptions(_xmlOptions), contentRequired(_contentRequired), step(_step) { init(); }
  1751. ~CXMLParse()
  1752. {
  1753. ::Release(iXMLMaker);
  1754. ::Release(xmlReader);
  1755. }
  1756. void init()
  1757. {
  1758. xmlReader = NULL;
  1759. bool ignoreNameSpaces = 0 != ((unsigned)xmlOptions & (unsigned)ptr_ignoreNameSpaces);
  1760. iXMLMaker = new CXMLMaker(xpath, *iXMLSelect, contentRequired, ignoreNameSpaces);
  1761. iXMLMaker->init();
  1762. }
  1763. void go(const char *fName)
  1764. {
  1765. OwnedIFile ifile = createIFile(fName);
  1766. go(*ifile);
  1767. }
  1768. void go(IFile &file)
  1769. {
  1770. OwnedIFileIO ifileio = file.open(IFOread);
  1771. if (!ifileio)
  1772. throw MakeStringException(0, "Failed to open: %s", file.queryFilename());
  1773. go(*ifileio);
  1774. }
  1775. void go(IFileIO &fileio)
  1776. {
  1777. Owned<IIOStream> stream = createIOStream(&fileio);
  1778. go(*stream);
  1779. }
  1780. void go(ISimpleReadStream &stream)
  1781. {
  1782. if (contentRequired)
  1783. {
  1784. // only need marking stream if fetching xml text content.
  1785. Owned<CMarkReadStream> markingStream = new CMarkReadStream(*LINK(&stream));
  1786. iXMLMaker->setMarkingStream(*markingStream);
  1787. xmlReader = createPullXMLStreamReader(*markingStream, *iXMLMaker, xmlOptions);
  1788. }
  1789. else
  1790. xmlReader = createPullXMLStreamReader(stream, *iXMLMaker, xmlOptions);
  1791. if (!step)
  1792. {
  1793. xmlReader->load();
  1794. xmlReader->Release();
  1795. xmlReader = NULL;
  1796. }
  1797. }
  1798. void go(const void *buffer, unsigned bufLen)
  1799. {
  1800. if (contentRequired)
  1801. {
  1802. Owned<CMarkReadBase> markingStream = new CMarkRead(buffer, bufLen);
  1803. iXMLMaker->setMarkingStream(*markingStream);
  1804. }
  1805. xmlReader = createPullXMLBufferReader(buffer, bufLen, *iXMLMaker, xmlOptions);
  1806. if (!step)
  1807. {
  1808. xmlReader->load();
  1809. xmlReader->Release();
  1810. xmlReader = NULL;
  1811. }
  1812. }
  1813. void provideXML(const char *str)
  1814. {
  1815. if (contentRequired)
  1816. {
  1817. Owned<CMarkReadBase> markingStream = new CMarkRead(str, strlen(str));
  1818. iXMLMaker->setMarkingStream(*markingStream);
  1819. }
  1820. xmlReader = createPullXMLStringReader(str, *iXMLMaker, xmlOptions);
  1821. if (!step)
  1822. {
  1823. xmlReader->load();
  1824. xmlReader->Release();
  1825. xmlReader = NULL;
  1826. }
  1827. }
  1828. // IXMLParse
  1829. virtual bool next()
  1830. {
  1831. return xmlReader->next();
  1832. }
  1833. virtual void reset()
  1834. {
  1835. iXMLMaker->reset();
  1836. xmlReader->reset();
  1837. }
  1838. };
  1839. IXMLParse *createXMLParse(const char *filename, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1840. {
  1841. return new CXMLParse(filename, xpath, iselect, xmlOptions, contentRequired);
  1842. }
  1843. IXMLParse *createXMLParse(ISimpleReadStream &stream, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1844. {
  1845. return new CXMLParse(stream, xpath, iselect, xmlOptions, contentRequired);
  1846. }
  1847. IXMLParse *createXMLParse(const void *buffer, unsigned bufLen, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1848. {
  1849. return new CXMLParse(buffer, bufLen, xpath, iselect, xmlOptions, contentRequired);
  1850. }
  1851. IXMLParse *createXMLParseString(const char *string, const char *xpath, IXMLSelect &iselect, PTreeReaderOptions xmlOptions, bool contentRequired)
  1852. {
  1853. CXMLParse *parser = new CXMLParse(xpath, iselect, xmlOptions, contentRequired);
  1854. parser->provideXML(string);
  1855. return parser;
  1856. }