xsdparser.cpp 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #pragma warning(disable:4786)
  14. #include <limits.h>
  15. #include <map>
  16. #include <string>
  17. #include <algorithm>
  18. #include <vector>
  19. #include "xsdparser.hpp"
  20. #include "jstring.hpp"
  21. #include "jexcept.hpp"
  22. #include "jptree.hpp"
  23. #include "jlog.hpp"
  24. //==========================================================
  25. // Definitions
  26. // TODO: only support string type for now
  27. class CXmlAttribute : extends CInterface, implements IXmlAttribute
  28. {
  29. StringAttr m_name, m_defValue;
  30. public:
  31. IMPLEMENT_IINTERFACE;
  32. CXmlAttribute(const char* name) : m_name(name) { }
  33. const char* queryName() { return m_name.get(); }
  34. const char* queryTypeName() { return "string"; }
  35. bool isUseRequired() { return false; }
  36. const char* getFixedValue() { return NULL; }
  37. void setDefaultValue(const char* v) { m_defValue.set(v); }
  38. bool hasDefaultValue() { return m_defValue.get()!=NULL; }
  39. const char* getDefaultValue() { return m_defValue.get(); }
  40. virtual const char* getSampleValue(StringBuffer& out)
  41. {
  42. const char* s = getFixedValue();
  43. if (s)
  44. return out.append(s).str();
  45. out.appendf("[@%s]", queryName());
  46. return out.str();
  47. }
  48. };
  49. class CSimpleType : extends CInterface, implements IXmlType
  50. {
  51. StringAttr m_name, m_defValue;
  52. CXmlAttribute** m_attrs;
  53. size_t m_nAttrs;
  54. public:
  55. IMPLEMENT_IINTERFACE;
  56. CSimpleType(const char* name, size_t nAttrs=0, CXmlAttribute** attrs=NULL)
  57. : m_name(name), m_nAttrs(nAttrs), m_attrs(attrs) { }
  58. virtual ~CSimpleType()
  59. {
  60. if (m_attrs)
  61. {
  62. for (size_t i=0; i<m_nAttrs; i++)
  63. m_attrs[i]->Release();
  64. delete[] m_attrs;
  65. }
  66. }
  67. XmlSubType getSubType() { return SubType_Default; }
  68. bool isArray() { return false; }
  69. bool isComplexType() { return false; }
  70. size_t getFieldCount() { return 0; }
  71. IXmlType* queryFieldType(int idx) { return NULL; }
  72. const char* queryFieldName(int idx) { return NULL; }
  73. const char* queryName() { return m_name.get(); }
  74. size_t getAttrCount() { return m_nAttrs; }
  75. IXmlAttribute* queryAttr(int idx) { assert(idx>=0 && (unsigned)idx<m_nAttrs); return m_attrs[idx]; }
  76. const char* queryAttrName(int idx) { assert(idx>=0 && (unsigned)idx<m_nAttrs); return m_attrs[idx]->queryName(); }
  77. void setDefaultValue(const char* v) { m_defValue.set(v); }
  78. bool hasDefaultValue() { return m_defValue.get()!=NULL; }
  79. const char* getDefaultValue() { return m_defValue.get(); }
  80. virtual void getSampleValue(StringBuffer& out, const char* fieldName);
  81. void toString(StringBuffer& s, int indent, StringStack& parent)
  82. { s.appendf("Simple Type: %s", m_name.get()); }
  83. };
  84. void CSimpleType::getSampleValue(StringBuffer& out, const char* fieldName)
  85. {
  86. const char* name = queryName();
  87. // use default value if exists
  88. if (hasDefaultValue())
  89. out.append(getDefaultValue());
  90. // string
  91. else if (streq(name,"string"))
  92. {
  93. if (fieldName)
  94. out.appendf("[ %s ]", fieldName);
  95. else
  96. out.append("String");
  97. }
  98. // numerical
  99. else if (streq(name, "short"))
  100. out.append("4096");
  101. else if (streq(name,"int") || streq(name, "integer"))
  102. out.append("32716");
  103. else if (streq(name, "long"))
  104. out.append("2147483647");
  105. else if (streq(name, "float"))
  106. out.append("3.14159");
  107. else if (streq(name, "double"))
  108. out.append("3.14159265358979");
  109. else if (streq(name, "boolean"))
  110. out.append("1");
  111. else if (streq(name, "nonPositiveInteger"))
  112. out.append("-1");
  113. else if (streq(name, "negativeInteger"))
  114. out.append("-2");
  115. else if (streq(name, "byte"))
  116. out.append("127");
  117. else if (streq(name, "nonNegativeInteger"))
  118. out.append("1");
  119. else if (streq(name, "positiveInteger"))
  120. out.append("2");
  121. else if (streq(name, "unsignedLong"))
  122. out.append("4294967295");
  123. else if (streq(name, "unsignedInt"))
  124. out.append("0");
  125. else if (streq(name, "unsignedShort"))
  126. out.append("65535");
  127. else if (streq(name, "unsignedByte"))
  128. out.append("255");
  129. else if (streq(name, "decimal"))
  130. out.append("3.1415926535897932384626433832795");
  131. // time
  132. else if (streq(name, "duration"))
  133. out.append("P1Y2M3DT10H30M");
  134. else if (streq(name, "dateTime"))
  135. out.append("2007-10-23 11:34:30");
  136. else if (streq(name, "time"))
  137. out.append("11:34:30");
  138. else if (streq(name, "date"))
  139. out.append("2007-10-23");
  140. else if (streq(name, "gYearMonth"))
  141. out.append("2007-10");
  142. else if (streq(name, "gYear"))
  143. out.append("2007");
  144. else if (streq(name, "gMonthDay"))
  145. out.append("--10-23");
  146. else if (streq(name, "gDay"))
  147. out.append("---23");
  148. else if (streq(name, "gMonth"))
  149. out.append("--10--");
  150. // other
  151. else if (streq(name, "hexBinary"))
  152. out.append("A9D4C56EFB");
  153. else if (streq(name, "base64Binary"))
  154. out.append("YmFzZTY0QmluYXJ5");
  155. else if (streq(name, "anyURI"))
  156. out.append("http://anyURI/");
  157. else if (streq(name, "QName"))
  158. out.append("q:name");
  159. else if (streq(name, "NOTATION"))
  160. out.append("NOTATION");
  161. else if (streq(name, "normalizedString"))
  162. out.append("normalizedString");
  163. else if (streq(name, "token"))
  164. out.append("token");
  165. else if (streq(name, "language"))
  166. out.append("en-us");
  167. // unhandled
  168. else
  169. out.appendf("%s value", name);
  170. }
  171. //==========================================================
  172. // RestrictionFacetType
  // Restriction facet identifiers. Each facet occupies its own bit so that a
  // set of facets can be OR-ed together into a single int.
  typedef int RestrictionFacetType;
  const RestrictionFacetType RF_MinLength      = 1 << 0;
  const RestrictionFacetType RF_MaxLength      = 1 << 1;
  const RestrictionFacetType RF_MinExclusive   = 1 << 2;
  const RestrictionFacetType RF_MaxExclusive   = 1 << 3;
  const RestrictionFacetType RF_MinInclusive   = 1 << 4;
  const RestrictionFacetType RF_MaxInclusive   = 1 << 5;
  const RestrictionFacetType RF_Enumeration    = 1 << 6;
  const RestrictionFacetType RF_Pattern        = 1 << 7;
  const RestrictionFacetType RF_WhiteSpace     = 1 << 8;
  const RestrictionFacetType RF_TotalDigits    = 1 << 9;
  const RestrictionFacetType RF_FractionDigits = 1 << 10;
  const RestrictionFacetType RF_Length         = 1 << 11;

  // Returns the symbolic name of a single facet flag, or "Unknown type" when
  // 'type' is not exactly one of the RF_* constants (for example a combination).
  static const char* getFacetName(RestrictionFacetType type)
  {
      static const struct { RestrictionFacetType facet; const char* label; } facetNames[] =
      {
          { RF_MinLength,      "RF_MinLength" },
          { RF_MaxLength,      "RF_MaxLength" },
          { RF_MinExclusive,   "RF_MinExclusive" },
          { RF_MaxExclusive,   "RF_MaxExclusive" },
          { RF_MinInclusive,   "RF_MinInclusive" },
          { RF_MaxInclusive,   "RF_MaxInclusive" },
          { RF_Enumeration,    "RF_Enumeration" },
          { RF_Pattern,        "RF_Pattern" },
          { RF_WhiteSpace,     "RF_WhiteSpace" },
          { RF_TotalDigits,    "RF_TotalDigits" },
          { RF_FractionDigits, "RF_FractionDigits" },
          { RF_Length,         "RF_Length" },
      };

      const size_t nameCount = sizeof(facetNames) / sizeof(facetNames[0]);
      for (size_t pos = 0; pos < nameCount; ++pos)
      {
          if (facetNames[pos].facet == type)
              return facetNames[pos].label;
      }
      return "Unknown type";
  }
  // Values of the whiteSpace restriction facet; CRestrictionType::toString
  // prints them as "preserve", "replace" and "collapse".
  enum RestrictionWhiteSpace
  {
      RF_WS_Preserve = 0,
      RF_WS_Replace  = 1,
      RF_WS_Collapse = 2
  };
  211. union RestrictionFacetValue
  212. {
  213. int intValue;
  214. double doubleValue;
  215. StringAttr* pattern;
  216. StringArray* enums;
  217. RestrictionWhiteSpace whiteSpace;
  218. };
  219. struct RestrictionFacet
  220. {
  221. RestrictionFacetType type;
  222. RestrictionFacetValue value;
  223. RestrictionFacet(RestrictionFacetType t, RestrictionFacetValue v) : type(t), value(v) { }
  224. };
  225. class CRestrictionType : extends CSimpleType
  226. {
  227. IXmlType* m_baseType;
  228. int m_types;
  229. typedef std::vector<RestrictionFacet> FacetArray;
  230. FacetArray m_facets;
  231. public:
  232. CRestrictionType(const char* name, IXmlType* base)
  233. : CSimpleType(name), m_baseType(base), m_types(0)
  234. { }
  235. void addFacet(RestrictionFacetType type, RestrictionFacetValue value);
  236. RestrictionFacetValue queryFacetValue(RestrictionFacetType type);
  237. IXmlType* queryBaseType() { return m_baseType; }
  238. virtual void getSampleValue(StringBuffer& out, const char* fieldName);
  239. virtual void toString(StringBuffer& s, int indent, StringStack& parent);
  240. };
  241. void CRestrictionType::addFacet(RestrictionFacetType type, RestrictionFacetValue value)
  242. {
  243. if (m_types & type)
  244. ERRLOG(-1,"Error in CRestrictionType::addFacet: one facet type can only have one value");
  245. else
  246. {
  247. m_types |= type;
  248. m_facets.push_back(RestrictionFacet(type,value));
  249. }
  250. }
  251. RestrictionFacetValue CRestrictionType::queryFacetValue(RestrictionFacetType type)
  252. {
  253. if (m_types & type)
  254. {
  255. for (FacetArray::const_iterator it = m_facets.begin(); it != m_facets.end(); it++)
  256. {
  257. if (it->type == type)
  258. return it->value;
  259. }
  260. }
  261. throw MakeStringException(-1,"Error in CRestrictionType::queryFacetValue: unknown facet: %s", getFacetName(type));
  262. }
  263. void CRestrictionType::getSampleValue(StringBuffer& out, const char* fieldName)
  264. {
  265. if (m_types & RF_Enumeration)
  266. {
  267. assert(m_facets.size()==1 && m_facets[0].type==RF_Enumeration);
  268. int count = m_facets[0].value.enums->length();
  269. out.append(m_facets[0].value.enums->item(count>1 ? 1 : 0));
  270. }
  271. else if (m_types & RF_MaxLength)
  272. {
  273. if (streq(queryBaseType()->queryName(), "string"))
  274. {
  275. int maxLength = queryFacetValue(RF_MaxLength).intValue;
  276. if (maxLength==1)
  277. out.append('[');
  278. else if (maxLength>=2)
  279. {
  280. const char* name = fieldName ? fieldName : queryName();
  281. out.append('[');
  282. int gap = maxLength - 2 - strlen(name);
  283. if (gap <= 0)
  284. out.append(maxLength-2, name);
  285. else if (gap<=2)
  286. out.append(' ').append(name);
  287. else {
  288. out.appendN((gap-2)/2, 'X');
  289. out.append(' ').append(name).append(' ');
  290. out.appendN((gap-2+1)/2,'X');
  291. }
  292. out.append(']');
  293. }
  294. }
  295. else
  296. {
  297. m_baseType->getSampleValue(out,fieldName);
  298. }
  299. }
  300. else
  301. {
  302. ERRLOG("CRestrictionType::getSampleValue() unimplemeted yet");
  303. m_baseType->getSampleValue(out,fieldName);
  304. }
  305. }
  306. void CRestrictionType::toString(StringBuffer& s, int indent, StringStack& parent)
  307. {
  308. s.appendf("CRestrictionType: %s", m_baseType->queryName());
  309. for (int i=0; i<m_facets.size(); i++)
  310. {
  311. RestrictionFacet& f = m_facets[i];
  312. switch (f.type)
  313. {
  314. case RF_Length: s.appendf(", length='%d'", f.value.intValue);
  315. break;
  316. case RF_MinLength: s.appendf(", minLength='%d'", f.value.intValue);
  317. break;
  318. case RF_MaxLength: s.appendf(", maxLength='%d'", f.value.intValue);
  319. break;
  320. case RF_TotalDigits: s.appendf(", totalDigits='%d'", f.value.intValue);
  321. break;
  322. case RF_FractionDigits: s.appendf(", fractionDigits='%d'", f.value.intValue);
  323. break;
  324. case RF_MinExclusive:
  325. if (streq(m_baseType->queryName(),"integer"))
  326. s.appendf(", minExclusive='%d'", f.value.intValue);
  327. else
  328. s.appendf(", minExclusive='%g'", f.value.doubleValue);
  329. break;
  330. case RF_MaxExclusive:
  331. if (streq(m_baseType->queryName(),"integer"))
  332. s.appendf(", maxExclusive='%d'", f.value.intValue);
  333. else
  334. s.appendf(", maxExclusive='%g'", f.value.doubleValue);
  335. break;
  336. case RF_MinInclusive:
  337. if (streq(m_baseType->queryName(),"integer"))
  338. s.appendf(", minInclusive='%d'", f.value.intValue);
  339. else
  340. s.appendf(", minInclusive='%g'", f.value.doubleValue);
  341. break;
  342. case RF_MaxInclusive:
  343. if (streq(m_baseType->queryName(),"integer"))
  344. s.appendf(", maxInclusive='%d'", f.value.intValue);
  345. else
  346. s.appendf(", maxInclusive='%g'", f.value.doubleValue);
  347. break;
  348. case RF_Pattern: s.appendf(", pattern='%s'", f.value.pattern->get());
  349. break;
  350. case RF_WhiteSpace:
  351. s.appendf(", whiteSpace='%s'", (f.value.whiteSpace==RF_WS_Preserve)?"preserve" : ((f.value.whiteSpace==RF_WS_Replace)?"replace" : "collapse"));
  352. break;
  353. case RF_Enumeration:
  354. {
  355. s.append(", enumeration={");
  356. for (int i=0; i<f.value.enums->length(); i++)
  357. {
  358. if (i>0)
  359. s.append(",");
  360. s.append(f.value.enums->item(i));
  361. }
  362. s.append("}");
  363. }
  364. break;
  365. default:
  366. throw MakeStringException(-1,"Unknown/unhandled restriction facet: %d", (int)f.type);
  367. }
  368. }
  369. }
  370. /*
  371. class CSimpleEnumType : extends CSimpleType
  372. {
  373. IXmlType* m_baseType;
  374. StringArray m_enums;
  375. public:
  376. CSimpleEnumType(const char* name, IXmlType* base)
  377. : CSimpleType(name), m_baseType(base)
  378. { }
  379. void setEnumValues(int n, const char* values[]) {
  380. for (int i=0;i<n;i++)
  381. m_enums.append(values[i]);
  382. }
  383. void addEnumValue(const char* v) { m_enums.append(v); }
  384. size32_t getEnumCounts() { return m_enums.length(); }
  385. const char* getEnumAt(int idx) { (idx>=0 && idx<m_enums.length()) ? m_enums.item(idx) : NULL; }
  386. void toString(StringBuffer& s, int indent, StringStack& parent) { s.appendf("CSimpleEnumType: %s", m_baseType->queryName()); }
  387. };
  388. */
  389. class CComplexType : extends CInterface, implements IXmlType
  390. {
  391. protected:
  392. StringAttr m_name;
  393. size_t m_fldCount;
  394. char** m_fldNames;
  395. IXmlType** m_fldTypes;
  396. size_t m_nAttrs;
  397. IXmlAttribute** m_attrs;
  398. XmlSubType m_subType;
  399. public:
  400. IMPLEMENT_IINTERFACE;
  401. CComplexType(const char* name, XmlSubType subType, size_t count, IXmlType** els, char** names, size_t nAttrs, IXmlAttribute** attrs=NULL)
  402. : m_name(name), m_subType(subType), m_fldCount(count), m_fldNames(names),
  403. m_fldTypes(els), m_nAttrs(nAttrs), m_attrs(attrs) { }
  404. virtual ~CComplexType()
  405. {
  406. // types are cached, but not linked
  407. if (m_fldTypes)
  408. delete[] m_fldTypes;
  409. if (m_fldNames)
  410. {
  411. for (int i=0; i<m_fldCount; i++)
  412. free(m_fldNames[i]);
  413. delete[] m_fldNames;
  414. }
  415. if (m_attrs)
  416. {
  417. for (int i=0; i<m_nAttrs; i++)
  418. m_attrs[i]->Release();
  419. delete[] m_attrs;
  420. }
  421. }
  422. const char* queryName() { return m_name.get(); }
  423. bool isComplexType() { return true; }
  424. bool isArray() { return false;}
  425. XmlSubType getSubType() { return m_subType; }
  426. size_t getFieldCount() { return m_fldCount; }
  427. IXmlType* queryFieldType(int idx) { return m_fldTypes[idx]; }
  428. const char* queryFieldName(int idx) { return m_fldNames[idx]; }
  429. size_t getAttrCount() { return m_nAttrs; }
  430. IXmlAttribute* queryAttr(int idx) { return m_attrs[idx]; }
  431. const char* queryAttrName(int idx) { assert(idx>=0 && idx<m_nAttrs); return m_attrs[idx]->queryName(); }
  432. bool hasDefaultValue() { assertex(!"N/A"); return false; }
  433. const char* getDefaultValue() { assertex(!"N/A"); return NULL; }
  434. void getSampleValue(StringBuffer& out, const char* fieldName) { assert(false); }
  435. void toString(StringBuffer& s, int indent, StringStack& parent);
  436. };
  437. // treat as a ComplexType with 1 field
  438. class CArrayType : extends CInterface, implements IXmlType
  439. {
  440. protected:
  441. StringAttr m_name;
  442. StringAttr m_itemName;
  443. IXmlType* m_itemType;
  444. public:
  445. IMPLEMENT_IINTERFACE;
  446. CArrayType(const char* name, const char* itemName, IXmlType* itemType)
  447. : m_name(name), m_itemName(itemName), m_itemType(itemType) { }
  448. const char* queryName() { return m_name.get(); }
  449. bool isComplexType() { return false; }
  450. bool isArray() { return true;}
  451. XmlSubType getSubType() { return SubType_Array; }
  452. const char* queryItemName() { return m_itemName.get(); }
  453. size_t getFieldCount() { return 1; }
  454. IXmlType* queryFieldType(int idx) { return m_itemType; }
  455. const char* queryFieldName(int idx) { return m_itemName.get(); }
  456. size_t getAttrCount() { return 0; } // removed assert false to account for arrays
  457. const char* queryAttrName(int idx) { assert(false); return NULL; }
  458. IXmlAttribute* queryAttr(int idx) { assert(false); return NULL; }
  459. bool hasDefaultValue() { assertex(!"N/A"); return false; }
  460. const char* getDefaultValue() { assertex(!"N/A"); return NULL; }
  461. void getSampleValue(StringBuffer& out, const char* fieldName) { assert(false); }
  462. void toString(StringBuffer& s, int indent, StringStack& parent);
  463. };
  464. class CXmlSchema : extends CInterface, implements IXmlSchema
  465. {
  466. protected:
  467. Owned<IPTree> m_schema;
  468. StringAttr m_xsdNs;
  469. int m_unnamedIdx;
  470. void setSchemaNamespace();
  471. const char* xsdNs() { return m_xsdNs.get(); }
  472. IXmlType* parseComplexType(IPTree* complexDef);
  473. IXmlType* parseSimpleType(IPTree* simpleDef);
  474. IXmlType* getNativeSchemaType(const char* type, const char* defValue);
  475. IXmlType* parseTypeDef(IPTree* el, const char* name=NULL);
  476. size_t parseAttributes(IPTree* typeDef, IXmlAttribute** &attrs);
  477. typedef std::map<std::string, IXmlType*> TypeMap;
  478. TypeMap m_types;
  479. void addCache(const char* name, IXmlType* type)
  480. {
  481. if (name)
  482. {
  483. assert(m_types.find(name) == m_types.end());
  484. m_types[name] = type;
  485. }
  486. else
  487. {
  488. VStringBuffer n("__unnnamed__%d", m_unnamedIdx++);
  489. m_types[n.str()] = type;
  490. }
  491. }
  492. IXmlType* queryTypeByName(const char* typeName,const char* defValue);
  493. public:
  494. IMPLEMENT_IINTERFACE;
  495. CXmlSchema(const char* src);
  496. CXmlSchema(IPTree* schema);
  497. virtual ~CXmlSchema();
  498. IXmlType* queryElementType(const char* name);
  499. IXmlType* queryTypeByName(const char* typeName) { return queryTypeByName(typeName, nullptr); }
  500. };
  501. XMLLIB_API IXmlSchema* createXmlSchemaFromFile(const char* file)
  502. {
  503. StringBuffer src;
  504. try {
  505. src.loadFile(file);
  506. } catch (IException* e) {
  507. StringBuffer msg;
  508. fprintf(stderr,"Exception caught: %s", e->errorMessage(msg).str());
  509. return NULL;
  510. }
  511. return new CXmlSchema(src);
  512. }
  513. XMLLIB_API IXmlSchema* createXmlSchemaFromString(const char* schemaSrc)
  514. {
  515. return new CXmlSchema(schemaSrc);
  516. }
  517. XMLLIB_API IXmlSchema* createXmlSchemaFromPTree(IPTree* schema)
  518. {
  519. return new CXmlSchema(schema);
  520. }
  521. //==========================================================
  522. // Implementation
  523. void CComplexType::toString(StringBuffer& s, int indent, StringStack& parent)
  524. {
  525. s.appendf("%s: ComplexType", queryName()?queryName():"<unnamed>");
  526. if (queryName())
  527. parent.push_back(queryName());
  528. for (int i=0; i<m_fldCount; i++)
  529. {
  530. s.append('\n').pad(indent+1).appendf("%s: ", queryFieldName(i));
  531. if (!queryFieldName(i) || std::find(parent.begin(),parent.end(),queryFieldName(i)) == parent.end())
  532. queryFieldType(i)->toString(s,indent+1,parent);
  533. else
  534. s.appendf(" --> see type: %s", m_fldTypes[i]->queryName());
  535. }
  536. if (m_nAttrs>0)
  537. {
  538. s.append('\n').pad(indent+1).append("Attributes:");
  539. for (int i=0; i<m_nAttrs; i++)
  540. s.appendf("%s", queryAttrName(i));
  541. }
  542. if (queryName())
  543. parent.pop_back();
  544. }
  545. void CArrayType::toString(StringBuffer& s, int indent, StringStack& parent)
  546. {
  547. s.appendf("%s: array of %s: item=%s", queryName()?queryName():"<unnamed>",
  548. m_itemType->queryName()?m_itemType->queryName():"<unnamed>", queryItemName());
  549. if (!m_itemType->queryName() || std::find(parent.begin(),parent.end(),m_itemType->queryName()) == parent.end())
  550. {
  551. if (queryName())
  552. parent.push_back(queryName());
  553. s.append('\n').pad(indent+1);
  554. m_itemType->toString(s, indent+2,parent);
  555. if (queryName())
  556. parent.pop_back();
  557. }
  558. else
  559. s.append('\n').pad(indent+1).appendf("--> see type: %s", m_itemType->queryName());
  560. }
  561. CXmlSchema::CXmlSchema(const char* schemaSrc)
  562. {
  563. m_unnamedIdx = 0;
  564. try {
  565. m_schema.setown(createPTreeFromXMLString(schemaSrc));
  566. setSchemaNamespace();
  567. } catch (IException* e) {
  568. StringBuffer msg;
  569. fprintf(stderr,"Exception caught: %s", e->errorMessage(msg).str());
  570. }
  571. if (!m_schema.get())
  572. m_schema.setown(createPTree("xsd:schema"));
  573. }
  574. CXmlSchema::CXmlSchema(IPTree* schema)
  575. {
  576. m_unnamedIdx = 0;
  577. m_schema.setown(schema);
  578. setSchemaNamespace();
  579. }
  580. CXmlSchema::~CXmlSchema()
  581. {
  582. for (TypeMap::const_iterator it = m_types.begin(); it != m_types.end(); it++)
  583. it->second->Release();
  584. }
  585. void CXmlSchema::setSchemaNamespace()
  586. {
  587. Owned<IAttributeIterator> attrs = m_schema->getAttributes();
  588. for (attrs->first(); attrs->isValid(); attrs->next())
  589. {
  590. if (strcmp(attrs->queryValue(), "http://www.w3.org/2001/XMLSchema") == 0)
  591. {
  592. const char* name = attrs->queryName();
  593. if (strncmp(name, "@xmlns",6)==0)
  594. {
  595. if (*(name+6)==0)
  596. m_xsdNs.set("");
  597. else if (*(name+6)==':')
  598. {
  599. char* x = (char*)malloc(strlen(name)-7+2);
  600. sprintf(x, "%s:", name+7);
  601. m_xsdNs.setown(x);
  602. }
  603. break;
  604. }
  605. }
  606. }
  607. if (!m_xsdNs.get())
  608. m_xsdNs.set("xsd:");
  609. }
  610. IXmlType* CXmlSchema::getNativeSchemaType(const char* typeName, const char* defValue)
  611. {
  612. StringBuffer key(typeName);
  613. if (defValue)
  614. key.append(':').append(defValue);
  615. TypeMap::const_iterator it = m_types.find(key.str());
  616. if (it != m_types.end())
  617. return it->second;
  618. //TODO: Need validataion?
  619. CSimpleType* typ = new CSimpleType(typeName);
  620. if (defValue)
  621. typ->setDefaultValue(defValue);
  622. addCache(key,typ);
  623. return typ;
  624. }
  625. size_t CXmlSchema::parseAttributes(IPTree* typeDef, IXmlAttribute** &attrs)
  626. {
  627. attrs=NULL;
  628. Owned<IPTreeIterator> ats = typeDef->getElements(VStringBuffer("%sattribute",xsdNs()));
  629. size_t nAttrs = 0;
  630. for (ats->first(); ats->isValid(); ats->next())
  631. nAttrs++;
  632. if (nAttrs>0)
  633. {
  634. attrs = new IXmlAttribute*[nAttrs];
  635. int idx = 0;
  636. for (ats->first(); ats->isValid(); ats->next())
  637. attrs[idx++] = new CXmlAttribute(ats->query().queryProp("@name"));
  638. }
  639. return nAttrs;
  640. }
  641. IXmlType* CXmlSchema::parseComplexType(IPTree* complexDef)
  642. {
  643. const char* name = complexDef->queryProp("@name"); // can be NULL: unnamed type (in-place type definition)
  644. XmlSubType subType = SubType_Default;
  645. // all
  646. IPTree* sub = complexDef->queryBranch(VStringBuffer("%sall",xsdNs()));
  647. if (sub)
  648. subType = SubType_Complex_All;
  649. // sequence
  650. if (!sub)
  651. {
  652. sub = complexDef->queryBranch(VStringBuffer("%ssequence",xsdNs()));
  653. if (sub)
  654. subType = SubType_Complex_Sequence;
  655. }
  656. // choice
  657. if (!sub)
  658. {
  659. sub = complexDef->queryBranch(VStringBuffer("%schoice",xsdNs()));
  660. if (sub)
  661. subType = SubType_Complex_Choice;
  662. }
  663. // simpleContent
  664. if (!sub)
  665. {
  666. sub = complexDef->queryBranch(VStringBuffer("%ssimpleContent",xsdNs()));
  667. if (sub)
  668. subType = SubType_Complex_SimpleContent;
  669. }
  670. if (!sub)
  671. {
  672. // attributes only?
  673. if (complexDef->queryBranch(VStringBuffer("%sattribute[1]",xsdNs())))
  674. {
  675. sub = complexDef; // a workaround since xsd:attribute is directly below xsd:complexType
  676. subType = SubType_Complex_All; // empty all
  677. }
  678. }
  679. if (sub)
  680. {
  681. Owned<IPTreeIterator> els = sub->getElements(VStringBuffer("%selement",xsdNs()));
  682. size_t fldCount = 0;
  683. size_t typelessCount = 0;
  684. ForEach(*els)
  685. {
  686. fldCount++;
  687. if (!els->query().hasProp("@type") && !els->query().hasChildren())
  688. typelessCount++;
  689. }
  690. // TODO: verify with struct with one field
  691. if ((fldCount-typelessCount)==1) // hack: assume 1 to be array
  692. {
  693. ForEach(*els)
  694. if (els->query().hasProp("@type") || els->query().hasChildren())
  695. break;
  696. IPTree& el = els->query();
  697. const char* maxOccurs = sub->queryProp("@maxOccurs");
  698. if (!maxOccurs)
  699. maxOccurs = els->query().queryProp("@maxOccurs");
  700. if (maxOccurs && strcmp(maxOccurs, "unbounded")==0)
  701. {
  702. const char* itemName = el.queryProp("@name");
  703. const char* typeName = el.queryProp("@type");
  704. IXmlType* type = typeName ? queryTypeByName(typeName,el.queryProp("@default")) : parseTypeDef(&el);
  705. CArrayType* typ = new CArrayType(name, itemName, type);
  706. addCache(name,typ);
  707. return typ;
  708. }
  709. }
  710. if (subType == SubType_Complex_SimpleContent)
  711. {
  712. assert(fldCount==0);
  713. IPTree* ext = sub->queryBranch(VStringBuffer("%sextension", xsdNs()));
  714. if (ext)
  715. {
  716. // attrs
  717. IXmlAttribute** attrs=NULL;
  718. size_t nAttrs = parseAttributes(ext,attrs);
  719. // let the first fldType be to the base type
  720. IXmlType** types = new IXmlType*[1];
  721. const char* base = sub->queryProp(VStringBuffer("%sextension/@base",xsdNs()));
  722. assert(base);
  723. if (startsWith(base,xsdNs()))
  724. types[0] = getNativeSchemaType(base+strlen(xsdNs()),NULL);
  725. else
  726. {
  727. StringBuffer schema;
  728. toXML(complexDef,schema);
  729. DBGLOG(-1,"Invalid schema: %s", schema.str());
  730. throw MakeStringException(-1, "Invalid schema encoutered");
  731. }
  732. CComplexType* typ = new CComplexType(name,subType,fldCount,types,NULL,nAttrs,attrs);
  733. addCache(name,typ);
  734. return typ;
  735. }
  736. else if (sub->queryBranch(VStringBuffer("%srestriction", xsdNs())))
  737. {
  738. assert(false);
  739. }
  740. else
  741. {
  742. StringBuffer schema;
  743. toXML(complexDef,schema);
  744. DBGLOG(-1,"Invalid schema: %s", schema.str());
  745. throw MakeStringException(-1, "Invalid schema encoutered");
  746. }
  747. }
  748. else
  749. {
  750. // attrs
  751. IXmlAttribute** attrs=NULL;
  752. size_t nAttrs = parseAttributes(complexDef,attrs);
  753. IXmlType** types = fldCount ? new IXmlType*[fldCount] : NULL;
  754. char** names = fldCount ? new char*[fldCount] : NULL;
  755. CComplexType* typ = new CComplexType(name,subType,fldCount,types,names,nAttrs,attrs);
  756. addCache(name,typ);
  757. int fldIdx = 0;
  758. for (els->first(); els->isValid(); els->next())
  759. {
  760. IPTree& el = els->query();
  761. const char* itemName = el.queryProp("@name");
  762. const char* typeName = el.queryProp("@type");
  763. IXmlType* type = typeName ? queryTypeByName(typeName,el.queryProp("@default")) : parseTypeDef(&el);
  764. if (!type)
  765. type = getNativeSchemaType("none", el.queryProp("@default")); //really should be tag only, no content?
  766. types[fldIdx] = type;
  767. names[fldIdx] = strdup(itemName);
  768. fldIdx++;
  769. }
  770. return typ;
  771. }
  772. }
  773. // unhandled
  774. {
  775. StringBuffer schema;
  776. toXML(complexDef,schema);
  777. DBGLOG(-1,"Parse schema failed: name=%s, schema: %s", name?name:"<no-name>",schema.str());
  778. throw MakeStringException(-1, "Internal error: parse schema failed");
  779. }
  780. return NULL;
  781. }
  782. IXmlType* CXmlSchema::parseSimpleType(IPTree* simpleDef)
  783. {
  784. IPTree* sub = simpleDef->queryBranch(VStringBuffer("%srestriction",xsdNs()));
  785. if (sub)
  786. {
  787. const char* base = sub->queryProp("@base");
  788. if (startsWith(base, xsdNs()))
  789. base += m_xsdNs.length();
  790. const char* name = simpleDef->queryProp("@name");
  791. if (!name || !*name)
  792. throw MakeStringException(-1, "Invalid schema: missing name for simple restriction type");
  793. if (!base || !*base)
  794. throw MakeStringException(-1, "Invalid schema: missing base type for simple restriction type: %s", name);
  795. IXmlType* baseType = getNativeSchemaType(base, sub->queryProp("@default"));
  796. CRestrictionType* type = new CRestrictionType(name,baseType);
  797. addCache(name,type);
  798. RestrictionFacetValue fv;
  799. if (sub->queryProp(VStringBuffer("%senumeration[1]/@value",xsdNs())))
  800. {
  801. Owned<IPTreeIterator> it = sub->getElements(VStringBuffer("%senumeration", xsdNs()));
  802. StringArray* enums = new StringArray();
  803. for (it->first(); it->isValid(); it->next())
  804. enums->append( it->query().queryProp("@value") );
  805. fv.enums = enums;
  806. type->addFacet(RF_Enumeration,fv);
  807. }
  808. const char* v = sub->queryProp(VStringBuffer("%smaxLength/@value",xsdNs()));
  809. if (v)
  810. {
  811. fv.intValue = atoi(v);
  812. type->addFacet(RF_MaxLength, fv);
  813. }
  814. v = sub->queryProp(VStringBuffer("%sminLength/@value",xsdNs()));
  815. if (v)
  816. {
  817. fv.intValue = atoi(v);
  818. type->addFacet(RF_MinLength, fv);
  819. }
  820. //TODO: more facets here
  821. return type;
  822. }
  823. assert(!"Unhandled simple type");
  824. return NULL;
  825. }
  826. IXmlType* CXmlSchema::queryElementType(const char* name)
  827. {
  828. VStringBuffer xpath("%selement[@name='%s']", xsdNs(), name);
  829. IPTree* el = m_schema->queryBranch(xpath);
  830. // <xsd:element name="xxx"
  831. if (el)
  832. {
  833. const char* type = el->queryProp("@type");
  834. if (type)
  835. return queryTypeByName(type,el->queryProp("@default"));
  836. else
  837. return parseTypeDef(el);
  838. }
  839. //TODO: roxie type:
  840. //xpath.setf("%selement[@name=\"Dataset\"]/%scomplexType/%s:sequence/%s:element[@name=\"Row\"]",xsdNs(),xsdNs(),xsdNs(),xsdNs());
  841. return NULL;
  842. }
  843. IXmlType* CXmlSchema::parseTypeDef(IPTree* el, const char* name)
  844. {
  845. // complex?
  846. VStringBuffer xpath("%scomplexType",xsdNs());
  847. if (name)
  848. xpath.appendf("[@name='%s']", name);
  849. IPTree* complex = el->queryBranch(xpath);
  850. if (complex)
  851. return parseComplexType(complex);
  852. // simple?
  853. xpath.setf("%ssimpleType",xsdNs());
  854. if (name)
  855. xpath.appendf("[@name='%s']", name);
  856. IPTree* simple = el->queryBranch(xpath);
  857. if (simple)
  858. return parseSimpleType(simple);
  859. // unknown
  860. return NULL;
  861. }
  862. IXmlType* CXmlSchema::queryTypeByName(const char* name, const char* defValue)
  863. {
  864. if (startsWith(name, xsdNs()))
  865. return getNativeSchemaType(name+m_xsdNs.length(),defValue);
  866. const char* colon = strchr(name, ':');
  867. if (colon)
  868. name = colon+1; // TODO: verify tns:
  869. TypeMap::const_iterator it = m_types.find(name);
  870. if (it != m_types.end())
  871. return it->second;
  872. return parseTypeDef(m_schema.get(), name);
  873. }