tokenserialization.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2016 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef TOKENSERIALIZATION_HPP
  14. #define TOKENSERIALIZATION_HPP
  #include "jlog.hpp"
  #include "jstring.hpp"
  #include <cctype>
  #include <cerrno>
  #include <climits>
  #include <cmath>
  #include <cstdlib>
  #include <limits>
  #include <type_traits>
  #include <typeinfo>
  21. #if !defined(ERANGE)
  22. # define ERANGE 34 /* Math result not representable. */
  23. #endif
  // Place EXTEND_TOKENDESERIALIZER(base) inside the body of every class derived
  // from TokenDeserializer, passing the immediate base class name. It redeclares
  // the two template entry points in the subclass:
  //  - operator () forwards to the subclass's own deserialize overload set;
  //  - deserialize forwards to base::deserialize.
  // Each subclass must do this for requests to be dispatched correctly.
  #define EXTEND_TOKENDESERIALIZER(base) \
      template <typename TValue> \
      DeserializationResult operator () (const char* buffer, TValue& value) \
      { \
          return this->deserialize(buffer, value); \
      } \
      template <typename TValue> \
      DeserializationResult deserialize(const char* buffer, TValue& value) const \
      { \
          return base::deserialize(buffer, value); \
      }
  37. class TokenSerializer
  38. {
  39. public:
  40. // Write any type of data to a given text buffer. There must be an
  41. // overloaded operator << to insert the value type into the buffer type.
  42. //
  43. // While this does allow multiple tokens to be serialized into a buffer,
  44. // it is the caller's responsibility to add any delimiters necessary for
  45. // subsequent deserialization.
  46. template <typename TValue>
  47. StringBuffer& serialize(const TValue& value, StringBuffer& buffer) const
  48. {
  49. buffer << value;
  50. return buffer;
  51. }
  52. // Convert a buffer to a character array. Used by template methods that do
  53. // not inherently know the buffer type.
  54. const char* str(const StringBuffer& buffer) const
  55. {
  56. return buffer.str();
  57. }
  58. };
  // Outcome codes reported by token deserialization.
  enum DeserializationResult
  {
      // no deserialization attempted
      Deserialization_UNKNOWN = -1,
      // value converted and stored
      Deserialization_SUCCESS = 0,
      // receiving value cannot be const
      Deserialization_BAD_TYPE = 1,
      // receiving value type not handled
      Deserialization_UNSUPPORTED = 2,
      // token cannot be NULL, empty, or all whitespace
      Deserialization_INVALID_TOKEN = 3,
      // non-numeric characters found in numeric conversion
      Deserialization_NOT_A_NUMBER = 4,
      // number too large to be represented by receiving value
      Deserialization_OVERFLOW = 5,
      // number too small to be represented by receiving value
      Deserialization_UNDERFLOW = 6
  };
  70. class TokenDeserializer
  71. {
  72. public:
  73. // Convert the contents of buffer from text to the requested numeric type.
  74. // The conversion fails if:
  75. // - value is a const type
  76. // - value is not a numeric type
  77. // - buffer is NULL, empty or entirely whitespace characters
  78. // - buffer contains any character not valid for the receiving type
  79. // - buffer contains a number too large for the receiving type
  80. // - buffer contains a number too small for the receiving type
  81. template <typename TValue>
  82. DeserializationResult deserialize(const char* buffer, TValue& value) const
  83. {
  84. DeserializationResult result = Deserialization_UNKNOWN;
  85. if (std::is_const<TValue>())
  86. {
  87. result = Deserialization_BAD_TYPE;
  88. }
  89. else if (std::is_arithmetic<TValue>())
  90. {
  91. const char* ptr = buffer;
  92. skipWhitespace(ptr);
  93. if (!*ptr)
  94. {
  95. result = Deserialization_INVALID_TOKEN;
  96. }
  97. else
  98. {
  99. if (std::is_integral<TValue>())
  100. {
  101. if (std::is_signed<TValue>())
  102. {
  103. result = deserializeSigned(ptr, value);
  104. }
  105. else
  106. {
  107. result = deserializeUnsigned(ptr, value);
  108. }
  109. }
  110. else if (std::is_floating_point<TValue>())
  111. {
  112. result = deserializeFloatingPoint(ptr, value);
  113. }
  114. }
  115. }
  116. else
  117. {
  118. result = Deserialization_UNSUPPORTED;
  119. }
  120. logResult<TValue>(buffer, result);
  121. return result;
  122. }
  123. // Convert the contents of buffer from text to the bool data type. The
  124. // conversion fails if buffer contains any character that is not a valid
  125. // Boolean representation.
  126. //
  127. // Supported representations:
  128. // true: "true"|"yes"|"on"|non-zero integer
  129. // false: "false"|"no"|"off"|zero
  130. DeserializationResult deserialize(const char* buffer, bool& value) const
  131. {
  132. const char* ptr = buffer;
  133. DeserializationResult result = Deserialization_UNKNOWN;
  134. skipWhitespace(ptr);
  135. if (!*ptr)
  136. {
  137. value = false;
  138. result = Deserialization_INVALID_TOKEN;
  139. }
  140. else
  141. {
  142. result = deserializeBool(ptr, value);
  143. }
  144. logResult<bool>(buffer, result);
  145. return result;
  146. }
  147. // Allow an instance of this class to be used as a functor.
  148. template <typename TValue>
  149. DeserializationResult operator () (const char* buffer, TValue& value)
  150. {
  151. return deserialize(buffer, value);
  152. }
  153. private:
  154. DeserializationResult deserializeBool(const char* buffer, bool& value) const
  155. {
  156. const char* ptr = buffer;
  157. bool tmp = false;
  158. DeserializationResult result = Deserialization_UNKNOWN;
  159. switch (*ptr)
  160. {
  161. case 't': case 'T':
  162. if (strnicmp(ptr + 1, "rue", 3) == 0)
  163. {
  164. result = Deserialization_SUCCESS;
  165. tmp = true;
  166. ptr += 4;
  167. }
  168. break;
  169. case 'f': case 'F':
  170. if (strnicmp(ptr + 1, "alse", 4) == 0)
  171. {
  172. result = Deserialization_SUCCESS;
  173. ptr += 5;
  174. }
  175. break;
  176. case 'y': case 'Y':
  177. if (strnicmp(ptr + 1, "es", 2) == 0)
  178. {
  179. result = Deserialization_SUCCESS;
  180. tmp = true;
  181. ptr += 3;
  182. }
  183. break;
  184. case 'n': case 'N':
  185. if (('o' == ptr[1]) || ('O' == ptr[1]))
  186. {
  187. result = Deserialization_SUCCESS;
  188. ptr += 2;
  189. }
  190. break;
  191. case 'o': case 'O':
  192. switch (ptr[1])
  193. {
  194. case 'n': case 'N':
  195. result = Deserialization_SUCCESS;
  196. tmp = true;
  197. ptr += 2;
  198. break;
  199. case 'f': case 'F':
  200. if (('f' == ptr[2]) || ('F' == ptr[2]))
  201. {
  202. result = Deserialization_SUCCESS;
  203. ptr += 3;
  204. }
  205. break;
  206. default:
  207. break;
  208. }
  209. break;
  210. case '0':
  211. case '1':
  212. case '2':
  213. case '3':
  214. case '4':
  215. case '5':
  216. case '6':
  217. case '7':
  218. case '8':
  219. case '9':
  220. result = Deserialization_SUCCESS;
  221. do
  222. {
  223. if (*ptr != '0')
  224. tmp = true;
  225. }
  226. while (isdigit(*++ptr));
  227. break;
  228. default:
  229. break;
  230. }
  231. if (Deserialization_SUCCESS == result && !isEmptyOrWhitespace(ptr))
  232. result = Deserialization_NOT_A_NUMBER;
  233. if (Deserialization_SUCCESS == result)
  234. value = tmp;
  235. return result;
  236. }
  237. template <typename TValue>
  238. DeserializationResult deserializeSigned(const char* buffer, TValue& value) const
  239. {
  240. char* end = NULL;
  241. long long tmp = strtoll(buffer, &end, 0);
  242. DeserializationResult result = Deserialization_UNKNOWN;
  243. if (end == buffer)
  244. {
  245. result = Deserialization_NOT_A_NUMBER;
  246. }
  247. else if (LLONG_MIN == tmp && ERANGE == errno)
  248. {
  249. result = Deserialization_UNDERFLOW;
  250. }
  251. else if (LLONG_MAX == tmp && ERANGE == errno)
  252. {
  253. result = Deserialization_OVERFLOW;
  254. }
  255. else if (!isEmptyOrWhitespace(end))
  256. {
  257. result = Deserialization_NOT_A_NUMBER;
  258. }
  259. #if defined(_WIN32)
  260. // VS2015 generates this sign mismatch warning when TValue is unsigned, and
  261. // unsigned values are not process here.
  262. #pragma warning(suppress:4018)
  263. #endif
  264. else if (tmp < std::numeric_limits<TValue>::min())
  265. {
  266. result = Deserialization_UNDERFLOW;
  267. }
  268. #if defined(_WIN32)
  269. // VS2015 generates this sign mismatch warning when TValue is unsigned, and
  270. // unsigned values are not process here.
  271. #pragma warning(suppress:4018)
  272. #endif
  273. else if (tmp > std::numeric_limits<TValue>::max())
  274. {
  275. result = Deserialization_OVERFLOW;
  276. }
  277. else
  278. {
  279. value = TValue(tmp);
  280. result = Deserialization_SUCCESS;
  281. }
  282. return result;
  283. }
  284. template <typename TValue>
  285. DeserializationResult deserializeUnsigned(const char* buffer, TValue& value) const
  286. {
  287. char* end = NULL;
  288. unsigned long long tmp = strtoull(buffer, &end, 0);
  289. DeserializationResult result = Deserialization_UNKNOWN;
  290. if (end == buffer)
  291. {
  292. result = Deserialization_NOT_A_NUMBER;
  293. }
  294. else if (ULLONG_MAX == tmp && ERANGE == errno)
  295. {
  296. result = Deserialization_OVERFLOW;
  297. }
  298. else if (!isEmptyOrWhitespace(end))
  299. {
  300. result = Deserialization_NOT_A_NUMBER;
  301. }
  302. else if ('-' == buffer[0])
  303. {
  304. result = Deserialization_UNDERFLOW;
  305. }
  306. #if defined(_WIN32)
  307. // VS2015 generates this sign mismatch warning when TValue is unsigned, and tmp
  308. // is always unsigned.
  309. #pragma warning(suppress:4018)
  310. #endif
  311. else if (tmp > std::numeric_limits<TValue>::max())
  312. {
  313. result = Deserialization_OVERFLOW;
  314. }
  315. else
  316. {
  317. value = TValue(tmp);
  318. result = Deserialization_SUCCESS;
  319. }
  320. return result;
  321. }
  322. template <typename TValue>
  323. DeserializationResult deserializeFloatingPoint(const char* buffer, TValue& value) const
  324. {
  325. char* end = NULL;
  326. long double tmp = strtold(buffer, &end);
  327. DeserializationResult result = Deserialization_UNKNOWN;
  328. if (0 == tmp && end == buffer)
  329. {
  330. result = Deserialization_NOT_A_NUMBER;
  331. }
  332. else if (0 == tmp && ERANGE == errno)
  333. {
  334. result = Deserialization_UNDERFLOW;
  335. }
  336. else if ((-HUGE_VALL == tmp || HUGE_VALL == tmp) && ERANGE == errno)
  337. {
  338. result = Deserialization_OVERFLOW;
  339. }
  340. else if (!isEmptyOrWhitespace(end))
  341. {
  342. result = Deserialization_NOT_A_NUMBER;
  343. }
  344. #if defined(_WIN32)
  345. // VS2015 generates this warning (as an error) with unsigned integral types,
  346. // even though this method does not process integral types.
  347. #pragma warning(suppress:4146)
  348. #endif
  349. else if (tmp < -std::numeric_limits<TValue>::max())
  350. {
  351. result = Deserialization_OVERFLOW;
  352. }
  353. else if (tmp > std::numeric_limits<TValue>::max())
  354. {
  355. result = Deserialization_OVERFLOW;
  356. }
  357. else
  358. {
  359. value = TValue(tmp);
  360. result = Deserialization_SUCCESS;
  361. }
  362. return result;
  363. }
  364. template <typename TValue>
  365. void logResult(const char* buffer, DeserializationResult result) const
  366. {
  367. bool success = false;
  368. if (Deserialization_SUCCESS == result)
  369. #if !defined(_DEBUG)
  370. return;
  371. #else
  372. success = true;
  373. #endif
  374. const char* typeStr = typeid(TValue).name();
  375. const char* resultStr = NULL;
  376. switch (result)
  377. {
  378. case Deserialization_UNKNOWN: resultStr = "unknown"; break;
  379. case Deserialization_SUCCESS: resultStr = "success"; break;
  380. case Deserialization_BAD_TYPE: resultStr = "bad type"; break;
  381. case Deserialization_UNSUPPORTED: resultStr = "unsupported"; break;
  382. case Deserialization_INVALID_TOKEN: resultStr = "invalid token"; break;
  383. case Deserialization_NOT_A_NUMBER: resultStr = "not a number"; break;
  384. case Deserialization_OVERFLOW: resultStr = "overflow"; break;
  385. case Deserialization_UNDERFLOW: resultStr = "underflow"; break;
  386. default: resultStr = "unexpected"; break;
  387. }
  388. if (success)
  389. DBGLOG("Result of deserializing '%s' to type '%s': %s", buffer, typeStr, resultStr);
  390. else
  391. OERRLOG("Result of deserializing '%s' to type '%s': %s", buffer, typeStr, resultStr);
  392. }
  393. bool isEmptyOrWhitespace(const char* buffer) const
  394. {
  395. skipWhitespace(buffer);
  396. return (!*buffer);
  397. }
  398. void skipWhitespace(const char*& buffer) const
  399. {
  400. while (isspace(*buffer)) buffer++;
  401. }
  402. };
  403. #endif // TOKENSERIALIZATION_HPP