checkers.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648
  1. """
  2. GRASS Python testing framework checkers
  3. Copyright (C) 2014 by the GRASS Development Team
  4. This program is free software under the GNU General Public
  5. License (>=v2). Read the file COPYING that comes with GRASS GIS
  6. for details.
  7. :authors: Vaclav Petras, Soeren Gebbert
  8. """
  9. import os
  10. import sys
  11. import re
  12. import doctest
  13. import hashlib
  14. from grass.script.utils import encode
  15. try:
  16. from grass.script.core import KeyValue
  17. except (ImportError, AttributeError):
  18. # TODO: we are silent about the error and use a object with different
  19. # interface, should be replaced by central keyvalue module
  20. # this can happen when translations are not available
  21. # TODO: grass should survive or give a better error when translations are not available
  22. # even the lazy loading after first _ call would be interesting
  23. # File "...grass/script/core.py", line 40, in <module>
  24. # AttributeError: 'NoneType' object has no attribute 'endswith'
  25. KeyValue = dict
  26. # alternative term to check(er(s)) would be compare
  27. def unify_projection(dic):
  28. """Unifies names of projections.
  29. Some projections are referred using different names like
  30. 'Universal Transverse Mercator' and 'Universe Transverse Mercator'.
  31. This function replaces synonyms by a unified name.
  32. Example of common typo in UTM replaced by correct spelling::
  33. >>> unify_projection({'name': ['Universe Transverse Mercator']})
  34. {'name': ['Universal Transverse Mercator']}
  35. :param dic: The dictionary containing information about projection
  36. :return: The dictionary with the new values if needed or a copy of old one
  37. """
  38. # the lookup variable is a list of list, each list contains all the
  39. # possible name for a projection system
  40. lookup = [["Universal Transverse Mercator", "Universe Transverse Mercator"]]
  41. dic = dict(dic)
  42. for item in lookup:
  43. for n in range(len(dic["name"])):
  44. if dic["name"][n] in item:
  45. dic["name"][n] = item[0]
  46. return dic
  47. def unify_units(dic):
  48. """Unifies names of units.
  49. Some units have different spelling although they are the same units.
  50. This functions replaces different spelling options by unified one.
  51. Example of British English spelling replaced by US English spelling::
  52. >>> unify_units({'units': ['metres'], 'unit': ['metre']}) # doctest: +SKIP
  53. {'units': ['meters'], 'unit': ['meter']}
  54. :param dic: The dictionary containing information about units
  55. :return: The dictionary with the new values if needed or a copy of old one
  56. """
  57. # the lookup variable is a list of list, each list contains all the
  58. # possible name for a units
  59. lookup = [
  60. ["meter", "metre"],
  61. ["meters", "metres"],
  62. ["Meter", "Metre"],
  63. ["Meters", "Metres"],
  64. ["kilometer", "kilometre"],
  65. ["kilometers", "kilometres"],
  66. ["Kilometer", "Kilometre"],
  67. ["Kilometers", "Kilometres"],
  68. ]
  69. dic = dict(dic)
  70. for item in lookup:
  71. if not isinstance(dic["unit"], str):
  72. for n in range(len(dic["unit"])):
  73. if dic["unit"][n] in item:
  74. dic["unit"][n] = item[0]
  75. else:
  76. if dic["unit"] in item:
  77. dic["unit"] = item[0]
  78. if not isinstance(dic["units"], str):
  79. for n in range(len(dic["units"])):
  80. if dic["units"][n] in item:
  81. dic["units"][n] = item[0]
  82. else:
  83. if dic["units"] in item:
  84. dic["units"] = item[0]
  85. return dic
  86. def value_from_string(value):
  87. """Create value of a most fitting type from a string.
  88. Type conversions are applied in order ``int``, ``float``, ``string``
  89. where string is no conversion.
  90. >>> value_from_string('1')
  91. 1
  92. >>> value_from_string('5.6')
  93. 5.6
  94. >>> value_from_string(' 5.6\t ')
  95. 5.6
  96. >>> value_from_string('hello')
  97. 'hello'
  98. """
  99. not_float = False
  100. not_int = False
  101. # Convert values into correct types
  102. # We first try integer then float because
  103. # int('1.0') is ValueError (although int(1.0) is not)
  104. # while float('1') is not
  105. try:
  106. value_converted = int(value)
  107. except ValueError:
  108. not_int = True
  109. if not_int:
  110. try:
  111. value_converted = float(value)
  112. except ValueError:
  113. not_float = True
  114. # strip strings from whitespace (expecting spaces and tabs)
  115. if not_int and not_float:
  116. value_converted = value.strip()
  117. return value_converted
  118. # TODO: what is the default separator?
  119. def text_to_keyvalue(
  120. text,
  121. sep=":",
  122. val_sep=",",
  123. functions=None,
  124. skip_invalid=False,
  125. skip_empty=False,
  126. from_string=value_from_string,
  127. ):
  128. """Convert test to key-value pairs (dictionary-like KeyValue object).
  129. Converts a key-value text file, where entries are separated
  130. by newlines and the key and value are separated by `sep`,
  131. into a key-value dictionary and discovers/uses the correct
  132. data types (float, int or string) for values.
  133. Besides key-value pairs it also parses values itself. Value is created
  134. with the best fitting type using `value_from_string()` function by default.
  135. When val_sep is present in value part, the resulting value is
  136. a list of values.
  137. :param text: string to convert
  138. :param sep: character that separates the keys and values
  139. :param val_sep: character that separates the values of a single key
  140. :param functions: list of functions to apply on the resulting dictionary
  141. :param skip_invalid: skip all lines which does not contain separator
  142. :param skip_empty: skip empty lines
  143. :param from_string: a function used to convert strings to values,
  144. use ``lambda x: x`` for no conversion
  145. :return: a dictionary representation of text
  146. :return type: grass.script.core.KeyValue or dict
  147. And example of converting text with text, floats, integers and list
  148. to a dictionary::
  149. >>> sorted(text_to_keyvalue('''a: Hello
  150. ... b: 1.0
  151. ... c: 1,2,3,4,5
  152. ... d : hello,8,0.1''').items()) # sorted items from the dictionary
  153. [('a', 'Hello'), ('b', 1.0), ('c', [1, 2, 3, 4, 5]), ('d', ['hello', 8, 0.1])]
  154. .. warning::
  155. And empty string is a valid input because empty dictionary is a valid
  156. dictionary. You need to test this separately according
  157. to the circumstances.
  158. """
  159. # splitting according to universal newlines approach
  160. # TODO: add also general split with vsep
  161. text = text.splitlines()
  162. kvdict = KeyValue()
  163. functions = [] if functions is None else functions
  164. for line in text:
  165. if line.find(sep) >= 0:
  166. key, value = line.split(sep, 1)
  167. key = key.strip()
  168. value = value.strip()
  169. # this strip may not be necessary, we strip each item in list
  170. # and also if there is only one value
  171. else:
  172. # lines with no separator (empty or invalid)
  173. if not line:
  174. if not skip_empty:
  175. # TODO: here should go _ for translation
  176. # TODO: the error message is not really informative
  177. # in case of skipping lines we may get here with no key
  178. msg = "Empty line in the parsed text."
  179. if kvdict:
  180. # key is the one from previous line
  181. msg = (
  182. "Empty line in the parsed text."
  183. " Previous line's key is <%s>"
  184. ) % key
  185. raise ValueError(msg)
  186. else:
  187. # line contains something but not separator
  188. if not skip_invalid:
  189. # TODO: here should go _ for translation
  190. raise ValueError(
  191. ("Line <{l}> does not contain" " separator <{s}>.").format(
  192. l=line, s=sep
  193. )
  194. )
  195. # if we get here we are silently ignoring the line
  196. # because it is invalid (does not contain key-value separator) or
  197. # because it is empty
  198. continue
  199. if value.find(val_sep) >= 0:
  200. # lists
  201. values = value.split(val_sep)
  202. value_list = []
  203. for value in values:
  204. value_converted = from_string(value)
  205. value_list.append(value_converted)
  206. kvdict[key] = value_list
  207. else:
  208. # single values
  209. kvdict[key] = from_string(value)
  210. for function in functions:
  211. kvdict = function(kvdict)
  212. return kvdict
  213. # TODO: decide if there should be some default for precision
  214. # TODO: define standard precisions for DCELL, FCELL, CELL, mm, ft, cm, ...
  215. # TODO: decide if None is valid, and use some default or no compare
  216. # TODO: is None a valid value for precision?
  217. def values_equal(value_a, value_b, precision=0.000001):
  218. """
  219. >>> values_equal(1.022, 1.02, precision=0.01)
  220. True
  221. >>> values_equal([1.2, 5.3, 6.8], [1.1, 5.2, 6.9], precision=0.2)
  222. True
  223. >>> values_equal(7, 5, precision=2)
  224. True
  225. >>> values_equal(1, 5.9, precision=10)
  226. True
  227. >>> values_equal('Hello', 'hello')
  228. False
  229. """
  230. # each if body needs to handle only not equal state
  231. if isinstance(value_a, float) and isinstance(value_b, float):
  232. # both values are float
  233. # this could be also changed to is None and raise TypeError
  234. # in Python 2 None is smaller than anything
  235. # in Python 3 None < 3 raises TypeError
  236. precision = float(precision)
  237. if abs(value_a - value_b) > precision:
  238. return False
  239. elif (isinstance(value_a, float) and isinstance(value_b, int)) or (
  240. isinstance(value_b, float) and isinstance(value_a, int)
  241. ):
  242. # on is float the other is int
  243. # don't accept None
  244. precision = float(precision)
  245. # we will apply precision to int-float comparison
  246. # rather than converting both to integer
  247. # (as in the original function from grass.script.core)
  248. if abs(value_a - value_b) > precision:
  249. return False
  250. elif (
  251. isinstance(value_a, int)
  252. and isinstance(value_b, int)
  253. and precision
  254. and int(precision) > 0
  255. ):
  256. # both int but precision applies for them
  257. if abs(value_a - value_b) > precision:
  258. return False
  259. elif isinstance(value_a, list) and isinstance(value_b, list):
  260. if len(value_a) != len(value_b):
  261. return False
  262. for i in range(len(value_a)):
  263. # apply this function for comparison of items in the list
  264. if not values_equal(value_a[i], value_b[i], precision):
  265. return False
  266. else:
  267. if value_a != value_b:
  268. return False
  269. return True
  270. def keyvalue_equals(
  271. dict_a, dict_b, precision, def_equal=values_equal, key_equal=None, a_is_subset=False
  272. ):
  273. """Compare two dictionaries.
  274. .. note::
  275. Always use keyword arguments for all parameters with defaults.
  276. It is a good idea to use keyword arguments also for the first
  277. two parameters.
  278. An example of key-value texts comparison::
  279. >>> keyvalue_equals(text_to_keyvalue('''a: Hello
  280. ... b: 1.0
  281. ... c: 1,2,3,4,5
  282. ... d: hello,8,0.1'''),
  283. ... text_to_keyvalue('''a: Hello
  284. ... b: 1.1
  285. ... c: 1,22,3,4,5
  286. ... d: hello,8,0.1'''), precision=0.1)
  287. False
  288. :param dict_a: first dictionary
  289. :param dict_b: second dictionary
  290. :param precision: precision with which the floating point values
  291. are compared (passed to equality functions)
  292. :param callable def_equal: function used for comparison by default
  293. :param dict key_equal: dictionary of functions used for comparison
  294. of specific keys, `def_equal` is used for the rest,
  295. keys in dictionary are keys in `dict_a` and `dict_b` dictionaries,
  296. values are the functions used to comapare the given key
  297. :param a_is_subset: `True` if `dict_a` is a subset of `dict_b`,
  298. `False` otherwise
  299. :return: `True` if identical, `False` if different
  300. Use `diff_keyvalue()` to get information about differeces.
  301. You can use this function to find out if there is a difference and then
  302. use `diff_keyvalue()` to determine all the differences between
  303. dictionaries.
  304. """
  305. key_equal = {} if key_equal is None else key_equal
  306. if not a_is_subset and sorted(dict_a.keys()) != sorted(dict_b.keys()):
  307. return False
  308. b_keys = dict_b.keys() if a_is_subset else None
  309. # iterate over subset or just any if not a_is_subset
  310. # check for missing keys in superset
  311. # compare matching keys
  312. for key in dict_a.keys():
  313. if a_is_subset and key not in b_keys:
  314. return False
  315. equal_fun = key_equal.get(key, def_equal)
  316. if not equal_fun(dict_a[key], dict_b[key], precision):
  317. return False
  318. return True
  319. # TODO: should the return depend on the a_is_subset parameter?
  320. # this function must have the same interface and behavior as keyvalue_equals
  321. def diff_keyvalue(
  322. dict_a, dict_b, precision, def_equal=values_equal, key_equal=None, a_is_subset=False
  323. ):
  324. """Determine the difference of two dictionaries.
  325. The function returns missing keys and different values for common keys::
  326. >>> a = {'c': 2, 'b': 3, 'a': 4}
  327. >>> b = {'c': 1, 'b': 3, 'd': 5}
  328. >>> diff_keyvalue(a, b, precision=0)
  329. (['d'], ['a'], [('c', 2, 1)])
  330. You can provide only a subset of values in dict_a, in this case
  331. first item in tuple is an emptu list::
  332. >>> diff_keyvalue(a, b, a_is_subset=True, precision=0)
  333. ([], ['a'], [('c', 2, 1)])
  334. This function behaves the same as `keyvalue_equals()`.
  335. :returns: A tuple of lists, fist is list of missing keys in dict_a,
  336. second missing keys in dict_b and third is a list of mismatched
  337. values as tuples (key, value_from_a, value_from_b)
  338. :rtype: (list, list, list)
  339. Comparing to the Python ``difflib`` package this function does not create
  340. any difference output. It just returns the dictionaries.
  341. Comparing to the Python ``unittest`` ``assertDictEqual()``,
  342. this function does not issues error or exception, it just determines
  343. what it the difference.
  344. """
  345. key_equal = {} if key_equal is None else key_equal
  346. a_keys = dict_a.keys()
  347. b_keys = dict_b.keys()
  348. missing_in_a = []
  349. missing_in_b = []
  350. mismatched = []
  351. if not a_is_subset:
  352. for key in b_keys:
  353. if key not in a_keys:
  354. missing_in_a.append(key)
  355. # iterate over a, so we know that it is in a
  356. for key in a_keys:
  357. # check if it is in b
  358. if key not in b_keys:
  359. missing_in_b.append(key)
  360. else:
  361. equal_fun = key_equal.get(key, def_equal)
  362. if not equal_fun(dict_a[key], dict_b[key], precision):
  363. mismatched.append((key, dict_a[key], dict_b[key]))
  364. return sorted(missing_in_a), sorted(missing_in_b), sorted(mismatched)
  365. def proj_info_equals(text_a, text_b):
  366. """Test if two PROJ_INFO texts are equal."""
  367. def compare_sums(list_a, list_b, precision):
  368. """Compare difference of sums of two list using precision"""
  369. # derived from the code in grass.script.core
  370. if abs(sum(list_a) - sum(list_b)) > precision:
  371. return False
  372. sep = ":"
  373. val_sep = ","
  374. key_equal = {"+towgs84": compare_sums}
  375. dict_a = text_to_keyvalue(
  376. text_a, sep=sep, val_sep=val_sep, functions=[unify_projection]
  377. )
  378. dict_b = text_to_keyvalue(
  379. text_b, sep=sep, val_sep=val_sep, functions=[unify_projection]
  380. )
  381. return keyvalue_equals(
  382. dict_a, dict_b, precision=0.000001, def_equal=values_equal, key_equal=key_equal
  383. )
  384. def proj_units_equals(text_a, text_b):
  385. """Test if two PROJ_UNITS texts are equal."""
  386. def lowercase_equals(string_a, string_b, precision=None):
  387. # we don't need a warning for unused precision
  388. # pylint: disable=W0613
  389. """Test equality of two strings ignoring their case using ``lower()``.
  390. Precision is accepted as require by `keyvalue_equals()` but ignored.
  391. """
  392. return string_a.lower() == string_b.lower()
  393. sep = ":"
  394. val_sep = ","
  395. key_equal = {"unit": lowercase_equals, "units": lowercase_equals}
  396. dict_a = text_to_keyvalue(text_a, sep=sep, val_sep=val_sep, functions=[unify_units])
  397. dict_b = text_to_keyvalue(text_b, sep, val_sep, functions=[unify_units])
  398. return keyvalue_equals(
  399. dict_a, dict_b, precision=0.000001, def_equal=values_equal, key_equal=key_equal
  400. )
  401. # TODO: support also float (with E, e, inf, nan, ...?) and int (###, ##.)
  402. # http://hg.python.org/cpython/file/943d3e289ab4/Lib/decimal.py#l6098
  403. # perhaps a separate function?
  404. # alternative names: looks like, correspond with/to
  405. # TODO: change checking over lines?
  406. # TODO: change parameter order?
  407. # TODO: the behavior with last \n is strange but now using DOTALL and $
  408. def check_text_ellipsis(reference, actual):
  409. r"""
  410. >>> check_text_ellipsis("Vector map <...> contains ... points.",
  411. ... "Vector map <bridges> contains 5268 points.")
  412. True
  413. >>> check_text_ellipsis("user: ...\\nname: elevation",
  414. ... "user: some_user\\nname: elevation")
  415. True
  416. >>> check_text_ellipsis("user: ...\\nname: elevation",
  417. ... "user: \\nname: elevation")
  418. False
  419. The ellipsis is always considered even if it is followed by another
  420. dots. Consequently, a dot at the end of the sentence with preceding
  421. ellipsis will work as well as a line filled with undefined number of dots.
  422. >>> check_text_ellipsis("The result is ....",
  423. ... "The result is 25.")
  424. True
  425. >>> check_text_ellipsis("max ..... ...",
  426. ... "max ....... 6")
  427. True
  428. However, there is no way how to express that the dot should be in the
  429. beginning and the ellipsis is at the end of the group of dots.
  430. >>> check_text_ellipsis("The result is ....",
  431. ... "The result is .25")
  432. False
  433. The matching goes over lines (TODO: should this be changed?):
  434. >>> check_text_ellipsis("a=11\nb=...", "a=11\nb=22\n")
  435. True
  436. This function is based on regular expression containing .+ but no other
  437. regular expression matching will be done.
  438. >>> check_text_ellipsis("Result: [569] (...)",
  439. ... "Result: 9 (too high)")
  440. False
  441. """
  442. ref_escaped = re.escape(reference)
  443. exp = re.compile(r"\\\.\\\.\\\.") # matching escaped ...
  444. ref_regexp = exp.sub(".+", ref_escaped) + "$"
  445. if re.match(ref_regexp, actual, re.DOTALL):
  446. return True
  447. else:
  448. return False
  449. def check_text_ellipsis_doctest(reference, actual):
  450. """
  451. >>> check_text_ellipsis_doctest("user: ...\\nname: elevation",
  452. ... "user: some_user\\nname: elevation")
  453. True
  454. >>> check_text_ellipsis_doctest("user: ...\\nname: elevation",
  455. ... "user: \\nname: elevation")
  456. True
  457. This function is using doctest's function to check the result, so we
  458. will discuss here how the underlying function behaves.
  459. >>> checker = doctest.OutputChecker()
  460. >>> checker.check_output("user: some_user\\nname: elevation",
  461. ... "user: some_user\\nname: elevation",
  462. ... optionflags=None)
  463. True
  464. >>> checker.check_output("user: user1\\nname: elevation",
  465. ... "user: some_user\\nname: elevation",
  466. ... optionflags=doctest.ELLIPSIS)
  467. False
  468. >>> checker.check_output("user: ...\\nname: elevation",
  469. ... "user: some_user\\nname: elevation",
  470. ... optionflags=doctest.ELLIPSIS)
  471. True
  472. The ellipsis matches also an empty string, so the following matches:
  473. >>> checker.check_output("user: ...\\nname: elevation",
  474. ... "user: \\nname: elevation",
  475. ... optionflags=doctest.ELLIPSIS)
  476. True
  477. It is robust concerning misspelled matching string but does not allow
  478. ellipsis followed by a dot, e.g. at the end of the sentence:
  479. >>> checker.check_output("user: ....\\nname: elevation",
  480. ... "user: some_user\\nname: elevation",
  481. ... optionflags=doctest.ELLIPSIS)
  482. False
  483. """
  484. # this can be also global
  485. checker = doctest.OutputChecker()
  486. return checker.check_output(reference, actual, optionflags=doctest.ELLIPSIS)
  487. # optimal size depends on file system and maybe on hasher.block_size
  488. _BUFFER_SIZE = 2 ** 16
  489. # TODO: accept also open file object
  490. def file_md5(filename):
  491. """Get MD5 (check) sum of a file."""
  492. hasher = hashlib.md5()
  493. with open(filename, "rb") as f:
  494. buf = f.read(_BUFFER_SIZE)
  495. while len(buf) > 0:
  496. hasher.update(buf)
  497. buf = f.read(_BUFFER_SIZE)
  498. return hasher.hexdigest()
  499. def text_file_md5(
  500. filename, exclude_lines=None, exclude_re=None, prepend_lines=None, append_lines=None
  501. ):
  502. """Get a MD5 (check) sum of a text file.
  503. Works in the same way as `file_md5()` function but ignores newlines
  504. characters and excludes lines from the file as well as prepend or
  505. append them if requested.
  506. :param exclude_lines: list of strings to be excluded
  507. (newline characters should not be part of the strings)
  508. :param exclude_re: regular expression string;
  509. lines matching this regular expression will not be considered
  510. :param prepend_lines: list of lines to be prepended to the file
  511. before computing the sum
  512. :param append_lines: list of lines to be appended to the file
  513. before computing the sum
  514. """
  515. hasher = hashlib.md5()
  516. if exclude_re:
  517. regexp = re.compile(exclude_re)
  518. if prepend_lines:
  519. for line in prepend_lines:
  520. hasher.update(line if sys.version_info[0] == 2 else encode(line))
  521. with open(filename, "r") as f:
  522. for line in f:
  523. # replace platform newlines by standard newline
  524. if os.linesep != "\n":
  525. line = line.rstrip(os.linesep) + "\n"
  526. if exclude_lines and line in exclude_lines:
  527. continue
  528. if exclude_re and regexp.match(line):
  529. continue
  530. hasher.update(line if sys.version_info[0] == 2 else encode(line))
  531. if append_lines:
  532. for line in append_lines:
  533. hasher.update(line if sys.version_info[0] == 2 else encode(line))
  534. return hasher.hexdigest()
  535. def files_equal_md5(filename_a, filename_b):
  536. """Check equality of two files according to their MD5 sums"""
  537. return file_md5(filename_a) == file_md5(filename_b)
  538. def main(): # pragma: no cover
  539. """Run the doctest"""
  540. ret = doctest.testmod()
  541. return ret.failed
  542. if __name__ == "__main__": # pragma: no cover
  543. sys.exit(main())