runners.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. # MODULE: grass.benchmark
  2. #
  3. # AUTHOR(S): Aaron Saw Min Sern <aaronsms u nus edu>
  4. # Vaclav Petras <wenzeslaus gmail com>
  5. #
  6. # PURPOSE: Benchmarking for GRASS GIS modules
  7. #
  8. # COPYRIGHT: (C) 2021 Vaclav Petras, and by the GRASS Development Team
  9. #
  10. # This program is free software under the GNU General Public
  11. # License (>=v2). Read the file COPYING that comes with GRASS
  12. # for details.
  13. """Basic functions for benchmarking modules"""
  14. import random
  15. import shutil
  16. from types import SimpleNamespace
  17. import grass.script as gs
  18. def benchmark_single(module, label, repeat=5):
  19. """Benchmark module as is without chaning anything.
  20. *module* is an instance of PyGRASS Module class or any object which
  21. has a *run* method which takes no arguments and executes the benchmarked code,
  22. and attribute *time* which is set to execution time after the *run*
  23. function returned. Additionally, the object should be convertible to *str*
  24. for printing.
  25. *repeat* sets how many times the each run is repeated.
  26. *label* is a text to add to the result (for user-facing display).
  27. Returns an object with attributes *time* (an average execution time),
  28. *all_times* (list of measured execution times),
  29. and *label* (the provided parameter as is).
  30. """
  31. term_size = shutil.get_terminal_size()
  32. if hasattr(module, "get_bash"):
  33. print(module.get_bash())
  34. else:
  35. print(module)
  36. min_avg = float("inf")
  37. print("\u2500" * term_size.columns)
  38. time_sum = 0
  39. measured_times = []
  40. for _ in range(repeat):
  41. module.run()
  42. print(f"{module.time}s")
  43. time_sum += module.time
  44. measured_times.append(module.time)
  45. avg = time_sum / repeat
  46. if avg < min_avg:
  47. min_avg = avg
  48. print(f"\nResult - {avg}s")
  49. print("\u2500" * term_size.columns)
  50. print(f"Best average time - {min_avg}s\n")
  51. return SimpleNamespace(
  52. all_times=measured_times,
  53. time=avg,
  54. label=label,
  55. )
  56. def benchmark_nprocs(module, label, max_nprocs, repeat=5, shuffle=True):
  57. """Benchmark module using values of nprocs up to *max_nprocs*.
  58. *module* is an instance of PyGRASS Module class or any object which
  59. has a *update* method taking *nprocs* as a keyword argument,
  60. a *run* which takes no arguments and executes the benchmarked code,
  61. and attribute *time* which is set to execution time after the *run*
  62. function returned. Additionally, the object should be convertible to *str*
  63. for printing.
  64. The module is executed for each generated value of nprocs. *max_nprocs* is used
  65. to generate a continuous range of integer values from 1 up to *max_nprocs*.
  66. *repeat* sets how many times the each run is repeated.
  67. So, the module will run ``max_nprocs * repeat`` times.
  68. Runs are executed in random order, set *shuffle* to false if they
  69. need to be executed in order based on number of threads.
  70. *label* is a text to add to the result (for user-facing display).
  71. Optional *nprocs* is passed to the module if present.
  72. Returns an object with attributes *times* (list of average execution times),
  73. *all_times* (list of lists of measured execution times), *nprocs*
  74. (list of *nprocs* values used), and *label* (the provided parameter as is).
  75. """
  76. term_size = shutil.get_terminal_size()
  77. if hasattr(module, "get_bash"):
  78. print(module.get_bash())
  79. else:
  80. print(module)
  81. min_avg = float("inf")
  82. min_time = None
  83. serial_avg = None
  84. avg_times = []
  85. all_times = []
  86. nprocs_list = list(range(1, max_nprocs + 1))
  87. nprocs_list_shuffled = sorted(nprocs_list * repeat)
  88. if shuffle:
  89. random.shuffle(nprocs_list_shuffled)
  90. times = {}
  91. print("\u2500" * term_size.columns)
  92. for nprocs in nprocs_list_shuffled:
  93. module.update(nprocs=nprocs)
  94. module.run()
  95. print(f"Run with {nprocs} thread(s) took {module.time}s\n")
  96. if nprocs in times:
  97. times[nprocs] += [module.time]
  98. else:
  99. times[nprocs] = [module.time]
  100. for nprocs in sorted(times):
  101. avg = sum(times[nprocs]) / repeat
  102. avg_times.append(avg)
  103. all_times.append(times[nprocs])
  104. if nprocs == 1:
  105. serial_avg = avg
  106. if avg < min_avg:
  107. min_avg = avg
  108. min_time = nprocs
  109. print("\u2500" * term_size.columns)
  110. if serial_avg is not None:
  111. print(f"\nSerial average time - {serial_avg}s")
  112. print(f"Best average time - {min_avg}s ({min_time} threads)\n")
  113. return SimpleNamespace(
  114. all_times=all_times,
  115. times=avg_times,
  116. nprocs=nprocs_list,
  117. label=label,
  118. )
  119. def benchmark_resolutions(module, resolutions, label, repeat=5, nprocs=None):
  120. """Benchmark module using different resolutions.
  121. *module* is an instance of PyGRASS Module class or any object
  122. with attributes as specified in :func:`benchmark_nprocs`
  123. except that the *update* method is required only when *nprocs* is set.
  124. *resolutions* is a list of resolutions to set (current region is currently
  125. used and changed but that may change in the future).
  126. *repeat* sets how many times the each run is repeated.
  127. So, the module will run ``len(resolutions) * repeat`` times.
  128. *label* is a text to add to the result (for user-facing display).
  129. Optional *nprocs* is passed to the module if present
  130. (the called module does not have to support nprocs parameter).
  131. Returns an object with attributes *times* (list of average execution times),
  132. *all_times* (list of lists of measured execution times), *resolutions*
  133. (the provided parameter as is), *cells* (number of cells in the region),
  134. and *label* (the provided parameter as is).
  135. """
  136. term_size = shutil.get_terminal_size()
  137. if hasattr(module, "get_bash"):
  138. print(module.get_bash())
  139. else:
  140. print(module)
  141. avg_times = []
  142. all_times = []
  143. n_cells = []
  144. for resolution in resolutions:
  145. gs.run_command("g.region", res=resolution)
  146. region = gs.region()
  147. n_cells.append(region["cells"])
  148. print("\u2500" * term_size.columns)
  149. print(f"Benchmark with {resolution} resolution...\n")
  150. time_sum = 0
  151. measured_times = []
  152. for _ in range(repeat):
  153. if nprocs:
  154. module.update(nprocs=nprocs)
  155. module.run()
  156. print(f"{module.time}s")
  157. time_sum += module.time
  158. measured_times.append(module.time)
  159. avg = time_sum / repeat
  160. avg_times.append(avg)
  161. all_times.append(measured_times)
  162. print(f"\nResult - {avg}s")
  163. return SimpleNamespace(
  164. all_times=all_times,
  165. times=avg_times,
  166. resolutions=resolutions,
  167. cells=n_cells,
  168. label=label,
  169. )