module.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. # -*- coding: utf-8 -*-
  2. from __future__ import (nested_scopes, generators, division, absolute_import,
  3. with_statement, print_function, unicode_literals)
  4. import sys
  5. from multiprocessing import cpu_count
  6. if sys.version_info[0] == 2:
  7. from itertools import izip_longest as zip_longest
  8. else:
  9. from itertools import zip_longest
  10. from xml.etree.ElementTree import fromstring
  11. import time
  12. from grass.exceptions import CalledModuleError
  13. from grass.script.core import Popen, PIPE
  14. from grass.pygrass.errors import GrassError, ParameterError
  15. from grass.pygrass.functions import docstring_property
  16. from grass.pygrass.modules.interface.parameter import Parameter
  17. from grass.pygrass.modules.interface.flag import Flag
  18. from grass.pygrass.modules.interface.typedict import TypeDict
  19. from grass.pygrass.modules.interface.read import GETFROMTAG, DOC
  20. class ParallelModuleQueue(object):
  21. """This class is designed to run an arbitrary number of pygrass Module
  22. processes in parallel.
  23. Objects of type grass.pygrass.modules.Module can be put into the
  24. queue using put() method. When the queue is full with the maximum
  25. number of parallel processes it will wait for all processes to finish,
  26. sets the stdout and stderr of the Module object and removes it
  27. from the queue when its finished.
  28. This class will raise a GrassError in case a Module process exits
  29. with a return code other than 0.
  30. Usage:
  31. >>> import copy
  32. >>> from grass.pygrass.modules import Module, ParallelModuleQueue
  33. >>> mapcalc_list = []
  34. >>> mapcalc = Module("r.mapcalc", overwrite=True, run_=False)
  35. >>> queue = ParallelModuleQueue(nprocs=3)
  36. >>> for i in xrange(5):
  37. ... new_mapcalc = copy.deepcopy(mapcalc)
  38. ... mapcalc_list.append(new_mapcalc)
  39. ... new_mapcalc(expression="test_pygrass_%i = %i"%(i, i))
  40. ... queue.put(new_mapcalc)
  41. Module('r.mapcalc')
  42. Module('r.mapcalc')
  43. Module('r.mapcalc')
  44. Module('r.mapcalc')
  45. Module('r.mapcalc')
  46. >>> queue.wait()
  47. >>> for mapcalc in mapcalc_list:
  48. ... print(mapcalc.popen.returncode)
  49. 0
  50. 0
  51. 0
  52. 0
  53. 0
  54. """
  55. def __init__(self, nprocs=1):
  56. """Constructor
  57. :param nprocs: The maximum number of Module processes that
  58. can be run in parallel, defualt is 1, if None
  59. then use all the available CPUs.
  60. :type nprocs: int
  61. """
  62. nprocs = int(nprocs) if nprocs else cpu_count()
  63. self._num_procs = nprocs
  64. self._list = nprocs * [None]
  65. self._proc_count = 0
  66. def put(self, module):
  67. """Put the next Module object in the queue
  68. To run the Module objects in parallel the run_ and finish_ options
  69. of the Module must be set to False.
  70. :param module: a preconfigured Module object with run_ and finish_
  71. set to False
  72. :type module: Module object
  73. """
  74. self._list[self._proc_count] = module
  75. # Force that finish is False, otherwise the execution
  76. # will not be parallel
  77. self._list[self._proc_count].finish_ = False
  78. self._list[self._proc_count].run()
  79. self._proc_count += 1
  80. if self._proc_count == self._num_procs:
  81. self.wait()
  82. def get(self, num):
  83. """Get a Module object from the queue
  84. :param num: the number of the object in queue
  85. :type num: int
  86. :returns: the Module object or None if num is not in the queue
  87. """
  88. if num < self._num_procs:
  89. return self._list[num]
  90. return None
  91. def get_num_run_procs(self):
  92. """Get the number of Module processes that are in the queue running
  93. or finished
  94. :returns: the maximum number fo Module processes running/finished in
  95. the queue
  96. """
  97. return len(self._list)
  98. def get_max_num_procs(self):
  99. """Return the maximum number of parallel Module processes
  100. """
  101. return self._num_procs
  102. def set_max_num_procs(self, nprocs):
  103. """Set the maximum number of Module processes that should run
  104. in parallel
  105. :param nprocs: The maximum number of Module processes that
  106. can be run in parallel
  107. :type nprocs: int
  108. """
  109. self._num_procs = int(nprocs)
  110. self.wait()
  111. def wait(self):
  112. """Wait for all Module processes that are in the list to finish
  113. and set the modules stdout and stderr output options
  114. """
  115. for proc in self._list:
  116. if proc:
  117. stdout, stderr = proc.popen.communicate(input=proc.stdin)
  118. proc.outputs['stdout'].value = stdout if stdout else ''
  119. proc.outputs['stderr'].value = stderr if stderr else ''
  120. if proc.popen.returncode != 0:
  121. GrassError(("Error running module %s") % (proc.name))
  122. self._list = self._num_procs * [None]
  123. self._proc_count = 0
  124. class Module(object):
  125. """This class is design to wrap/run/interact with the GRASS modules.
  126. The class during the init phase read the XML description generate using
  127. the ``--interface-description`` in order to understand which parameters
  128. are required which optionals. ::
  129. >>> from grass.pygrass.modules import Module
  130. >>> from subprocess import PIPE
  131. >>> import copy
  132. >>> region = Module("g.region")
  133. >>> region.flags.p = True # set flags
  134. >>> region.flags.u = True
  135. >>> region.flags["3"].value = True # set numeric flags
  136. >>> region.get_bash()
  137. u'g.region -p -3 -u'
  138. >>> new_region = copy.deepcopy(region)
  139. >>> new_region.inputs.res = "10"
  140. >>> new_region.get_bash()
  141. u'g.region res=10 -p -3 -u'
  142. >>> neighbors = Module("r.neighbors")
  143. >>> neighbors.inputs.input = "mapA"
  144. >>> neighbors.outputs.output = "mapB"
  145. >>> neighbors.inputs.size = 5
  146. >>> neighbors.inputs.quantile = 0.5
  147. >>> neighbors.get_bash()
  148. u'r.neighbors input=mapA method=average size=5 quantile=0.5 output=mapB'
  149. >>> new_neighbors1 = copy.deepcopy(neighbors)
  150. >>> new_neighbors1.inputs.input = "mapD"
  151. >>> new_neighbors1.inputs.size = 3
  152. >>> new_neighbors1.inputs.quantile = 0.5
  153. >>> new_neighbors1.get_bash()
  154. u'r.neighbors input=mapD method=average size=3 quantile=0.5 output=mapB'
  155. >>> new_neighbors2 = copy.deepcopy(neighbors)
  156. >>> new_neighbors2(input="mapD", size=3, run_=False)
  157. Module('r.neighbors')
  158. >>> new_neighbors2.get_bash()
  159. u'r.neighbors input=mapD method=average size=3 quantile=0.5 output=mapB'
  160. >>> neighbors = Module("r.neighbors")
  161. >>> neighbors.get_bash()
  162. u'r.neighbors method=average size=3'
  163. >>> new_neighbors3 = copy.deepcopy(neighbors)
  164. >>> new_neighbors3(input="mapA", size=3, output="mapB", run_=False)
  165. Module('r.neighbors')
  166. >>> new_neighbors3.get_bash()
  167. u'r.neighbors input=mapA method=average size=3 output=mapB'
  168. >>> mapcalc = Module("r.mapcalc", expression="test_a = 1",
  169. ... overwrite=True, run_=False)
  170. >>> mapcalc.run()
  171. Module('r.mapcalc')
  172. >>> mapcalc.popen.returncode
  173. 0
  174. >>> colors = Module("r.colors", map="test_a", rules="-",
  175. ... run_=False, stdout_=PIPE,
  176. ... stderr_=PIPE, stdin_="1 red")
  177. >>> colors.run()
  178. Module('r.colors')
  179. >>> colors.popen.returncode
  180. 0
  181. >>> colors.inputs["stdin"].value
  182. u'1 red'
  183. >>> colors.outputs["stdout"].value
  184. u''
  185. >>> colors.outputs["stderr"].value.strip()
  186. "Color table for raster map <test_a> set to 'rules'"
  187. >>> colors = Module("r.colors", map="test_a", rules="-",
  188. ... run_=False, finish_=False, stdin_=PIPE)
  189. >>> colors.run()
  190. Module('r.colors')
  191. >>> stdout, stderr = colors.popen.communicate(input="1 red")
  192. >>> colors.popen.returncode
  193. 0
  194. >>> stdout
  195. >>> stderr
  196. >>> colors = Module("r.colors", map="test_a", rules="-",
  197. ... run_=False, finish_=False,
  198. ... stdin_=PIPE, stderr_=PIPE)
  199. >>> colors.run()
  200. Module('r.colors')
  201. >>> stdout, stderr = colors.popen.communicate(input="1 red")
  202. >>> colors.popen.returncode
  203. 0
  204. >>> stdout
  205. >>> stderr.strip()
  206. "Color table for raster map <test_a> set to 'rules'"
  207. Run a second time
  208. >>> colors.run()
  209. Module('r.colors')
  210. >>> stdout, stderr = colors.popen.communicate(input="1 blue")
  211. >>> colors.popen.returncode
  212. 0
  213. >>> stdout
  214. >>> stderr.strip()
  215. "Color table for raster map <test_a> set to 'rules'"
  216. Multiple run test
  217. >>> colors = Module("r.colors", map="test_a",
  218. ... color="ryb", run_=False)
  219. >>> colors.run()
  220. Module('r.colors')
  221. >>> colors(color="gyr")
  222. Module('r.colors')
  223. >>> colors.run()
  224. Module('r.colors')
  225. >>> colors(color="ryg")
  226. Module('r.colors')
  227. >>> colors(stderr_=PIPE)
  228. Module('r.colors')
  229. >>> colors.run()
  230. Module('r.colors')
  231. >>> print(colors.outputs["stderr"].value.strip())
  232. Color table for raster map <test_a> set to 'ryg'
  233. >>> colors(color="byg")
  234. Module('r.colors')
  235. >>> colors(stdout_=PIPE)
  236. Module('r.colors')
  237. >>> colors.run()
  238. Module('r.colors')
  239. >>> print(colors.outputs["stderr"].value.strip())
  240. Color table for raster map <test_a> set to 'byg'
  241. Often in the Module class you can find ``*args`` and ``kwargs`` annotation
  242. in methods, like in the __call__ method.
  243. Python allow developers to not specify all the arguments and
  244. keyword arguments of a method or function. ::
  245. def f(*args):
  246. for arg in args:
  247. print arg
  248. therefore if we call the function like: ::
  249. >>> f('grass', 'gis', 'modules') # doctest: +SKIP
  250. grass
  251. gis
  252. modules
  253. or we can define a new list: ::
  254. >>> words = ['grass', 'gis', 'modules'] # doctest: +SKIP
  255. >>> f(*words) # doctest: +SKIP
  256. grass
  257. gis
  258. modules
  259. we can do the same with keyword arguments, rewrite the above function: ::
  260. def f(*args, **kargs):
  261. for arg in args:
  262. print arg
  263. for key, value in kargs.items():
  264. print "%s = %r" % (key, value)
  265. now we can use the new function, with: ::
  266. >>> f('grass', 'gis', 'modules', os = 'linux', language = 'python')
  267. ... # doctest: +SKIP
  268. grass
  269. gis
  270. modules
  271. os = 'linux'
  272. language = 'python'
  273. or, as before we can, define a dictionary and give the dictionary to
  274. the function, like: ::
  275. >>> keywords = {'os' : 'linux', 'language' : 'python'}
  276. ... # doctest: +SKIP
  277. >>> f(*words, **keywords) # doctest: +SKIP
  278. grass
  279. gis
  280. modules
  281. os = 'linux'
  282. language = 'python'
  283. In the Module class we heavily use this language feature to pass arguments
  284. and keyword arguments to the grass module.
  285. """
  286. def __init__(self, cmd, *args, **kargs):
  287. if isinstance(cmd, unicode):
  288. self.name = str(cmd)
  289. elif isinstance(cmd, str):
  290. self.name = cmd
  291. else:
  292. raise GrassError("Problem initializing the module {s}".format(s=cmd))
  293. try:
  294. # call the command with --interface-description
  295. get_cmd_xml = Popen([cmd, "--interface-description"], stdout=PIPE)
  296. except OSError as e:
  297. print("OSError error({0}): {1}".format(e.errno, e.strerror))
  298. str_err = "Error running: `%s --interface-description`."
  299. raise GrassError(str_err % self.name)
  300. # get the xml of the module
  301. self.xml = get_cmd_xml.communicate()[0]
  302. # transform and parse the xml into an Element class:
  303. # http://docs.python.org/library/xml.etree.elementtree.html
  304. tree = fromstring(self.xml)
  305. for e in tree:
  306. if e.tag not in ('parameter', 'flag'):
  307. self.__setattr__(e.tag, GETFROMTAG[e.tag](e))
  308. #
  309. # extract parameters from the xml
  310. #
  311. self.params_list = [Parameter(p) for p in tree.findall("parameter")]
  312. self.inputs = TypeDict(Parameter)
  313. self.outputs = TypeDict(Parameter)
  314. self.required = []
  315. # Insert parameters into input/output and required
  316. for par in self.params_list:
  317. if par.input:
  318. self.inputs[par.name] = par
  319. else:
  320. self.outputs[par.name] = par
  321. if par.required:
  322. self.required.append(par.name)
  323. #
  324. # extract flags from the xml
  325. #
  326. flags_list = [Flag(f) for f in tree.findall("flag")]
  327. self.flags = TypeDict(Flag)
  328. for flag in flags_list:
  329. self.flags[flag.name] = flag
  330. #
  331. # Add new attributes to the class
  332. #
  333. self.run_ = True
  334. self.finish_ = True
  335. self.env_ = None
  336. self.stdin_ = None
  337. self.stdin = None
  338. self.stdout_ = None
  339. self.stderr_ = None
  340. diz = {'name': 'stdin', 'required': False,
  341. 'multiple': False, 'type': 'all',
  342. 'value': None}
  343. self.inputs['stdin'] = Parameter(diz=diz)
  344. diz['name'] = 'stdout'
  345. self.outputs['stdout'] = Parameter(diz=diz)
  346. diz['name'] = 'stderr'
  347. self.outputs['stderr'] = Parameter(diz=diz)
  348. self.popen = None
  349. self.time = None
  350. if args or kargs:
  351. self.__call__(*args, **kargs)
  352. self.__call__.__func__.__doc__ = self.__doc__
  353. def __call__(self, *args, **kargs):
  354. """Set module paramters to the class and, if run_ is True execute the
  355. module, therefore valid parameters are all the module parameters
  356. plus some extra parameters that are: run_, stdin_, stdout_, stderr_,
  357. env_ and finish_.
  358. """
  359. if not args and not kargs:
  360. self.run()
  361. return self
  362. #
  363. # check for extra kargs, set attribute and remove from dictionary
  364. #
  365. if 'flags' in kargs:
  366. for flg in kargs['flags']:
  367. self.flags[flg].value = True
  368. del(kargs['flags'])
  369. # set attributs
  370. for key in ('run_', 'env_', 'finish_', 'stdout_', 'stderr_'):
  371. if key in kargs:
  372. setattr(self, key, kargs.pop(key))
  373. # set inputs
  374. for key in ('stdin_', ):
  375. if key in kargs:
  376. self.inputs[key[:-1]].value = kargs.pop(key)
  377. #
  378. # check args
  379. #
  380. for param, arg in zip(self.params_list, args):
  381. param.value = arg
  382. for key, val in kargs.items():
  383. if key in self.inputs:
  384. self.inputs[key].value = val
  385. elif key in self.outputs:
  386. self.outputs[key].value = val
  387. elif key in self.flags:
  388. # we need to add this, because some parameters (overwrite,
  389. # verbose and quiet) work like parameters
  390. self.flags[key].value = val
  391. else:
  392. raise ParameterError('%s is not a valid parameter.' % key)
  393. #
  394. # check if execute
  395. #
  396. if self.run_:
  397. #
  398. # check reqire parameters
  399. #
  400. for k in self.required:
  401. if ((k in self.inputs and self.inputs[k].value is None) or
  402. (k in self.outputs and self.outputs[k].value is None)):
  403. msg = "Required parameter <%s> not set."
  404. raise ParameterError(msg % k)
  405. return self.run()
  406. return self
  407. def get_bash(self):
  408. """Return a BASH rapresentation of the Module."""
  409. return ' '.join(self.make_cmd())
  410. def get_python(self):
  411. """Return a Python rapresentation of the Module."""
  412. prefix = self.name.split('.')[0]
  413. name = '_'.join(self.name.split('.')[1:])
  414. params = ', '.join([par.get_python() for par in self.params_list
  415. if par.get_python() != ''])
  416. flags = ''.join([flg.get_python()
  417. for flg in self.flags.values()
  418. if not flg.special and flg.get_python() != ''])
  419. special = ', '.join([flg.get_python()
  420. for flg in self.flags.values()
  421. if flg.special and flg.get_python() != ''])
  422. # pre name par flg special
  423. if flags and special:
  424. return "%s.%s(%s, flags=%r, %s)" % (prefix, name, params,
  425. flags, special)
  426. elif flags:
  427. return "%s.%s(%s, flags=%r)" % (prefix, name, params, flags)
  428. elif special:
  429. return "%s.%s(%s, %s)" % (prefix, name, params, special)
  430. else:
  431. return "%s.%s(%s)" % (prefix, name, params)
  432. def __str__(self):
  433. """Return the command string that can be executed in a shell"""
  434. return ' '.join(self.make_cmd())
  435. def __repr__(self):
  436. return "Module(%r)" % self.name
  437. @docstring_property(__doc__)
  438. def __doc__(self):
  439. """{cmd_name}({cmd_params})
  440. """
  441. head = DOC['head'].format(cmd_name=self.name,
  442. cmd_params=('\n' + # go to a new line
  443. # give space under the function name
  444. (' ' * (len(self.name) + 1))).join([', '.join(
  445. # transform each parameter in string
  446. [str(param) for param in line if param is not None])
  447. # make a list of parameters with only 3 param per line
  448. for line in zip_longest(*[iter(self.params_list)] * 3)]),)
  449. params = '\n'.join([par.__doc__ for par in self.params_list])
  450. flags = self.flags.__doc__
  451. return '\n'.join([head, params, DOC['flag_head'], flags, DOC['foot']])
  452. def get_dict(self):
  453. """Return a dictionary that includes the name, all valid
  454. inputs, outputs and flags
  455. """
  456. dic = {}
  457. dic['name'] = self.name
  458. dic['inputs'] = [(k, v.value) for k, v in self.inputs.items()
  459. if v.value]
  460. dic['outputs'] = [(k, v.value) for k, v in self.outputs.items()
  461. if v.value]
  462. dic['flags'] = [flg for flg in self.flags if self.flags[flg].value]
  463. return dic
  464. def make_cmd(self):
  465. """Create the command string that can be executed in a shell
  466. :returns: the command string
  467. """
  468. skip = ['stdin', 'stdout', 'stderr']
  469. args = [self.name, ]
  470. for key in self.inputs:
  471. if key not in skip and self.inputs[key].value:
  472. args.append(self.inputs[key].get_bash())
  473. for key in self.outputs:
  474. if key not in skip and self.outputs[key].value:
  475. args.append(self.outputs[key].get_bash())
  476. for flg in self.flags:
  477. if self.flags[flg].value:
  478. args.append(str(self.flags[flg]))
  479. return args
  480. def run(self, node=None):
  481. """Run the module
  482. :param node:
  483. :type node:
  484. This function will wait for the process to terminate in case
  485. finish_==True and sets up stdout and stderr. If finish_==False this
  486. function will return after starting the process. Use
  487. self.popen.communicate() of self.popen.wait() to wait for the process
  488. termination. The handling of stdout and stderr must then be done
  489. outside of this function.
  490. """
  491. if self.inputs['stdin'].value:
  492. self.stdin = self.inputs['stdin'].value
  493. self.stdin_ = PIPE
  494. cmd = self.make_cmd()
  495. start = time.time()
  496. self.popen = Popen(cmd,
  497. stdin=self.stdin_,
  498. stdout=self.stdout_,
  499. stderr=self.stderr_,
  500. env=self.env_)
  501. if self.finish_:
  502. stdout, stderr = self.popen.communicate(input=self.stdin)
  503. self.outputs['stdout'].value = stdout if stdout else ''
  504. self.outputs['stderr'].value = stderr if stderr else ''
  505. self.time = time.time() - start
  506. if self.popen.poll():
  507. raise CalledModuleError(returncode=self.popen.returncode,
  508. code=self.get_bash(),
  509. module=self.name, errors=stderr)
  510. return self
  511. ###############################################################################
  512. if __name__ == "__main__":
  513. import doctest
  514. doctest.testmod()