v.rast.stats.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. #!/usr/bin/env python3
  2. ############################################################################
  3. #
  4. # MODULE: v.rast.stats
  5. # AUTHOR(S): Markus Neteler
  6. # converted to Python by Glynn Clements
  7. # speed up by Markus Metz
  8. # add column choose by Luca Delucchi
  9. # PURPOSE: Calculates univariate statistics from a GRASS raster map
  10. # only for areas covered by vector objects on a per-category base
  11. # COPYRIGHT: (C) 2005-2016 by the GRASS Development Team
  12. #
  13. # This program is free software under the GNU General Public
  14. # License (>=v2). Read the file COPYING that comes with GRASS
  15. # for details.
  16. #
  17. #############################################################################
  18. # %module
  19. # % description: Calculates univariate statistics from a raster map based on a vector map and uploads statistics to new attribute columns.
  20. # % keyword: vector
  21. # % keyword: statistics
  22. # % keyword: raster
  23. # % keyword: univariate statistics
  24. # % keyword: zonal statistics
  25. # % keyword: sampling
  26. # % keyword: querying
  27. # %end
  28. # %flag
  29. # % key: c
  30. # % description: Continue if upload column(s) already exist
  31. # %end
  32. # %flag
  33. # % key: d
  34. # % label: Create densified lines (default: thin lines)
  35. # % description: All cells touched by the line will be set, not only those on the render path
  36. # %end
  37. # %option G_OPT_V_MAP
  38. # %end
  39. # %option G_OPT_V_FIELD
  40. # %end
  41. # %option G_OPT_V_TYPE
  42. # %end
  43. # %option G_OPT_DB_WHERE
  44. # %end
  45. # %option G_OPT_R_INPUTS
  46. # % key: raster
  47. # % description: Name of input raster map to calculate statistics from
  48. # %end
  49. # %option
  50. # % key: column_prefix
  51. # % type: string
  52. # % description: Column prefix for new attribute columns
  53. # % required : yes
  54. # % multiple: yes
  55. # %end
  56. # %option
  57. # % key: method
  58. # % type: string
  59. # % description: The methods to use
  60. # % required: no
  61. # % multiple: yes
  62. # % options: number,null_cells,minimum,maximum,range,average,stddev,variance,coeff_var,sum,first_quartile,median,third_quartile,percentile
  63. # % answer: number,null_cells,minimum,maximum,range,average,stddev,variance,coeff_var,sum,first_quartile,median,third_quartile,percentile
  64. # %end
  65. # %option
  66. # % key: percentile
  67. # % type: integer
  68. # % description: Percentile to calculate
  69. # % options: 0-100
  70. # % answer: 90
  71. # % required : no
  72. # %end
  73. import sys
  74. import os
  75. import atexit
  76. import grass.script as grass
  77. from grass.script.utils import decode
  78. from grass.exceptions import CalledModuleError
  79. def cleanup():
  80. if rastertmp:
  81. grass.run_command(
  82. "g.remove", flags="f", type="raster", name=rastertmp, quiet=True
  83. )
  84. # for f in [tmp, tmpname, sqltmp]:
  85. # grass.try_remove(f)
  86. def main():
  87. global tmp, sqltmp, tmpname, nuldev, vector, rastertmp
  88. rastertmp = False
  89. # setup temporary files
  90. tmp = grass.tempfile()
  91. sqltmp = tmp + ".sql"
  92. # we need a random name
  93. tmpname = grass.basename(tmp)
  94. nuldev = open(os.devnull, "w")
  95. rasters = options["raster"].split(",")
  96. colprefixes = options["column_prefix"].split(",")
  97. vector = options["map"]
  98. layer = options["layer"]
  99. vtypes = options["type"]
  100. where = options["where"]
  101. percentile = options["percentile"]
  102. basecols = options["method"].split(",")
  103. # Get current mapset
  104. env = grass.gisenv()
  105. mapset = env["MAPSET"]
  106. # Get mapset of the vector
  107. vs = vector.split("@")
  108. if len(vs) > 1:
  109. vect_mapset = vs[1]
  110. else:
  111. vect_mapset = mapset
  112. # does map exist in CURRENT mapset?
  113. if vect_mapset != mapset or not grass.find_file(vector, "vector", mapset)["file"]:
  114. grass.fatal(_("Vector map <%s> not found in current mapset") % vector)
  115. # check if DBF driver used, in this case cut to 10 chars col names:
  116. try:
  117. fi = grass.vector_db(map=vector)[int(layer)]
  118. except KeyError:
  119. grass.fatal(
  120. _(
  121. "There is no table connected to this map. Run v.db.connect or v.db.addtable first."
  122. )
  123. )
  124. # we need this for non-DBF driver:
  125. dbfdriver = fi["driver"] == "dbf"
  126. # colprefix for every raster map?
  127. if len(colprefixes) != len(rasters):
  128. grass.fatal(
  129. _(
  130. "Number of raster maps ({0}) different from \
  131. number of column prefixes ({1})".format(
  132. len(rasters), len(colprefixes)
  133. )
  134. )
  135. )
  136. vector = vs[0]
  137. rastertmp = "%s_%s" % (vector, tmpname)
  138. for raster in rasters:
  139. # check the input raster map
  140. if not grass.find_file(raster, "cell")["file"]:
  141. grass.fatal(_("Raster map <%s> not found") % raster)
  142. # save current settings:
  143. grass.use_temp_region()
  144. # Temporarily aligning region resolution to $RASTER resolution
  145. # keep boundary settings
  146. grass.run_command("g.region", align=rasters[0])
  147. # check if DBF driver used, in this case cut to 10 chars col names:
  148. try:
  149. fi = grass.vector_db(map=vector)[int(layer)]
  150. except KeyError:
  151. grass.fatal(
  152. _(
  153. "There is no table connected to this map. "
  154. "Run v.db.connect or v.db.addtable first."
  155. )
  156. )
  157. # we need this for non-DBF driver:
  158. dbfdriver = fi["driver"] == "dbf"
  159. # Find out which table is linked to the vector map on the given layer
  160. if not fi["table"]:
  161. grass.fatal(
  162. _(
  163. "There is no table connected to this map. "
  164. "Run v.db.connect or v.db.addtable first."
  165. )
  166. )
  167. # prepare base raster for zonal statistics
  168. prepare_base_raster(vector, layer, rastertmp, vtypes, where)
  169. # get number of raster categories to be processed
  170. number = get_nr_of_categories(
  171. vector,
  172. layer,
  173. rasters,
  174. rastertmp,
  175. percentile,
  176. colprefixes,
  177. basecols,
  178. dbfdriver,
  179. flags["c"],
  180. )
  181. # calculate statistics:
  182. grass.message(_("Processing input data (%d categories)...") % number)
  183. for i in range(len(rasters)):
  184. raster = rasters[i]
  185. colprefix, variables_dbf, variables, colnames, extstat = set_up_columns(
  186. vector, layer, percentile, colprefixes[i], basecols, dbfdriver, flags["c"]
  187. )
  188. # get rid of any earlier attempts
  189. grass.try_remove(sqltmp)
  190. # do the stats
  191. perform_stats(
  192. raster,
  193. percentile,
  194. fi,
  195. dbfdriver,
  196. colprefix,
  197. variables_dbf,
  198. variables,
  199. colnames,
  200. extstat,
  201. )
  202. grass.message(_("Updating the database ..."))
  203. exitcode = 0
  204. try:
  205. grass.run_command(
  206. "db.execute", input=sqltmp, database=fi["database"], driver=fi["driver"]
  207. )
  208. grass.verbose(
  209. (
  210. _(
  211. "Statistics calculated from raster map <{raster}>"
  212. " and uploaded to attribute table"
  213. " of vector map <{vector}>."
  214. ).format(raster=raster, vector=vector)
  215. )
  216. )
  217. except CalledModuleError:
  218. grass.warning(
  219. _("Failed to upload statistics to attribute table of vector map <%s>.")
  220. % vector
  221. )
  222. exitcode = 1
  223. sys.exit(exitcode)
  224. def prepare_base_raster(vector, layer, rastertmp, vtypes, where):
  225. """Prepare base raster for zonal statistics.
  226. :param vector: name of vector map or data source for direct OGR access
  227. :param layer: layer number or name
  228. :param where: WHERE conditions of SQL statement without 'where' keyword
  229. :param rastertmp: name of temporary raster map
  230. :param vtypes: input feature type
  231. """
  232. try:
  233. nlines = grass.vector_info_topo(vector)["lines"]
  234. kwargs = {}
  235. if where:
  236. kwargs["where"] = where
  237. # Create densified lines rather than thin lines
  238. if flags["d"] and nlines > 0:
  239. kwargs["flags"] = "d"
  240. grass.run_command(
  241. "v.to.rast",
  242. input=vector,
  243. layer=layer,
  244. output=rastertmp,
  245. use="cat",
  246. type=vtypes,
  247. quiet=True,
  248. **kwargs,
  249. )
  250. except CalledModuleError:
  251. grass.fatal(_("An error occurred while converting vector to raster"))
  252. def get_nr_of_categories(
  253. vector, layer, rasters, rastertmp, percentile, colprefixes, basecols, dbfdriver, c
  254. ):
  255. """Get number of raster categories to be processed.
  256. Perform also checks of raster and vector categories. In the case of no
  257. raster categories, create the desired columns and exit.
  258. :param vector: name of vector map or data source for direct OGR access
  259. :param layer: layer number or name
  260. :param rastertmp: name of temporary raster map
  261. :return: number of raster categories or exit (if no categories found)
  262. """
  263. # dump cats to file to avoid "too many argument" problem:
  264. p = grass.pipe_command("r.category", map=rastertmp, sep=";", quiet=True)
  265. cats = []
  266. for line in p.stdout:
  267. line = decode(line)
  268. cats.append(line.rstrip("\r\n").split(";")[0])
  269. p.wait()
  270. number = len(cats)
  271. if number < 1:
  272. # create columns and exit
  273. grass.warning(_("No categories found in raster map"))
  274. for i in range(len(rasters)):
  275. set_up_columns(
  276. vector,
  277. layer,
  278. percentile,
  279. colprefixes[i],
  280. basecols,
  281. dbfdriver,
  282. flags["c"],
  283. )
  284. sys.exit(0)
  285. # Check if all categories got converted
  286. # Report categories from vector map
  287. vect_cats = (
  288. grass.read_command("v.category", input=vector, option="report", flags="g")
  289. .rstrip("\n")
  290. .split("\n")
  291. )
  292. # get number of all categories in selected layer
  293. vect_cats_n = 0 # to be modified below
  294. for vcl in vect_cats:
  295. if vcl.split(" ")[0] == layer and vcl.split(" ")[1] == "all":
  296. vect_cats_n = int(vcl.split(" ")[2])
  297. if vect_cats_n != number:
  298. grass.warning(
  299. _(
  300. "Not all vector categories converted to raster. \
  301. Converted {0} of {1}.".format(
  302. number, vect_cats_n
  303. )
  304. )
  305. )
  306. return number
  307. def set_up_columns(vector, layer, percentile, colprefix, basecols, dbfdriver, c):
  308. """Get columns-depending variables and create columns, if needed.
  309. :param vector: name of vector map or data source for direct OGR access
  310. :param layer: layer number or name
  311. :param percentile: percentile to calculate
  312. :param colprefix: column prefix for new attribute columns
  313. :param basecols: the methods to use
  314. :param dbfdriver: boolean saying if the driver is dbf
  315. :param c: boolean saying if it should continue if upload column(s) already
  316. exist
  317. :return: colprefix, variables_dbf, variables, colnames, extstat
  318. """
  319. # we need at least three chars to distinguish [mea]n from [med]ian
  320. # so colprefix can't be longer than 6 chars with DBF driver
  321. variables_dbf = {}
  322. if dbfdriver:
  323. colprefix = colprefix[:6]
  324. # by default perccol variable is used only for "variables" variable
  325. perccol = "percentile"
  326. perc = None
  327. for b in basecols:
  328. if b.startswith("p"):
  329. perc = b
  330. if perc:
  331. # namespace is limited in DBF but the % value is important
  332. if dbfdriver:
  333. perccol = "per" + percentile
  334. else:
  335. perccol = "percentile_" + percentile
  336. percindex = basecols.index(perc)
  337. basecols[percindex] = perccol
  338. # dictionary with name of methods and position in "r.univar -gt" output
  339. variables = {
  340. "number": 2,
  341. "null_cells": 3,
  342. "minimum": 4,
  343. "maximum": 5,
  344. "range": 6,
  345. "average": 7,
  346. "stddev": 9,
  347. "variance": 10,
  348. "coeff_var": 11,
  349. "sum": 12,
  350. "first_quartile": 14,
  351. "median": 15,
  352. "third_quartile": 16,
  353. perccol: 17,
  354. }
  355. # this list is used to set the 'e' flag for r.univar
  356. extracols = ["first_quartile", "median", "third_quartile", perccol]
  357. addcols = []
  358. colnames = []
  359. extstat = ""
  360. for i in basecols:
  361. # this check the complete name of out input that should be truncated
  362. for k in variables.keys():
  363. if i in k:
  364. i = k
  365. break
  366. if i in extracols:
  367. extstat = "e"
  368. # check if column already present
  369. currcolumn = "%s_%s" % (colprefix, i)
  370. if dbfdriver:
  371. currcolumn = currcolumn[:10]
  372. variables_dbf[currcolumn.replace("%s_" % colprefix, "")] = i
  373. colnames.append(currcolumn)
  374. if currcolumn in grass.vector_columns(vector, layer).keys():
  375. if not c:
  376. grass.fatal(
  377. (_("Cannot create column " "<%s> (already present). ") % currcolumn)
  378. + _("Use -c flag to update values in this column.")
  379. )
  380. else:
  381. if i == "n":
  382. coltype = "INTEGER"
  383. else:
  384. coltype = "DOUBLE PRECISION"
  385. addcols.append(currcolumn + " " + coltype)
  386. if addcols:
  387. grass.verbose(_("Adding columns '%s'") % addcols)
  388. try:
  389. grass.run_command(
  390. "v.db.addcolumn", map=vector, columns=addcols, layer=layer
  391. )
  392. except CalledModuleError:
  393. grass.fatal(_("Adding columns failed. Exiting."))
  394. return colprefix, variables_dbf, variables, colnames, extstat
  395. def perform_stats(
  396. raster,
  397. percentile,
  398. fi,
  399. dbfdriver,
  400. colprefix,
  401. variables_dbf,
  402. variables,
  403. colnames,
  404. extstat,
  405. ):
  406. with open(sqltmp, "w") as f:
  407. # do the stats
  408. p = grass.pipe_command(
  409. "r.univar",
  410. flags="t" + extstat,
  411. map=raster,
  412. zones=rastertmp,
  413. percentile=percentile,
  414. sep=";",
  415. )
  416. first_line = 1
  417. f.write("{0}\n".format(grass.db_begin_transaction(fi["driver"])))
  418. for line in p.stdout:
  419. if first_line:
  420. first_line = 0
  421. continue
  422. vars = decode(line).rstrip("\r\n").split(";")
  423. f.write("UPDATE %s SET" % fi["table"])
  424. first_var = 1
  425. for colname in colnames:
  426. variable = colname.replace("%s_" % colprefix, "", 1)
  427. if dbfdriver:
  428. variable = variables_dbf[variable]
  429. i = variables[variable]
  430. value = vars[i]
  431. # convert nan, +nan, -nan, inf, +inf, -inf, Infinity, +Infinity,
  432. # -Infinity to NULL
  433. if value.lower().endswith("nan") or "inf" in value.lower():
  434. value = "NULL"
  435. if not first_var:
  436. f.write(" , ")
  437. else:
  438. first_var = 0
  439. f.write(" %s=%s" % (colname, value))
  440. f.write(" WHERE %s=%s;\n" % (fi["key"], vars[0]))
  441. f.write("{0}\n".format(grass.db_commit_transaction(fi["driver"])))
  442. p.wait()
  443. if __name__ == "__main__":
  444. options, flags = grass.parser()
  445. atexit.register(cleanup)
  446. main()