v.db.univar.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: v.db.univar (formerly called v.univar.sh)
  5. # AUTHOR(S): Michael Barton, Arizona State University
  6. # Converted to Python by Glynn Clements
  7. # Sync'ed to r.univar by Markus Metz
  8. # PURPOSE: Calculates univariate statistics from a GRASS vector map attribute column.
  9. # Based on r.univar.sh by Markus Neteler
  10. # COPYRIGHT: (C) 2005, 2007, 2008 by the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General Public
  13. # License (>=v2). Read the file COPYING that comes with GRASS
  14. # for details.
  15. #
  16. #############################################################################
  17. # TODO: this module should be named db.univar.
  18. # v.db.univar should have as input options vector and layer instead of table
  19. #%module
  20. #% description: Calculates univariate statistics on selected table column for a GRASS vector map.
  21. #% keywords: vector
  22. #% keywords: statistics
  23. #% keywords: attribute table
  24. #%end
  25. #%flag
  26. #% key: e
  27. #% description: Extended statistics (quartiles and 90th percentile)
  28. #%end
  29. #%flag
  30. #% key: g
  31. #% description: Print stats in shell script style
  32. #%end
  33. #%option G_OPT_DB_TABLE
  34. #% key: table
  35. #% required: yes
  36. #%end
  37. #%option G_OPT_DB_COLUMN
  38. #% description: Name of attribute column on which to calculate statistics (must be numeric)
  39. #% required: yes
  40. #%end
  41. #%option G_OPT_DB_DATABASE
  42. #%end
  43. #%option G_OPT_DB_DRIVER
  44. #% options: dbf,odbc,ogr,sqlite,pg
  45. #%end
  46. #%option G_OPT_DB_WHERE
  47. #%end
  48. #%option
  49. #% key: percentile
  50. #% type: double
  51. #% description: Percentile to calculate (requires extended statistics flag)
  52. #% required : no
  53. #% answer: 90
  54. #% options: 0-100
  55. #% multiple: yes
  56. #%end
  57. import sys
  58. import os
  59. import atexit
  60. import math
  61. from grass.script import core as grass
  62. def cleanup():
  63. for ext in ['', '.sort']:
  64. grass.try_remove(tmp + ext)
  65. def sortfile(infile, outfile):
  66. inf = file(infile, 'r')
  67. outf = file(outfile, 'w')
  68. if grass.find_program('sort', ['-n']):
  69. grass.run_command('sort', flags = 'n', stdin = inf, stdout = outf)
  70. else:
  71. # FIXME: we need a large-file sorting function
  72. grass.warning(_("'sort' not found: sorting in memory"))
  73. lines = inf.readlines()
  74. for i in range(len(lines)):
  75. lines[i] = float(lines[i].rstrip('\r\n'))
  76. lines.sort()
  77. for line in lines:
  78. outf.write(str(line) + '\n')
  79. inf.close()
  80. outf.close()
  81. def main():
  82. global tmp
  83. tmp = grass.tempfile()
  84. extend = flags['e']
  85. shellstyle = flags['g']
  86. table = options['table']
  87. column = options['column']
  88. database = options['database']
  89. driver = options['driver']
  90. where = options['where']
  91. perc = options['percentile']
  92. perc = [float(p) for p in perc.split(',')]
  93. if not shellstyle:
  94. grass.message(_("Calculation for column <%s> of table <%s>...") % (column, table))
  95. grass.message(_("Reading column values..."))
  96. sql = "SELECT %s FROM %s" % (column, table)
  97. if where:
  98. sql += " WHERE " + where
  99. if not database:
  100. database = None
  101. if not driver:
  102. driver = None
  103. tmpf = file(tmp, 'w')
  104. grass.run_command('db.select', flags = 'c', table = table,
  105. database = database, driver = driver, sql = sql,
  106. stdout = tmpf)
  107. tmpf.close()
  108. # check if result is empty
  109. tmpf = file(tmp)
  110. if tmpf.read(1) == '':
  111. grass.fatal(_("Table <%s> contains no data.") % table)
  112. tmpf.close()
  113. # calculate statistics
  114. if not shellstyle:
  115. grass.message(_("Calculating statistics..."))
  116. N = 0
  117. sum = 0.0
  118. sum2 = 0.0
  119. sum3 = 0.0
  120. minv = 1e300
  121. maxv = -1e300
  122. tmpf = file(tmp)
  123. for line in tmpf:
  124. if len(line.rstrip('\r\n')) == 0:
  125. continue
  126. x = float(line.rstrip('\r\n'))
  127. N += 1
  128. sum += x
  129. sum2 += x * x
  130. sum3 += abs(x)
  131. maxv = max(maxv, x)
  132. minv = min(minv, x)
  133. tmpf.close()
  134. if N <= 0:
  135. grass.fatal(_("No non-null values found"))
  136. if not shellstyle:
  137. print ""
  138. print "Number of values: %d" % N
  139. print "Minimum: %g" % minv
  140. print "Maximum: %g" % maxv
  141. print "Range: %g" % (maxv - minv)
  142. print "-----"
  143. print "Mean: %g" % (sum/N)
  144. print "Arithmetic mean of absolute values: %g" % (sum3/N)
  145. print "Variance: %g" % ((sum2 - sum*sum/N)/N)
  146. print "Standard deviation: %g" % (math.sqrt((sum2 - sum*sum/N)/N))
  147. print "Coefficient of variation: %g" % ((math.sqrt((sum2 - sum*sum/N)/N))/(math.sqrt(sum*sum)/N))
  148. print "Sum: %g" % sum
  149. print "-----"
  150. else:
  151. print "n=%d" % N
  152. print "min=%.15g" % minv
  153. print "max=%.15g" % maxv
  154. print "range=%.15g" % (maxv - minv)
  155. print "mean=%.15g" % (sum/N)
  156. print "mean_abs=%.15g" % (sum3/N)
  157. print "variance=%.15g" % ((sum2 - sum*sum/N)/N)
  158. print "stddev=%.15g" % (math.sqrt((sum2 - sum*sum/N)/N))
  159. print "coeff_var=%.15g" % ((math.sqrt((sum2 - sum*sum/N)/N))/(math.sqrt(sum*sum)/N))
  160. print "sum=%.15g" % sum
  161. if not extend:
  162. return
  163. # preparations:
  164. sortfile(tmp, tmp + ".sort")
  165. number = N
  166. odd = N % 2
  167. eostr = ['even','odd'][odd]
  168. q25pos = round(N * 0.25)
  169. if q25pos == 0:
  170. q25pos = 1
  171. q50apos = round(N * 0.50)
  172. if q50apos == 0:
  173. q50apos = 1
  174. q50bpos = q50apos + (1 - odd)
  175. q75pos = round(N * 0.75)
  176. if q75pos == 0:
  177. q75pos = 1
  178. ppos = {}
  179. pval = {}
  180. for i in range(len(perc)):
  181. ppos[i] = round(N * perc[i] / 100)
  182. if ppos[i] == 0:
  183. ppos[i] = 1
  184. pval[i] = 0
  185. inf = file(tmp + ".sort")
  186. l = 1
  187. for line in inf:
  188. if l == q25pos:
  189. q25 = float(line.rstrip('\r\n'))
  190. if l == q50apos:
  191. q50a = float(line.rstrip('\r\n'))
  192. if l == q50bpos:
  193. q50b = float(line.rstrip('\r\n'))
  194. if l == q75pos:
  195. q75 = float(line.rstrip('\r\n'))
  196. for i in range(len(ppos)):
  197. if l == ppos[i]:
  198. pval[i] = float(line.rstrip('\r\n'))
  199. l += 1
  200. q50 = (q50a + q50b) / 2
  201. if not shellstyle:
  202. print "1st Quartile: %g" % q25
  203. print "Median (%s N): %g" % (eostr, q50)
  204. print "3rd Quartile: %g" % q75
  205. for i in range(len(perc)):
  206. if perc[i] == int(perc[i]): # integer
  207. if int(perc[i]) % 10 == 1 and int(perc[i]) != 11:
  208. print "%dst Percentile: %g" % (int(perc[i]), pval[i])
  209. elif int(perc[i]) % 10 == 2 and int(perc[i]) != 12:
  210. print "%dnd Percentile: %g" % (int(perc[i]), pval[i])
  211. elif int(perc[i]) % 10 == 3 and int(perc[i]) != 13:
  212. print "%drd Percentile: %g" % (int(perc[i]), pval[i])
  213. else:
  214. print "%dth Percentile: %g" % (int(perc[i]), pval[i])
  215. else:
  216. print "%.15g Percentile: %g" % (perc[i], pval[i])
  217. else:
  218. print "first_quartile=%g" % q25
  219. print "median=%g" % q50
  220. print "third_quartile=%g" % q75
  221. for i in range(len(perc)):
  222. percstr = "%.15g" % perc[i]
  223. percstr = percstr.replace('.','_')
  224. print "percentile_%s=%g" % (percstr, pval[i])
  225. if __name__ == "__main__":
  226. options, flags = grass.parser()
  227. atexit.register(cleanup)
  228. main()