db.univar.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: db.univar (formerly called v.univar.sh)
  5. # AUTHOR(S): Michael Barton, Arizona State University
  6. # Converted to Python by Glynn Clements
  7. # Sync'ed to r.univar by Markus Metz
  8. # PURPOSE: Calculates univariate statistics from a GRASS vector map attribute column.
  9. # Based on r.univar.sh by Markus Neteler
  10. # COPYRIGHT: (C) 2005, 2007, 2008 by the GRASS Development Team
  11. #
  12. # This program is free software under the GNU General Public
  13. # License (>=v2). Read the file COPYING that comes with GRASS
  14. # for details.
  15. #
  16. #############################################################################
  17. #%module
  18. #% description: Calculates univariate statistics on selected table column.
  19. #% keywords: database
  20. #% keywords: statistics
  21. #% keywords: attribute table
  22. #%end
  23. #%option G_OPT_DB_TABLE
  24. #% key: table
  25. #% required: yes
  26. #%end
  27. #%option G_OPT_DB_COLUMN
  28. #% description: Name of attribute column on which to calculate statistics (must be numeric)
  29. #% required: yes
  30. #%end
  31. #%option G_OPT_DB_DATABASE
  32. #%end
  33. #%option G_OPT_DB_DRIVER
  34. #% options: dbf,odbc,ogr,sqlite,pg
  35. #%end
  36. #%option G_OPT_DB_WHERE
  37. #%end
  38. #%option
  39. #% key: percentile
  40. #% type: double
  41. #% description: Percentile to calculate (requires extended statistics flag)
  42. #% required : no
  43. #% answer: 90
  44. #% options: 0-100
  45. #% multiple: yes
  46. #%end
  47. #%flag
  48. #% key: e
  49. #% description: Extended statistics (quartiles and 90th percentile)
  50. #%end
  51. #%flag
  52. #% key: g
  53. #% description: Print stats in shell script style
  54. #%end
  55. import sys
  56. import os
  57. import atexit
  58. import math
  59. from grass.script.utils import try_remove
  60. from grass.script import core as grass
  61. def cleanup():
  62. for ext in ['', '.sort']:
  63. try_remove(tmp + ext)
  64. def sortfile(infile, outfile):
  65. inf = file(infile, 'r')
  66. outf = file(outfile, 'w')
  67. if grass.find_program('sort', '--help'):
  68. grass.run_command('sort', flags = 'n', stdin = inf, stdout = outf)
  69. else:
  70. # FIXME: we need a large-file sorting function
  71. grass.warning(_("'sort' not found: sorting in memory"))
  72. lines = inf.readlines()
  73. for i in range(len(lines)):
  74. lines[i] = float(lines[i].rstrip('\r\n'))
  75. lines.sort()
  76. for line in lines:
  77. outf.write(str(line) + '\n')
  78. inf.close()
  79. outf.close()
  80. def main():
  81. global tmp
  82. tmp = grass.tempfile()
  83. extend = flags['e']
  84. shellstyle = flags['g']
  85. table = options['table']
  86. column = options['column']
  87. database = options['database']
  88. driver = options['driver']
  89. where = options['where']
  90. perc = options['percentile']
  91. perc = [float(p) for p in perc.split(',')]
  92. if not shellstyle:
  93. grass.message(_("Calculation for column <%s> of table <%s>...") % (column, table))
  94. grass.message(_("Reading column values..."))
  95. sql = "SELECT %s FROM %s" % (column, table)
  96. if where:
  97. sql += " WHERE " + where
  98. if not database:
  99. database = None
  100. if not driver:
  101. driver = None
  102. tmpf = file(tmp, 'w')
  103. grass.run_command('db.select', flags = 'c', table = table,
  104. database = database, driver = driver, sql = sql,
  105. stdout = tmpf)
  106. tmpf.close()
  107. # check if result is empty
  108. tmpf = file(tmp)
  109. if tmpf.read(1) == '':
  110. grass.fatal(_("Table <%s> contains no data.") % table)
  111. tmpf.close()
  112. # calculate statistics
  113. if not shellstyle:
  114. grass.message(_("Calculating statistics..."))
  115. N = 0
  116. sum = 0.0
  117. sum2 = 0.0
  118. sum3 = 0.0
  119. minv = 1e300
  120. maxv = -1e300
  121. tmpf = file(tmp)
  122. for line in tmpf:
  123. if len(line.rstrip('\r\n')) == 0:
  124. continue
  125. x = float(line.rstrip('\r\n'))
  126. N += 1
  127. sum += x
  128. sum2 += x * x
  129. sum3 += abs(x)
  130. maxv = max(maxv, x)
  131. minv = min(minv, x)
  132. tmpf.close()
  133. if N <= 0:
  134. grass.fatal(_("No non-null values found"))
  135. if not shellstyle:
  136. print ""
  137. print "Number of values: %d" % N
  138. print "Minimum: %.15g" % minv
  139. print "Maximum: %.15g" % maxv
  140. print "Range: %.15g" % (maxv - minv)
  141. print "-----"
  142. print "Mean: %.15g" % (sum/N)
  143. print "Arithmetic mean of absolute values: %.15g" % (sum3/N)
  144. print "Variance: %.15g" % ((sum2 - sum*sum/N)/N)
  145. print "Standard deviation: %.15g" % (math.sqrt((sum2 - sum*sum/N)/N))
  146. print "Coefficient of variation: %.15g" % ((math.sqrt((sum2 - sum*sum/N)/N))/(math.sqrt(sum*sum)/N))
  147. print "Sum: %.15g" % sum
  148. print "-----"
  149. else:
  150. print "n=%d" % N
  151. print "min=%.15g" % minv
  152. print "max=%.15g" % maxv
  153. print "range=%.15g" % (maxv - minv)
  154. print "mean=%.15g" % (sum/N)
  155. print "mean_abs=%.15g" % (sum3/N)
  156. print "variance=%.15g" % ((sum2 - sum*sum/N)/N)
  157. print "stddev=%.15g" % (math.sqrt((sum2 - sum*sum/N)/N))
  158. print "coeff_var=%.15g" % ((math.sqrt((sum2 - sum*sum/N)/N))/(math.sqrt(sum*sum)/N))
  159. print "sum=%.15g" % sum
  160. if not extend:
  161. return
  162. # preparations:
  163. sortfile(tmp, tmp + ".sort")
  164. number = N
  165. odd = N % 2
  166. eostr = ['even','odd'][odd]
  167. q25pos = round(N * 0.25)
  168. if q25pos == 0:
  169. q25pos = 1
  170. q50apos = round(N * 0.50)
  171. if q50apos == 0:
  172. q50apos = 1
  173. q50bpos = q50apos + (1 - odd)
  174. q75pos = round(N * 0.75)
  175. if q75pos == 0:
  176. q75pos = 1
  177. ppos = {}
  178. pval = {}
  179. for i in range(len(perc)):
  180. ppos[i] = round(N * perc[i] / 100)
  181. if ppos[i] == 0:
  182. ppos[i] = 1
  183. pval[i] = 0
  184. inf = file(tmp + ".sort")
  185. l = 1
  186. for line in inf:
  187. if l == q25pos:
  188. q25 = float(line.rstrip('\r\n'))
  189. if l == q50apos:
  190. q50a = float(line.rstrip('\r\n'))
  191. if l == q50bpos:
  192. q50b = float(line.rstrip('\r\n'))
  193. if l == q75pos:
  194. q75 = float(line.rstrip('\r\n'))
  195. for i in range(len(ppos)):
  196. if l == ppos[i]:
  197. pval[i] = float(line.rstrip('\r\n'))
  198. l += 1
  199. q50 = (q50a + q50b) / 2
  200. if not shellstyle:
  201. print "1st Quartile: %.15g" % q25
  202. print "Median (%s N): %.15g" % (eostr, q50)
  203. print "3rd Quartile: %.15g" % q75
  204. for i in range(len(perc)):
  205. if perc[i] == int(perc[i]): # integer
  206. if int(perc[i]) % 10 == 1 and int(perc[i]) != 11:
  207. print "%dst Percentile: %.15g" % (int(perc[i]), pval[i])
  208. elif int(perc[i]) % 10 == 2 and int(perc[i]) != 12:
  209. print "%dnd Percentile: %.15g" % (int(perc[i]), pval[i])
  210. elif int(perc[i]) % 10 == 3 and int(perc[i]) != 13:
  211. print "%drd Percentile: %.15g" % (int(perc[i]), pval[i])
  212. else:
  213. print "%dth Percentile: %.15g" % (int(perc[i]), pval[i])
  214. else:
  215. print "%.15g Percentile: %.15g" % (perc[i], pval[i])
  216. else:
  217. print "first_quartile=%.15g" % q25
  218. print "median=%.15g" % q50
  219. print "third_quartile=%.15g" % q75
  220. for i in range(len(perc)):
  221. percstr = "%.15g" % perc[i]
  222. percstr = percstr.replace('.','_')
  223. print "percentile_%s=%.15g" % (percstr, pval[i])
  224. if __name__ == "__main__":
  225. options, flags = grass.parser()
  226. atexit.register(cleanup)
  227. main()