v.db.univar.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: v.db.univar (formerly called v.univar.sh)
  5. # AUTHOR(S): Michael Barton, Arizona State University
  6. # Converted to Python by Glynn Clements
  7. # PURPOSE: Calculates univariate statistics from a GRASS vector map attribute column.
  8. # Based on r.univar.sh by Markus Neteler
  9. # COPYRIGHT: (C) 2005, 2007, 2008 by the GRASS Development Team
  10. #
  11. # This program is free software under the GNU General Public
  12. # License (>=v2). Read the file COPYING that comes with GRASS
  13. # for details.
  14. #
  15. #############################################################################
  16. #%Module
  17. #% description: Calculates univariate statistics on selected table column for a GRASS vector map.
  18. #% keywords: vector, statistics
  19. #%End
  20. #%flag
  21. #% key: e
  22. #% description: Extended statistics (quartiles and 90th percentile)
  23. #%END
  24. #%option
  25. #% key: table
  26. #% type: string
  27. #% gisprompt: old,vector,vector
  28. #% description: Name of data table
  29. #% required : yes
  30. #%End
  31. #%option
  32. #% key: column
  33. #% type: string
  34. #% description: Column on which to calculate statistics (must be numeric)
  35. #% required : yes
  36. #%end
  37. #%option
  38. #% key: database
  39. #% type: string
  40. #% description: Database/directory for table
  41. #% required : no
  42. #%end
  43. #%option
  44. #% key: driver
  45. #% type: string
  46. #% description: Database driver
  47. #% required : no
  48. #%end
  49. #%option
  50. #% key: where
  51. #% type: string
  52. #% description: WHERE conditions of SQL statement without 'where' keyword
  53. #% required : no
  54. #%end
  55. import sys
  56. import os
  57. import atexit
  58. import math
  59. import grass
  60. def cleanup():
  61. for ext in ['', '.sort']:
  62. grass.try_remove(tmp + ext)
  63. def sortfile(infile, outfile):
  64. inf = file(infile, 'r')
  65. outf = file(outfile, 'w')
  66. if grass.find_program('sort', ['-n']):
  67. grass.run_command('sort', flags = 'n', stdin = inf, stdout = outf)
  68. else:
  69. # FIXME: we need a large-file sorting function
  70. grass.warning("'sort' not found: sorting in memory")
  71. lines = inf.readlines()
  72. for i in range(len(lines)):
  73. lines[i] = float(lines[i].rstrip('\r\n'))
  74. lines.sort()
  75. for line in lines:
  76. outf.write(str(line) + '\n')
  77. inf.close()
  78. outf.close()
  79. def main():
  80. global tmp
  81. tmp = grass.tempfile()
  82. extend = flags['e']
  83. table = options['table']
  84. column = options['column']
  85. database = options['database']
  86. driver = options['driver']
  87. where = options['where']
  88. grass.message("Calculation for column <%s> of table <%s>..." % (column, table))
  89. grass.message("Reading column values...")
  90. sql = "SELECT %s FROM %s" % (column, table)
  91. if where:
  92. sql += " WHERE " + where
  93. if not database:
  94. database = None
  95. if not driver:
  96. driver = None
  97. tmpf = file(tmp, 'w')
  98. grass.run_command('db.select', flags = 'c', table = table,
  99. database = database, driver = driver, sql = sql,
  100. stdout = tmpf)
  101. tmpf.close()
  102. # check if result is empty
  103. tmpf = file(tmp)
  104. if tmpf.read(1) == '':
  105. grass.fatal("Table <%s> contains no data.", table)
  106. tmpf.close()
  107. # calculate statistics
  108. grass.message("Calculating statistics...")
  109. N = 0
  110. sum = 0.0
  111. sum2 = 0.0
  112. sum3 = 0.0
  113. minv = 1e300
  114. maxv = -1e300
  115. tmpf = file(tmp)
  116. for line in tmpf:
  117. x = float(line.rstrip('\r\n'))
  118. N += 1
  119. sum += x
  120. sum2 += x * x
  121. sum3 += abs(x)
  122. maxv = max(maxv, x)
  123. minv = min(minv, x)
  124. tmpf.close()
  125. if N <= 0:
  126. grass.fatal("No non-null values found")
  127. print ""
  128. print "Number of values:", N
  129. print "Minimum:", minv
  130. print "Maximum:", maxv
  131. print "Range:", maxv - minv
  132. print "-----"
  133. print "Mean:", sum/N
  134. print "Arithmetic mean of absolute values:", sum3/N
  135. print "Variance:", (sum2 - sum*sum/N)/N
  136. print "Standard deviation:", math.sqrt((sum2 - sum*sum/N)/N)
  137. print "Coefficient of variation:", (math.sqrt((sum2 - sum*sum/N)/N))/(math.sqrt(sum*sum)/N)
  138. print "-----"
  139. if not extend:
  140. return
  141. #preparations:
  142. sortfile(tmp, tmp + ".sort")
  143. number = N
  144. odd = N % 2
  145. eostr = ['even','odd'][odd]
  146. q25pos = round(N * 0.25)
  147. q50apos = round(N * 0.50)
  148. q50bpos = q50apos + (1 - odd)
  149. q75pos = round(N * 0.75)
  150. q90pos = round(N * 0.90)
  151. inf = file(tmp + ".sort")
  152. l = 1
  153. for line in inf:
  154. if l == q25pos:
  155. q25 = float(line.rstrip('\r\n'))
  156. if l == q50apos:
  157. q50a = float(line.rstrip('\r\n'))
  158. if l == q50bpos:
  159. q50b = float(line.rstrip('\r\n'))
  160. if l == q75pos:
  161. q75 = float(line.rstrip('\r\n'))
  162. if l == q90pos:
  163. q90 = float(line.rstrip('\r\n'))
  164. l += 1
  165. q50 = (q50a + q50b) / 2
  166. print "1st Quartile: %f" % q25
  167. print "Median (%s N): %f" % (eostr, q50)
  168. print "3rd Quartile: %f" % q75
  169. print "90th Percentile: %f" % q90
  170. if __name__ == "__main__":
  171. options, flags = grass.parser()
  172. atexit.register(cleanup)
  173. main()