v.in.geonames.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. #!/usr/bin/env python
  2. ############################################################################
  3. #
  4. # MODULE: v.in.geonames
  5. #
  6. # AUTHOR(S): Markus Neteler, neteler cealp it
  7. # Converted to Python by Glynn Clements
  8. #
  9. # PURPOSE: Import geonames.org dumps
  10. # http://download.geonames.org/export/dump/
  11. #
  12. # Feature Codes: http://www.geonames.org/export/codes.html
  13. #
  14. # COPYRIGHT: (c) 2008 Markus Neteler, GRASS Development Team
  15. #
  16. # This program is free software under the GNU General Public
  17. # License (>=v2). Read the file COPYING that comes with GRASS
  18. # for details.
  19. #
  20. # TODO: fix encoding issues for Asian fonts in 'alternatename' column (v.in.ascii)
  21. # fix spurious char stuff in elevation column
  22. #############################################################################
  23. #%Module
  24. #% description: Imports geonames.org country files into a GRASS vector points map.
  25. #% keywords: vector, import, gazetteer
  26. #%End
  27. #%option
  28. #% key: input
  29. #% type: string
  30. #% key_desc: name
  31. #% description: Uncompressed geonames file from geonames.org (with .txt extension)
  32. #% gisprompt: old_file,file,input
  33. #% required : yes
  34. #%end
  35. #%option
  36. #% key: output
  37. #% type: string
  38. #% key_desc: name
  39. #% gisprompt: new,vector,vector
  40. #% description: Name for output vector map
  41. #% required : yes
  42. #%end
  43. import sys
  44. import os
  45. import grass
  46. def main():
  47. infile = options['input']
  48. outfile = options['output']
  49. #### setup temporary file
  50. tmpfile = grass.tempfile()
  51. #are we in LatLong location?
  52. s = grass.read_command("g.proj", flags='j')
  53. kv = grass.parse_key_val(s)
  54. if kv['+proj'] != 'longlat':
  55. grass.fatal("This module only operates in LatLong/WGS84 locations")
  56. # input test
  57. if not os.access(infile, os.R_OK):
  58. grass.fatal("File <%s> not found" % infile)
  59. # DBF doesn't support lengthy text fields
  60. kv = grass.db_connection()
  61. dbfdriver = kv['driver'] == 'dbf'
  62. if dbfdriver:
  63. grass.warning("Since DBF driver is used, the content of the 'alternatenames' column might be cut with respect to the original Geonames.org column content")
  64. #let's go
  65. #change TAB to vertical bar
  66. num_places = 0
  67. inf = file(infile)
  68. outf = file(tmpfile, 'wb')
  69. for line in inf:
  70. fields = line.rstrip('\r\n').split('\t')
  71. line2 = '|'.join(fields) + '\n'
  72. outf.write(line2)
  73. num_places += 1
  74. outf.close()
  75. inf.close()
  76. grass.message("Converted %d place names." % num_places)
  77. # pump data into GRASS:
  78. # http://download.geonames.org/export/dump/readme.txt
  79. # The main 'geoname' table has the following fields :
  80. # ---------------------------------------------------
  81. # geonameid : integer id of record in geonames database
  82. # name : name of geographical point (utf8) varchar(200)
  83. # asciiname : name of geographical point in plain ascii characters, varchar(200)
  84. # alternatenames : alternatenames, comma separated varchar(4000)
  85. # latitude : latitude in decimal degrees (wgs84)
  86. # longitude : longitude in decimal degrees (wgs84)
  87. # feature class : see http://www.geonames.org/export/codes.html, char(1)
  88. # feature code : see http://www.geonames.org/export/codes.html, varchar(10)
  89. # country code : ISO-3166 2-letter country code, 2 characters
  90. # cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
  91. # admin1 code : fipscode (subject to change to iso code), isocode for the us and ch, see file admin1Codes.txt for display names of this code; varchar(20)
  92. # admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
  93. # admin3 code : code for third level administrative division, varchar(20)
  94. # admin4 code : code for fourth level administrative division, varchar(20)
  95. # population : integer
  96. # elevation : in meters, integer
  97. # gtopo30 : average elevation of 30'x30' (ca 900mx900m) area in meters, integer
  98. # timezone : the timezone id (see file http://download.geonames.org/export/dump/timeZones.txt)
  99. # modification date : date of last modification in yyyy-MM-dd format
  100. # geonameid|name|asciiname|alternatenames|latitude|longitude|featureclass|featurecode|countrycode|cc2|admin1code|admin2code|admin3code|admin4code|population|elevation|gtopo30|timezone|modificationdate
  101. # TODO: elevation seems to contain spurious char stuff :(
  102. # debug:
  103. # head -n 3 ${TMPFILE}.csv
  104. columns = ['geonameid integer',
  105. 'name varchar(200)',
  106. 'asciiname varchar(200)',
  107. 'alternatename varchar(4000)',
  108. 'latitude double precision',
  109. 'longitude double precision',
  110. 'featureclass varchar(1)',
  111. 'featurecode varchar(10)',
  112. 'countrycode varchar(2)',
  113. 'cc2 varchar(60)',
  114. 'admin1code varchar(20)',
  115. 'admin2code varchar(20)',
  116. 'admin3code varchar(20)',
  117. 'admin4code varchar(20)',
  118. 'population integer',
  119. 'elevation varchar(5)',
  120. 'gtopo30 integer',
  121. 'timezone varchar(50)',
  122. 'modification date']
  123. grass.run_command('v.in.ascii', cat = 0, x = 6, y = 5, fs = '|',
  124. input = tmpfile, output = outfile,
  125. columns = columns)
  126. grass.try_remove(tmpfile)
  127. # write cmd history:
  128. grass.vector_history(outfile)
  129. if __name__ == "__main__":
  130. options, flags = grass.parser()
  131. atexit.register(cleanup)
  132. main()