v.in.geonames.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. #!/usr/bin/env python3
  2. ############################################################################
  3. #
  4. # MODULE: v.in.geonames
  5. #
  6. # AUTHOR(S): Markus Neteler, neteler cealp it
  7. # Converted to Python by Glynn Clements
  8. #
  9. # PURPOSE: Import geonames.org dumps
  10. # http://download.geonames.org/export/dump/
  11. #
  12. # Feature Codes: http://www.geonames.org/export/codes.html
  13. #
  14. # COPYRIGHT: (c) 2008-2014 Markus Neteler, GRASS Development Team
  15. #
  16. # This program is free software under the GNU General Public
  17. # License (>=v2). Read the file COPYING that comes with GRASS
  18. # for details.
  19. #
  20. # TODO: fix encoding issues for Asian fonts in 'alternatename' column (v.in.ascii)
  21. # fix spurious char stuff in elevation column
  22. #############################################################################
  23. # %module
  24. # % description: Imports geonames.org country files into a vector points map.
  25. # % keyword: vector
  26. # % keyword: import
  27. # % keyword: gazetteer
  28. # %end
  29. # %option G_OPT_F_INPUT
  30. # % description: Name of uncompressed geonames file (with .txt extension)
  31. # %end
  32. # %option G_OPT_V_OUTPUT
  33. # %end
  34. import os
  35. import sys
  36. if sys.version_info.major == 2:
  37. from io import open
  38. import grass.script as grass
  39. def main():
  40. infile = options["input"]
  41. outfile = options["output"]
  42. # are we in LatLong location?
  43. s = grass.read_command("g.proj", flags="j")
  44. kv = grass.parse_key_val(s)
  45. if kv["+proj"] != "longlat":
  46. grass.fatal(_("This module only operates in LatLong/WGS84 locations"))
  47. # input test
  48. if not os.access(infile, os.R_OK):
  49. grass.fatal(_("File <%s> not found") % infile)
  50. # DBF doesn't support lengthy text fields
  51. kv = grass.db_connection()
  52. dbfdriver = kv["driver"] == "dbf"
  53. if dbfdriver:
  54. grass.warning(
  55. _(
  56. "Since DBF driver is used, the content of the 'alternatenames' column might be cut with respect to the original Geonames.org column content"
  57. )
  58. )
  59. with open(infile, encoding="utf-8") as f:
  60. num_places = sum(1 for each in f)
  61. grass.message(_("Converting %d place names...") % num_places)
  62. # pump data into GRASS:
  63. # http://download.geonames.org/export/dump/readme.txt
  64. # The main 'geoname' table has the following fields :
  65. # ---------------------------------------------------
  66. # geonameid : integer id of record in geonames database
  67. # name : name of geographical point (utf8) varchar(200)
  68. # asciiname : name of geographical point in plain ascii characters, varchar(200)
  69. # alternatenames : alternatenames, comma separated varchar(4000)
  70. # latitude : latitude in decimal degrees (wgs84)
  71. # longitude : longitude in decimal degrees (wgs84)
  72. # feature class : see http://www.geonames.org/export/codes.html, char(1)
  73. # feature code : see http://www.geonames.org/export/codes.html, varchar(10)
  74. # country code : ISO-3166 2-letter country code, 2 characters
  75. # cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
  76. # admin1 code : fipscode (subject to change to iso code), isocode for the us and ch, see file admin1Codes.txt for display names of this code; varchar(20)
  77. # admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
  78. # admin3 code : code for third level administrative division, varchar(20)
  79. # admin4 code : code for fourth level administrative division, varchar(20)
  80. # population : integer
  81. # elevation : in meters, integer
  82. # gtopo30 : average elevation of 30'x30' (ca 900mx900m) area in meters, integer
  83. # timezone : the timezone id (see file http://download.geonames.org/export/dump/timeZones.txt)
  84. # modification date : date of last modification in yyyy-MM-dd format
  85. # geonameid|name|asciiname|alternatenames|latitude|longitude|featureclass|featurecode|countrycode|cc2|admin1code|admin2code|admin3code|admin4code|population|elevation|gtopo30|timezone|modificationdate
  86. # debug:
  87. # head -n 3 ${TMPFILE}.csv
  88. # use different column names limited to 10 chars for dbf
  89. if dbfdriver:
  90. columns = [
  91. "geonameid integer",
  92. "name varchar(200)",
  93. "asciiname varchar(200)",
  94. "altname varchar(4000)",
  95. "latitude double precision",
  96. "longitude double precision",
  97. "featrclass varchar(1)",
  98. "featrcode varchar(10)",
  99. "cntrycode varchar(2)",
  100. "cc2 varchar(60)",
  101. "admin1code varchar(20)",
  102. "admin2code varchar(20)",
  103. "admin3code varchar(20)",
  104. "admin4code varchar(20)",
  105. "population integer",
  106. "elevation integer",
  107. "gtopo30 integer",
  108. "timezone varchar(50)",
  109. "mod_date date",
  110. ]
  111. else:
  112. columns = [
  113. "geonameid integer",
  114. "name varchar(200)",
  115. "asciiname varchar(200)",
  116. "alternatename varchar(4000)",
  117. "latitude double precision",
  118. "longitude double precision",
  119. "featureclass varchar(1)",
  120. "featurecode varchar(10)",
  121. "countrycode varchar(2)",
  122. "cc2 varchar(60)",
  123. "admin1code varchar(20)",
  124. "admin2code varchar(20)",
  125. "admin3code varchar(20)",
  126. "admin4code varchar(20)",
  127. "population integer",
  128. "elevation integer",
  129. "gtopo30 integer",
  130. "timezone varchar(50)",
  131. "modification date",
  132. ]
  133. grass.run_command(
  134. "v.in.ascii",
  135. cat=0,
  136. x=6,
  137. y=5,
  138. sep="tab",
  139. input=infile,
  140. output=outfile,
  141. columns=columns,
  142. )
  143. # write cmd history:
  144. grass.vector_history(outfile)
  145. if __name__ == "__main__":
  146. options, flags = grass.parser()
  147. main()