123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- #!/usr/bin/env python3
- ############################################################################
- #
- # MODULE: v.in.geonames
- #
- # AUTHOR(S): Markus Neteler, neteler cealp it
- # Converted to Python by Glynn Clements
- #
- # PURPOSE: Import geonames.org dumps
- # http://download.geonames.org/export/dump/
- #
- # Feature Codes: http://www.geonames.org/export/codes.html
- #
- # COPYRIGHT: (c) 2008-2014 Markus Neteler, GRASS Development Team
- #
- # This program is free software under the GNU General Public
- # License (>=v2). Read the file COPYING that comes with GRASS
- # for details.
- #
- # TODO: fix encoding issues for Asian fonts in 'alternatename' column (v.in.ascii)
- # fix spurious char stuff in elevation column
- #############################################################################
- # %module
- # % description: Imports geonames.org country files into a vector points map.
- # % keyword: vector
- # % keyword: import
- # % keyword: gazetteer
- # %end
- # %option G_OPT_F_INPUT
- # % description: Name of uncompressed geonames file (with .txt extension)
- # %end
- # %option G_OPT_V_OUTPUT
- # %end
- import os
- import sys
- if sys.version_info.major == 2:
- from io import open
- import grass.script as grass
def main():
    """Import a geonames.org dump file into a new vector points map.

    Reads the module-level ``options`` dictionary: ``options["input"]`` is
    the path of the uncompressed, tab-separated geonames text file and
    ``options["output"]`` is the name of the vector map to create.

    ``grass.fatal()`` is called when the current location does not report
    ``+proj=longlat`` or when the input file is not readable.
    """
    infile = options["input"]
    outfile = options["output"]

    # are we in LatLong location?
    # .get() is used so that a g.proj output without any "+proj" entry ends
    # in the fatal message below instead of an unhandled KeyError.
    proj_info = grass.parse_key_val(grass.read_command("g.proj", flags="j"))
    if proj_info.get("+proj") != "longlat":
        grass.fatal(_("This module only operates in LatLong/WGS84 locations"))

    # input test
    if not os.access(infile, os.R_OK):
        grass.fatal(_("File <%s> not found") % infile)

    # DBF doesn't support lengthy text fields
    dbfdriver = grass.db_connection()["driver"] == "dbf"
    if dbfdriver:
        grass.warning(
            _(
                "Since DBF driver is used, the content of the 'alternatenames' column might be cut with respect to the original Geonames.org column content"
            )
        )

    # one record per line, so the line count is the number of places
    with open(infile, encoding="utf-8") as f:
        num_places = sum(1 for _line in f)

    grass.message(_("Converting %d place names...") % num_places)

    # pump data into GRASS:
    #  http://download.geonames.org/export/dump/readme.txt
    #  The main 'geoname' table has the following fields :
    #  ---------------------------------------------------
    #  geonameid         : integer id of record in geonames database
    #  name              : name of geographical point (utf8) varchar(200)
    #  asciiname         : name of geographical point in plain ascii
    #                      characters, varchar(200)
    #  alternatenames    : alternatenames, comma separated varchar(4000)
    #  latitude          : latitude in decimal degrees (wgs84)
    #  longitude         : longitude in decimal degrees (wgs84)
    #  feature class     : see http://www.geonames.org/export/codes.html,
    #                      char(1)
    #  feature code      : see http://www.geonames.org/export/codes.html,
    #                      varchar(10)
    #  country code      : ISO-3166 2-letter country code, 2 characters
    #  cc2               : alternate country codes, comma separated,
    #                      ISO-3166 2-letter country code, 60 characters
    #  admin1 code       : fipscode (subject to change to iso code), isocode
    #                      for the us and ch, see file admin1Codes.txt for
    #                      display names of this code; varchar(20)
    #  admin2 code       : code for the second administrative division, a
    #                      county in the US, see file admin2Codes.txt;
    #                      varchar(80)
    #  admin3 code       : code for third level administrative division,
    #                      varchar(20)
    #  admin4 code       : code for fourth level administrative division,
    #                      varchar(20)
    #  population        : integer
    #  elevation         : in meters, integer
    #  gtopo30           : average elevation of 30'x30' (ca 900mx900m) area
    #                      in meters, integer
    #  timezone          : the timezone id (see file
    #                      http://download.geonames.org/export/dump/timeZones.txt)
    #  modification date : date of last modification in yyyy-MM-dd format
    #
    # geonameid|name|asciiname|alternatenames|latitude|longitude|featureclass|featurecode|countrycode|cc2|admin1code|admin2code|admin3code|admin4code|population|elevation|gtopo30|timezone|modificationdate

    # Single table of (regular column name, DBF column name, SQL type), in
    # input field order. The DBF names are the variants limited to 10 chars.
    # admin2code is varchar(80) to match the field description above.
    fields = [
        ("geonameid", "geonameid", "integer"),
        ("name", "name", "varchar(200)"),
        ("asciiname", "asciiname", "varchar(200)"),
        ("alternatename", "altname", "varchar(4000)"),
        ("latitude", "latitude", "double precision"),
        ("longitude", "longitude", "double precision"),
        ("featureclass", "featrclass", "varchar(1)"),
        ("featurecode", "featrcode", "varchar(10)"),
        ("countrycode", "cntrycode", "varchar(2)"),
        ("cc2", "cc2", "varchar(60)"),
        ("admin1code", "admin1code", "varchar(20)"),
        ("admin2code", "admin2code", "varchar(80)"),
        ("admin3code", "admin3code", "varchar(20)"),
        ("admin4code", "admin4code", "varchar(20)"),
        ("population", "population", "integer"),
        ("elevation", "elevation", "integer"),
        ("gtopo30", "gtopo30", "integer"),
        ("timezone", "timezone", "varchar(50)"),
        ("modification", "mod_date", "date"),
    ]
    name_index = 1 if dbfdriver else 0
    columns = ["%s %s" % (field[name_index], field[2]) for field in fields]

    # x/y are 1-based input field numbers: field 6 is longitude, field 5 is
    # latitude (see the field list above).
    grass.run_command(
        "v.in.ascii",
        cat=0,
        x=6,
        y=5,
        sep="tab",
        input=infile,
        output=outfile,
        columns=columns,
    )

    # write cmd history:
    grass.vector_history(outfile)
if __name__ == "__main__":
    # main() reads `options` as a module-level global, so the parser result
    # has to be bound here, at module scope, before main() is called.
    (options, flags) = grass.parser()
    main()
|