#!/usr/bin/env python3

############################################################################
#
# MODULE:       v.rast.stats
# AUTHOR(S):    Markus Neteler
#               converted to Python by Glynn Clements
#               speed up by Markus Metz
#               add column choose by Luca Delucchi
# PURPOSE:      Calculates univariate statistics from a GRASS raster map
#               only for areas covered by vector objects on a per-category base
# COPYRIGHT:    (C) 2005-2016 by the GRASS Development Team
#
#               This program is free software under the GNU General Public
#               License (>=v2). Read the file COPYING that comes with GRASS
#               for details.
#
############################################################################

# %module
# % description: Calculates univariate statistics from a raster map based on a vector map and uploads statistics to new attribute columns.
# % keyword: vector
# % keyword: statistics
# % keyword: raster
# % keyword: univariate statistics
# % keyword: zonal statistics
# % keyword: sampling
# % keyword: querying
# %end
# %flag
# % key: c
# % description: Continue if upload column(s) already exist
# %end
# %flag
# % key: d
# % label: Create densified lines (default: thin lines)
# % description: All cells touched by the line will be set, not only those on the render path
# %end
# %option G_OPT_V_MAP
# %end
# %option G_OPT_V_FIELD
# %end
# %option G_OPT_V_TYPE
# %end
# %option G_OPT_DB_WHERE
# %end
# %option G_OPT_R_INPUTS
# % key: raster
# % description: Name of input raster map to calculate statistics from
# %end
# %option
# % key: column_prefix
# % type: string
# % description: Column prefix for new attribute columns
# % required: yes
# % multiple: yes
# %end
# %option
# % key: method
# % type: string
# % description: The methods to use
# % required: no
# % multiple: yes
# % options: number,null_cells,minimum,maximum,range,average,stddev,variance,coeff_var,sum,first_quartile,median,third_quartile,percentile
# % answer: number,null_cells,minimum,maximum,range,average,stddev,variance,coeff_var,sum,first_quartile,median,third_quartile,percentile
# %end
# %option
# % key: percentile
# % type: integer
# % description: Percentile to calculate
# % options: 0-100
# % answer: 90
# % required: no
# %end

import sys
import os
import atexit
import grass.script as grass
from grass.script.utils import decode
from grass.exceptions import CalledModuleError


def cleanup():
    if rastertmp:
        grass.run_command(
            "g.remove", flags="f", type="raster", name=rastertmp, quiet=True
        )
    # for f in [tmp, tmpname, sqltmp]:
    #     grass.try_remove(f)


def main():
    global tmp, sqltmp, tmpname, nuldev, vector, rastertmp
    rastertmp = False
    # setup temporary files
    tmp = grass.tempfile()
    sqltmp = tmp + ".sql"
    # we need a random name
    tmpname = grass.basename(tmp)

    nuldev = open(os.devnull, "w")

    rasters = options["raster"].split(",")
    colprefixes = options["column_prefix"].split(",")
    vector = options["map"]
    layer = options["layer"]
    vtypes = options["type"]
    where = options["where"]
    percentile = options["percentile"]
    basecols = options["method"].split(",")

    # Get current mapset
    env = grass.gisenv()
    mapset = env["MAPSET"]

    # Get mapset of the vector
    vs = vector.split("@")
    if len(vs) > 1:
        vect_mapset = vs[1]
    else:
        vect_mapset = mapset

    # does map exist in CURRENT mapset?
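    # (attribute columns can only be written to a vector map in the current mapset)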
    if vect_mapset != mapset or not grass.find_file(vector, "vector", mapset)["file"]:
        grass.fatal(_("Vector map <%s> not found in current mapset") % vector)

    # check if DBF driver used, in this case cut to 10 chars col names:
    try:
        fi = grass.vector_db(map=vector)[int(layer)]
    except KeyError:
        grass.fatal(
            _(
                "There is no table connected to this map. "
                "Run v.db.connect or v.db.addtable first."
            )
        )
    # we need this for non-DBF driver:
    dbfdriver = fi["driver"] == "dbf"

    # colprefix for every raster map?
    if len(colprefixes) != len(rasters):
        grass.fatal(
            _(
                "Number of raster maps ({0}) different from "
                "number of column prefixes ({1})".format(
                    len(rasters), len(colprefixes)
                )
            )
        )

    vector = vs[0]

    rastertmp = "%s_%s" % (vector, tmpname)

    for raster in rasters:
        # check the input raster map
        if not grass.find_file(raster, "cell")["file"]:
            grass.fatal(_("Raster map <%s> not found") % raster)

    # save current settings:
    grass.use_temp_region()

    # Temporarily aligning region resolution to $RASTER resolution
    # keep boundary settings
    grass.run_command("g.region", align=rasters[0])

    # check if DBF driver used, in this case cut to 10 chars col names:
    try:
        fi = grass.vector_db(map=vector)[int(layer)]
    except KeyError:
        grass.fatal(
            _(
                "There is no table connected to this map. "
                "Run v.db.connect or v.db.addtable first."
            )
        )
    # we need this for non-DBF driver:
    dbfdriver = fi["driver"] == "dbf"

    # Find out which table is linked to the vector map on the given layer
    if not fi["table"]:
        grass.fatal(
            _(
                "There is no table connected to this map. "
                "Run v.db.connect or v.db.addtable first."
            )
        )

    # prepare base raster for zonal statistics
    prepare_base_raster(vector, layer, rastertmp, vtypes, where)

    # get number of raster categories to be processed
    number = get_nr_of_categories(
        vector,
        layer,
        rasters,
        rastertmp,
        percentile,
        colprefixes,
        basecols,
        dbfdriver,
        flags["c"],
    )

    # calculate statistics:
    grass.message(_("Processing input data (%d categories)...") % number)

    for i in range(len(rasters)):
        raster = rasters[i]

        colprefix, variables_dbf, variables, colnames, extstat = set_up_columns(
            vector, layer, percentile, colprefixes[i], basecols, dbfdriver, flags["c"]
        )

        # get rid of any earlier attempts
        grass.try_remove(sqltmp)

        # do the stats
        perform_stats(
            raster,
            percentile,
            fi,
            dbfdriver,
            colprefix,
            variables_dbf,
            variables,
            colnames,
            extstat,
        )

        grass.message(_("Updating the database ..."))
        exitcode = 0
        try:
            grass.run_command(
                "db.execute", input=sqltmp, database=fi["database"], driver=fi["driver"]
            )
            grass.verbose(
                _(
                    "Statistics calculated from raster map <{raster}>"
                    " and uploaded to attribute table"
                    " of vector map <{vector}>."
                ).format(raster=raster, vector=vector)
            )
        except CalledModuleError:
            grass.warning(
                _("Failed to upload statistics to attribute table of vector map <%s>.")
                % vector
            )
            exitcode = 1

    sys.exit(exitcode)


def prepare_base_raster(vector, layer, rastertmp, vtypes, where):
    """Prepare base raster for zonal statistics.
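
    The vector map is rasterized with v.to.rast using the category values
    (use="cat"), so each zone in the temporary raster corresponds to one
    vector category.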

    :param vector: name of vector map or data source for direct OGR access
    :param layer: layer number or name
    :param rastertmp: name of temporary raster map
    :param vtypes: input feature type
    :param where: WHERE conditions of SQL statement without 'where' keyword
    """
    try:
        nlines = grass.vector_info_topo(vector)["lines"]
        kwargs = {}
        if where:
            kwargs["where"] = where
        # Create densified lines rather than thin lines
        if flags["d"] and nlines > 0:
            kwargs["flags"] = "d"
        grass.run_command(
            "v.to.rast",
            input=vector,
            layer=layer,
            output=rastertmp,
            use="cat",
            type=vtypes,
            quiet=True,
            **kwargs,
        )
    except CalledModuleError:
        grass.fatal(_("An error occurred while converting vector to raster"))


def get_nr_of_categories(
    vector, layer, rasters, rastertmp, percentile, colprefixes, basecols, dbfdriver, c
):
    """Get number of raster categories to be processed.

    Perform also checks of raster and vector categories. In the case of
    no raster categories, create the desired columns and exit.

    :param vector: name of vector map or data source for direct OGR access
    :param layer: layer number or name
    :param rastertmp: name of temporary raster map
    :return: number of raster categories or exit (if no categories found)
    """
    # dump cats to file to avoid "too many argument" problem:
    p = grass.pipe_command("r.category", map=rastertmp, sep=";", quiet=True)
    cats = []

    for line in p.stdout:
        line = decode(line)
        cats.append(line.rstrip("\r\n").split(";")[0])
    p.wait()

    number = len(cats)
    if number < 1:
        # create columns and exit
        grass.warning(_("No categories found in raster map"))
        for i in range(len(rasters)):
            set_up_columns(
                vector,
                layer,
                percentile,
                colprefixes[i],
                basecols,
                dbfdriver,
                flags["c"],
            )
        sys.exit(0)

    # Check if all categories got converted
    # Report categories from vector map
    vect_cats = (
        grass.read_command("v.category", input=vector, option="report", flags="g")
        .rstrip("\n")
        .split("\n")
    )

    # get number of all categories in selected layer
    vect_cats_n = 0  # to be modified below
    for vcl in vect_cats:
        if vcl.split(" ")[0] == layer and vcl.split(" ")[1] == "all":
            vect_cats_n = int(vcl.split(" ")[2])

    if vect_cats_n != number:
        grass.warning(
            _(
                "Not all vector categories converted to raster. "
                "Converted {0} of {1}.".format(number, vect_cats_n)
            )
        )

    return number


def set_up_columns(vector, layer, percentile, colprefix, basecols, dbfdriver, c):
    """Get columns-depending variables and create columns, if needed.
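
    Column names are built as "<column_prefix>_<method>"; with the DBF driver
    the prefix is cut to 6 characters and the complete name to 10 characters.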

    :param vector: name of vector map or data source for direct OGR access
    :param layer: layer number or name
    :param percentile: percentile to calculate
    :param colprefix: column prefix for new attribute columns
    :param basecols: the methods to use
    :param dbfdriver: boolean saying if the driver is dbf
    :param c: boolean saying if it should continue if upload column(s)
              already exist
    :return: colprefix, variables_dbf, variables, colnames, extstat
    """
    # we need at least three chars to distinguish [mea]n from [med]ian
    # so colprefix can't be longer than 6 chars with DBF driver
    variables_dbf = {}
    if dbfdriver:
        colprefix = colprefix[:6]

    # by default perccol is only used as a key in the "variables" dictionary
    perccol = "percentile"
    perc = None
    for b in basecols:
        if b.startswith("p"):
            perc = b
    if perc:
        # column name length is limited with DBF, but the percentile value
        # must be kept in the name
        if dbfdriver:
            perccol = "per" + percentile
        else:
            perccol = "percentile_" + percentile
        percindex = basecols.index(perc)
        basecols[percindex] = perccol

    # dictionary with name of methods and position in "r.univar -t" output
    variables = {
        "number": 2,
        "null_cells": 3,
        "minimum": 4,
        "maximum": 5,
        "range": 6,
        "average": 7,
        "stddev": 9,
        "variance": 10,
        "coeff_var": 11,
        "sum": 12,
        "first_quartile": 14,
        "median": 15,
        "third_quartile": 16,
        perccol: 17,
    }
    # this list is used to set the 'e' flag for r.univar
    extracols = ["first_quartile", "median", "third_quartile", perccol]
    addcols = []
    colnames = []
    extstat = ""
    for i in basecols:
        # expand a truncated method name to its complete name
        for k in variables.keys():
            if i in k:
                i = k
                break
        if i in extracols:
            extstat = "e"
        # check if column already present
        currcolumn = "%s_%s" % (colprefix, i)
        if dbfdriver:
            currcolumn = currcolumn[:10]
            variables_dbf[currcolumn.replace("%s_" % colprefix, "")] = i

        colnames.append(currcolumn)

        if currcolumn in grass.vector_columns(vector, layer).keys():
            if not c:
                grass.fatal(
                    (_("Cannot create column <%s> (already present). ") % currcolumn)
                    + _("Use -c flag to update values in this column.")
                )
        else:
            # the cell count is an integer, all other statistics are floats
            if i == "number":
                coltype = "INTEGER"
            else:
                coltype = "DOUBLE PRECISION"
            addcols.append(currcolumn + " " + coltype)

    if addcols:
        grass.verbose(_("Adding columns '%s'") % addcols)
        try:
            grass.run_command(
                "v.db.addcolumn", map=vector, columns=addcols, layer=layer
            )
        except CalledModuleError:
            grass.fatal(_("Adding columns failed. Exiting."))
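
    # The returned "colnames" and "variables" mappings are later used by
    # perform_stats() to pair each attribute column with the matching field
    # of the "r.univar -t" output.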

    return colprefix, variables_dbf, variables, colnames, extstat


def perform_stats(
    raster,
    percentile,
    fi,
    dbfdriver,
    colprefix,
    variables_dbf,
    variables,
    colnames,
    extstat,
):
    with open(sqltmp, "w") as f:
        # do the stats
        p = grass.pipe_command(
            "r.univar",
            flags="t" + extstat,
            map=raster,
            zones=rastertmp,
            percentile=percentile,
            sep=";",
        )

        first_line = 1

        f.write("{0}\n".format(grass.db_begin_transaction(fi["driver"])))
        for line in p.stdout:
            if first_line:
                first_line = 0
                continue

            vars = decode(line).rstrip("\r\n").split(";")
            f.write("UPDATE %s SET" % fi["table"])
            first_var = 1
            for colname in colnames:
                variable = colname.replace("%s_" % colprefix, "", 1)
                if dbfdriver:
                    variable = variables_dbf[variable]
                i = variables[variable]
                value = vars[i]
                # convert nan, +nan, -nan, inf, +inf, -inf, Infinity, +Infinity,
                # -Infinity to NULL
                if value.lower().endswith("nan") or "inf" in value.lower():
                    value = "NULL"
                if not first_var:
                    f.write(" , ")
                else:
                    first_var = 0
                f.write(" %s=%s" % (colname, value))
            f.write(" WHERE %s=%s;\n" % (fi["key"], vars[0]))

        f.write("{0}\n".format(grass.db_commit_transaction(fi["driver"])))
        p.wait()


if __name__ == "__main__":
    options, flags = grass.parser()
    atexit.register(cleanup)
    main()
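
# Usage sketch (hypothetical map names, for illustration only):
#   v.rast.stats map=myzones raster=elevation column_prefix=elev \
#       method=average,median
# This would add the columns "elev_average" and "elev_median" to the
# attribute table of "myzones".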