Przeglądaj źródła

v.in.ascii: include values into error messages (#104)

Not only the column, but also the value is now part of
the message about type since the value influenced the type
which makes it easier to find the issue. It reports the value for
the main columns such as x and y, and also for any mismatch
in requested and actual value types.

It stores the first encountered value as a sample for
all types and stores NULL for empty columns.
Type is used for (other) attribute column type errors,
while NULLs are used to avoid the message about integers in data
being given for empty columns, which was confusing.

Uses single quotes around data which is according to
the Submitting rules (MessageStandardization) which require
single quotes around strings, but does not apply rules otherwise
to be consistent(ly wrong together) with most of v.in.ascii.

Also unify function parameters and line breaks.
Vaclav Petras 5 lat temu
rodzic
commit
13065bfaae

+ 1 - 1
vector/v.in.ascii/local_proto.h

@@ -6,7 +6,7 @@
 #include <grass/dbmi.h>
 #include <grass/dbmi.h>
 
 
 int points_analyse(FILE *, FILE *, char *, char *, int *, int *, int *, int *, int **,
 int points_analyse(FILE *, FILE *, char *, char *, int *, int *, int *, int *, int **,
-		   int **, int, int, int, int, int, int, int);
+		   char ***, int **, int, int, int, int, int, int, int);
 
 
 int points_to_bin(FILE *, int, struct Map_info *, dbDriver *,
 int points_to_bin(FILE *, int, struct Map_info *, dbDriver *,
 		  char *, char *, char *, int, int *, int, int, int, int, int);
 		  char *, char *, char *, int, int *, int, int, int, int, int);

+ 24 - 13
vector/v.in.ascii/main.c

@@ -261,6 +261,7 @@ int main(int argc, char *argv[])
 
 
     if (format == GV_ASCII_FORMAT_POINT) {
     if (format == GV_ASCII_FORMAT_POINT) {
 	int i, rowlen, ncols, minncols, *coltype, *coltype2, *collen, nrows;
 	int i, rowlen, ncols, minncols, *coltype, *coltype2, *collen, nrows;
+	char **colsample;
 	int n_int = 0, n_double = 0, n_string = 0;
 	int n_int = 0, n_double = 0, n_string = 0;
 	char buf[1000];
 	char buf[1000];
 	struct field_info *Fi;
 	struct field_info *Fi;
@@ -277,7 +278,7 @@ int main(int argc, char *argv[])
 	unlink(tmp);
 	unlink(tmp);
 
 
 	points_analyse(ascii, tmpascii, fs, td, &rowlen, &ncols, &minncols,
 	points_analyse(ascii, tmpascii, fs, td, &rowlen, &ncols, &minncols,
-		       &nrows, &coltype, &collen, skip_lines, xcol, ycol,
+		       &nrows, &coltype, &colsample, &collen, skip_lines, xcol, ycol,
 		       zcol, catcol, region_flag->answer, ignore_flag->answer);
 		       zcol, catcol, region_flag->answer, ignore_flag->answer);
 
 
 	G_verbose_message(_("Maximum input row length: %d"), rowlen);
 	G_verbose_message(_("Maximum input row length: %d"), rowlen);
@@ -312,16 +313,20 @@ int main(int argc, char *argv[])
 	}
 	}
 
 
 	if (coltype[xcol] == DB_C_TYPE_STRING) {
 	if (coltype[xcol] == DB_C_TYPE_STRING) {
-	    G_fatal_error(_("'%s' column is not of number type"), "x");
+	    G_fatal_error(_("'%s' column is not of number type, "
+	                    "encountered: '%s'"), "x", colsample[xcol]);
 	}
 	}
 	if (coltype[ycol] == DB_C_TYPE_STRING) {
 	if (coltype[ycol] == DB_C_TYPE_STRING) {
-	    G_fatal_error(_("'%s' column is not of number type"), "y");
+	    G_fatal_error(_("'%s' column is not of number type, "
+	                    "encountered: '%s'"), "y", colsample[ycol]);
 	}
 	}
 	if (zcol >= 0 && coltype[zcol] == DB_C_TYPE_STRING) {
 	if (zcol >= 0 && coltype[zcol] == DB_C_TYPE_STRING) {
-	    G_fatal_error(_("'%s' column is not of number type"), "z");
+	    G_fatal_error(_("'%s' column is not of number type, "
+	                    "encountered: '%s'"), "z", colsample[zcol]);
 	}
 	}
 	if (catcol >= 0 && coltype[catcol] == DB_C_TYPE_STRING) {
 	if (catcol >= 0 && coltype[catcol] == DB_C_TYPE_STRING) {
-	    G_fatal_error(_("'%s' column is not of number type"), "cat");
+	    G_fatal_error(_("'%s' column is not of number type, "
+	                    "encountered: '%s'"), "cat", colsample[catcol]);
 	}
 	}
 
 
 	/* Create table */
 	/* Create table */
@@ -467,12 +472,14 @@ int main(int argc, char *argv[])
 
 
 		    switch (coltype[i]) {
 		    switch (coltype[i]) {
 		    case DB_C_TYPE_INT:
 		    case DB_C_TYPE_INT:
-			if (ctype == DB_C_TYPE_DOUBLE) {
+			/* coltype=int and colsample=NULL indicate
+			 * an empty column, so we don't report anything. */
+			if (ctype == DB_C_TYPE_DOUBLE && colsample[i]) {
 			    G_warning(_("Column number %d <%s> defined as double "
 			    G_warning(_("Column number %d <%s> defined as double "
 				       "has only integer values"), i + 1,
 				       "has only integer values"), i + 1,
 				      db_get_column_name(column));
 				      db_get_column_name(column));
 			}
 			}
-			else if (ctype == DB_C_TYPE_STRING) {
+			else if (ctype == DB_C_TYPE_STRING && colsample[i]) {
 			    G_warning(_("Column number %d <%s> defined as string "
 			    G_warning(_("Column number %d <%s> defined as string "
 				       "has only integer values"), i + 1,
 				       "has only integer values"), i + 1,
 				      db_get_column_name(column));
 				      db_get_column_name(column));
@@ -481,8 +488,10 @@ int main(int argc, char *argv[])
 		    case DB_C_TYPE_DOUBLE:
 		    case DB_C_TYPE_DOUBLE:
 			if (ctype == DB_C_TYPE_INT) {
 			if (ctype == DB_C_TYPE_INT) {
 			    G_fatal_error(_("Column number %d <%s> defined as integer "
 			    G_fatal_error(_("Column number %d <%s> defined as integer "
-					   "has double values"), i + 1,
-					  db_get_column_name(column));
+					    "has double values, encountered: '%s'"),
+					  i + 1,
+					  db_get_column_name(column),
+					  colsample[i]);
 			}
 			}
 			else if (ctype == DB_C_TYPE_STRING) {
 			else if (ctype == DB_C_TYPE_STRING) {
 			    G_warning(_("Column number %d <%s> defined as string "
 			    G_warning(_("Column number %d <%s> defined as string "
@@ -493,13 +502,15 @@ int main(int argc, char *argv[])
 		    case DB_C_TYPE_STRING:
 		    case DB_C_TYPE_STRING:
 			if (ctype == DB_C_TYPE_INT) {
 			if (ctype == DB_C_TYPE_INT) {
 			    G_fatal_error(_("Column number %d <%s> defined as integer "
 			    G_fatal_error(_("Column number %d <%s> defined as integer "
-					   "has string values"), i + 1,
-					  db_get_column_name(column));
+					    "has string values, encountered: '%s'"),
+					  i + 1, db_get_column_name(column),
+					  colsample[i]);
 			}
 			}
 			else if (ctype == DB_C_TYPE_DOUBLE) {
 			else if (ctype == DB_C_TYPE_DOUBLE) {
 			    G_fatal_error(_("Column number %d <%s> defined as double "
 			    G_fatal_error(_("Column number %d <%s> defined as double "
-					   "has string values"), i + 1,
-					  db_get_column_name(column));
+					    "has string values, encountered: '%s'"),
+					  i + 1, db_get_column_name(column),
+					  colsample[i]);
 			}
 			}
 			if (length < collen[i]) {
 			if (length < collen[i]) {
 			    G_fatal_error(_("Length of column %d <%s> (%d) is less than "
 			    G_fatal_error(_("Length of column %d <%s> (%d) is less than "

+ 29 - 2
vector/v.in.ascii/points.c

@@ -48,12 +48,17 @@ static int is_double(char *str)
  * minncolumns: minimum number of columns
  * minncolumns: minimum number of columns
  * nrows: number of rows
  * nrows: number of rows
  * column_type: column types
  * column_type: column types
+ * column_sample: values which were used to decide the type or NULLs
  * column_length: column lengths (string only)
  * column_length: column lengths (string only)
+ *
+ * If the whole column is empty, column_sample will contain NULL
+ * for that given column.
  */
  */
 
 
 int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
 int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
 		   int *rowlength, int *ncolumns, int *minncolumns,
 		   int *rowlength, int *ncolumns, int *minncolumns,
-		   int *nrows, int **column_type, int **column_length,
+		   int *nrows, int **column_type, char ***column_sample,
+		   int **column_length,
 		   int skip_lines, int xcol, int ycol, int zcol, int catcol, 
 		   int skip_lines, int xcol, int ycol, int zcol, int catcol, 
 		   int region_flag, int ignore_flag)
 		   int region_flag, int ignore_flag)
 {
 {
@@ -64,6 +69,7 @@ int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
     int ncols = 0;		/* number of columns */
     int ncols = 0;		/* number of columns */
     int minncols = -1;
     int minncols = -1;
     int *coltype = NULL;	/* column types */
     int *coltype = NULL;	/* column types */
+    char **colsample = NULL;	/* column samples */
     int *collen = NULL;		/* column lengths */
     int *collen = NULL;		/* column lengths */
     char **tokens;
     char **tokens;
     int ntokens;		/* number of tokens */
     int ntokens;		/* number of tokens */
@@ -153,9 +159,12 @@ int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
 
 
 	if (ntokens > ncols) {
 	if (ntokens > ncols) {
 	    coltype = (int *)G_realloc(coltype, ntokens * sizeof(int));
 	    coltype = (int *)G_realloc(coltype, ntokens * sizeof(int));
+	    colsample = (char **)G_realloc(colsample, ntokens * sizeof(char *));
 	    collen = (int *)G_realloc(collen, ntokens * sizeof(int));
 	    collen = (int *)G_realloc(collen, ntokens * sizeof(int));
 	    for (i = ncols; i < ntokens; i++) {
 	    for (i = ncols; i < ntokens; i++) {
 		coltype[i] = DB_C_TYPE_INT;	/* default type */
 		coltype[i] = DB_C_TYPE_INT;	/* default type */
+		/* We store a value later if column is not empty. */
+		colsample[i] = NULL;
 		collen[i] = 0;
 		collen[i] = 0;
 	    }
 	    }
 	    ncols = ntokens;
 	    ncols = ntokens;
@@ -246,6 +255,9 @@ int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
 
 
 	    len = strlen(tokens[i]); 
 	    len = strlen(tokens[i]); 
 	    /* do not guess column type for missing values */ 
 	    /* do not guess column type for missing values */ 
+	    /* continue here ensures that we preserve NULLs in
+	     * colsample for (completely) empty columns (which, however,
+	     * should probably default to string rather than int). */
 	    if (len == 0) 
 	    if (len == 0) 
 		continue;
 		continue;
 
 
@@ -254,16 +266,30 @@ int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
 		    is_double(tokens[i]));
 		    is_double(tokens[i]));
 
 
 	    if (is_int(tokens[i])) {
 	    if (is_int(tokens[i])) {
+		/* We store the first encountered value for integers.
+		 * Rest is for consistency. */
+		if (!colsample[i] || coltype[i] != DB_C_TYPE_INT) {
+		    G_free(colsample[i]);
+		    colsample[i] = G_store(tokens[i]);
+		}
 		continue;	/* integer */
 		continue;	/* integer */
 	    }
 	    }
 	    if (is_double(tokens[i])) {	/* double */
 	    if (is_double(tokens[i])) {	/* double */
 		if (coltype[i] == DB_C_TYPE_INT) {
 		if (coltype[i] == DB_C_TYPE_INT) {
 		    coltype[i] = DB_C_TYPE_DOUBLE;
 		    coltype[i] = DB_C_TYPE_DOUBLE;
+		    G_free(colsample[i]);
+		    colsample[i] = G_store(tokens[i]);
 		}
 		}
 		continue;
 		continue;
 	    }
 	    }
 	    /* string */
 	    /* string */
-	    coltype[i] = DB_C_TYPE_STRING;
+	    if (coltype[i] != DB_C_TYPE_STRING) {
+		/* Only set type if not already set to store the field
+		 * only once and to show the first encountered item. */
+		coltype[i] = DB_C_TYPE_STRING;
+		G_free(colsample[i]);
+		colsample[i] = G_store(tokens[i]);
+	    }
 	    if (len > collen[i])
 	    if (len > collen[i])
 		collen[i] = len;
 		collen[i] = len;
 	}
 	}
@@ -289,6 +315,7 @@ int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
     *ncolumns = ncols;
     *ncolumns = ncols;
     *minncolumns = minncols;
     *minncolumns = minncols;
     *column_type = coltype;
     *column_type = coltype;
+    *column_sample = colsample;
     *column_length = collen;
     *column_length = collen;
     *nrows = row - 1;		/* including skipped lines */
     *nrows = row - 1;		/* including skipped lines */