points.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <grass/gis.h>
#include <grass/dbmi.h>
#include <grass/vector.h>
#include <grass/glocale.h>

#include "local_proto.h"
  10. /* Determine if the string is integer, e.g. 123, +123, -123,
  11. * return 1 if integer, 0 otherwise */
  12. static int is_int(char *str)
  13. {
  14. char *tail;
  15. if (strtol(str, &tail, 10), tail == str || *tail != '\0') {
  16. /* doesn't look like a number,
  17. or has extra characters after what looks to be a number */
  18. return 0;
  19. }
  20. return 1;
  21. }
  22. /* Determine if the string is double, e.g. 123.456, +123.456, -123.456, 1.23456e2
  23. * return 1 if double, 0 otherwise */
  24. static int is_double(char *str)
  25. {
  26. char *tail;
  27. if (strtod(str, &tail), tail == str || *tail != '\0') {
  28. /* doesn't look like a number,
  29. or has extra characters after what looks to be a number */
  30. return 0;
  31. }
  32. return 1;
  33. }
  34. /* Analyse points ascii file. Determine number of columns and column types.
  35. * ascii_tmp: write copy of tempfile to ascii_tmp:
  36. * rowlength: maximum row length
  37. * ncolumns: number of columns
  38. * minncolumns: minimum number of columns
  39. * nrows: number of rows
  40. * column_type: column types
  41. * column_sample: values which was used to decide the type or NULLs
  42. * column_length: column lengths (string only)
  43. *
  44. * If the who whole column is empty, column_sample will contain NULL
  45. * for that given column.
  46. */
  47. int points_analyse(FILE * ascii_in, FILE * ascii, char *fs, char *td,
  48. int *rowlength, int *ncolumns, int *minncolumns,
  49. int *nrows, int **column_type, char ***column_sample,
  50. int **column_length,
  51. int skip_lines, int xcol, int ycol, int zcol, int catcol,
  52. int region_flag, int ignore_flag)
  53. {
  54. int i;
  55. int buflen; /* buffer length */
  56. char *buf; /* buffer */
  57. int row = 1; /* line number, first is 1 */
  58. int ncols = 0; /* number of columns */
  59. int minncols = -1;
  60. int *coltype = NULL; /* column types */
  61. char **colsample = NULL; /* column samples */
  62. int *collen = NULL; /* column lengths */
  63. char **tokens;
  64. int ntokens; /* number of tokens */
  65. int len, rowlen = 0; /* maximum row length */
  66. struct Cell_head window;
  67. double northing = .0;
  68. double easting = .0;
  69. char *xtoken, *ytoken, *sav_buf;
  70. int skip = FALSE, skipped = 0;
  71. buflen = 4000;
  72. buf = (char *)G_malloc(buflen);
  73. xtoken = (char *)G_malloc(256);
  74. ytoken = (char *)G_malloc(256);
  75. G_message(_("Scanning input for column types..."));
  76. /* fetch projection for LatLong test */
  77. G_get_window(&window);
  78. /* points_to_bin() would be faster if we would write out
  79. * clean data to ascii
  80. * points_to_bin() would then not need G_chop() and
  81. * for latlon not G_scan_[easting|northing]() */
  82. while (1) {
  83. len = 0; /* not really needed, but what the heck */
  84. skip = FALSE; /* reset out-of-region check */
  85. sav_buf = NULL;
  86. if (G_getl2(buf, buflen - 1, ascii_in) == 0)
  87. break; /* EOF */
  88. if (row <= skip_lines) {
  89. G_debug(3, "skipping header row %d : %d chars", row,
  90. (int)strlen(buf));
  91. /* this fn is read-only, write to hist with points_to_bin() */
  92. fprintf(ascii, "%s\n", buf);
  93. len = strlen(buf) + 1;
  94. if (len > rowlen)
  95. rowlen = len;
  96. row++;
  97. continue;
  98. }
  99. if ((buf[0] == '#') || (buf[0] == '\0')) {
  100. G_debug(3, "skipping comment row %d : %d chars", row,
  101. (int)strlen(buf));
  102. continue;
  103. }
  104. /* no G_chop() as first/last column may be empty fs=tab value */
  105. G_debug(3, "row %d : %d chars", row, (int)strlen(buf));
  106. tokens = G_tokenize2(buf, fs, td);
  107. ntokens = G_number_of_tokens(tokens);
  108. if (ntokens == 0) {
  109. continue;
  110. }
  111. if (ncols > 0 && ntokens != ncols) {
  112. /* these rows can not be imported into the attribute table */
  113. if (ignore_flag) {
  114. G_warning(_("Row %d: '%s' can not be imported into the attribute table"),
  115. row, buf);
  116. }
  117. else {
  118. G_warning(_("Expected %d columns, found %d columns"), ncols, ntokens);
  119. G_fatal_error(_("Broken row %d: '%s'"), row, buf);
  120. }
  121. }
  122. if (xcol >= ntokens || ycol >= ntokens || zcol >= ntokens ||
  123. catcol >= ntokens) {
  124. if (ignore_flag) {
  125. G_debug(3, "Skipping broken row %d: '%s'", row, buf);
  126. continue;
  127. }
  128. else {
  129. G_warning(_("ntokens: %d, xcol: %d, ycol: %d, zcol: %d"), ntokens, xcol, ycol, zcol);
  130. G_fatal_error(_("Broken row %d: '%s'"), row, buf);
  131. }
  132. }
  133. len = strlen(buf) + 1;
  134. if (len > rowlen)
  135. rowlen = len;
  136. if (ntokens > ncols) {
  137. coltype = (int *)G_realloc(coltype, ntokens * sizeof(int));
  138. colsample = (char **)G_realloc(colsample, ntokens * sizeof(char *));
  139. collen = (int *)G_realloc(collen, ntokens * sizeof(int));
  140. for (i = ncols; i < ntokens; i++) {
  141. coltype[i] = DB_C_TYPE_INT; /* default type */
  142. /* We store a value later if column is not empty. */
  143. colsample[i] = NULL;
  144. collen[i] = 0;
  145. }
  146. ncols = ntokens;
  147. }
  148. if (minncols == -1 || minncols > ntokens)
  149. minncols = ntokens;
  150. /* Determine column types */
  151. for (i = 0; i < ntokens; i++) {
  152. G_chop(tokens[i]);
  153. if ((G_projection() == PROJECTION_LL)) {
  154. if (i == xcol || i == ycol) {
  155. if (i == 0) { /* Save position of original internal token buffer */
  156. /* Prevent memory leaks */
  157. sav_buf = tokens[0];
  158. }
  159. /* check if coordinates are DMS or decimal or not latlong at all */
  160. if (i == xcol) {
  161. if (G_scan_easting(tokens[i], &easting, window.proj)) {
  162. G_debug(4, "is_latlong east: %g", easting);
  163. sprintf(xtoken, "%.15g", easting);
  164. /* replace current DMS token by decimal degree */
  165. tokens[i] = xtoken;
  166. if (region_flag) {
  167. if ((window.east < easting) ||
  168. (window.west > easting))
  169. skip = TRUE;
  170. }
  171. }
  172. else {
  173. fprintf(stderr, _("Current row %d:\n%s\n"), row, buf);
  174. G_fatal_error(_("Unparsable longitude value in column %d: %s"),
  175. i + 1, tokens[i]);
  176. }
  177. }
  178. if (i == ycol) {
  179. if (G_scan_northing(tokens[i], &northing, window.proj)) {
  180. G_debug(4, "is_latlong north: %g", northing);
  181. sprintf(ytoken, "%.15g", northing);
  182. /* replace current DMS token by decimal degree */
  183. tokens[i] = ytoken;
  184. if (region_flag) {
  185. if ((window.north < northing) ||
  186. (window.south > northing))
  187. skip = TRUE;
  188. }
  189. }
  190. else {
  191. fprintf(stderr, _("Current row %d:\n%s\n"), row, buf);
  192. G_fatal_error(_("Unparsable latitude value in column %d: %s"),
  193. i + 1, tokens[i]);
  194. }
  195. }
  196. } /* if (x or y) */
  197. } /* PROJECTION_LL */
  198. else {
  199. if (strlen(tokens[i]) == 0) {
  200. if (i == xcol) {
  201. G_fatal_error(_("Unparsable longitude value in column %d: %s"),
  202. i + 1, tokens[i]);
  203. }
  204. if (i == ycol) {
  205. G_fatal_error(_("Unparsable latitude value in column %d: %s"),
  206. i + 1, tokens[i]);
  207. }
  208. }
  209. if (region_flag) {
  210. /* consider z range if -z flag is used? */
  211. /* change to if(>= east,north){skip=1;} to allow correct tiling */
  212. /* don't "continue;" so multiple passes will have the
  213. same column types and length for patching */
  214. if (i == xcol) {
  215. easting = atof(tokens[i]);
  216. if ((window.east < easting) ||
  217. (window.west > easting))
  218. skip = TRUE;
  219. }
  220. if (i == ycol) {
  221. northing = atof(tokens[i]);
  222. if ((window.north < northing) ||
  223. (window.south > northing))
  224. skip = TRUE;
  225. }
  226. }
  227. }
  228. len = strlen(tokens[i]);
  229. /* do not guess column type for missing values */
  230. /* continue here ensures that we preserve NULLs in
  231. * colsample for (completely) empty columns (which, however,
  232. * should probably default to string rather than int). */
  233. if (len == 0)
  234. continue;
  235. G_debug(4, "row %d col %d: '%s' is_int = %d is_double = %d",
  236. row + 1, i + 1, tokens[i], is_int(tokens[i]),
  237. is_double(tokens[i]));
  238. if (is_int(tokens[i])) {
  239. /* We store the first encountered value for integers.
  240. * Rest is for consistency. */
  241. if (!colsample[i] || coltype[i] != DB_C_TYPE_INT) {
  242. G_free(colsample[i]);
  243. colsample[i] = G_store(tokens[i]);
  244. }
  245. continue; /* integer */
  246. }
  247. if (is_double(tokens[i])) { /* double */
  248. if (coltype[i] == DB_C_TYPE_INT) {
  249. coltype[i] = DB_C_TYPE_DOUBLE;
  250. G_free(colsample[i]);
  251. colsample[i] = G_store(tokens[i]);
  252. }
  253. continue;
  254. }
  255. /* string */
  256. if (coltype[i] != DB_C_TYPE_STRING) {
  257. /* Only set type if not already set to store the field
  258. * only once and to show the first encountered item. */
  259. coltype[i] = DB_C_TYPE_STRING;
  260. G_free(colsample[i]);
  261. colsample[i] = G_store(tokens[i]);
  262. }
  263. if (len > collen[i])
  264. collen[i] = len;
  265. }
  266. /* write dataline to tmp file */
  267. if (!skip)
  268. fprintf(ascii, "%s\n", buf);
  269. else
  270. skipped++;
  271. if (sav_buf != NULL) {
  272. /* Restore original token buffer so free_tokens works */
  273. /* Only do this if tokens[0] was re-assigned */
  274. tokens[0] = sav_buf;
  275. sav_buf = NULL;
  276. }
  277. G_free_tokens(tokens);
  278. row++;
  279. }
  280. *rowlength = rowlen;
  281. *ncolumns = ncols;
  282. *minncolumns = minncols;
  283. *column_type = coltype;
  284. *column_sample = colsample;
  285. *column_length = collen;
  286. *nrows = row - 1; /* including skipped lines */
  287. G_free(buf);
  288. G_free(xtoken);
  289. G_free(ytoken);
  290. if (region_flag)
  291. G_message(n_("Skipping %d of %d row falling outside of current region",
  292. "Skipping %d of %d rows falling outside of current region",
  293. row - 1),
  294. skipped, row - 1);
  295. return 0;
  296. }
  297. /* Import points from ascii file.
  298. *
  299. * fs: field separator
  300. * xcol, ycol, zcol, catcol: x,y,z,cat column in input file, first column is 1,
  301. * zcol and catcol may be 0 (do not use)
  302. * rowlen: maximum row length
  303. * Note: column types (both in header or coldef) must be supported by driver
  304. */
  305. int points_to_bin(FILE * ascii, int rowlen, struct Map_info *Map,
  306. dbDriver * driver, char *table, char *fs, char *td,
  307. int nrows, int *coltype, int xcol, int ycol, int zcol,
  308. int catcol, int skip_lines)
  309. {
  310. char *buf, buf2[4000];
  311. int cat = 0;
  312. int row = 0;
  313. struct line_pnts *Points;
  314. struct line_cats *Cats;
  315. dbString sql, val;
  316. struct Cell_head window;
  317. G_message(_("Importing points..."));
  318. /* fetch projection for LatLong test */
  319. G_get_window(&window);
  320. rewind(ascii);
  321. Points = Vect_new_line_struct();
  322. Cats = Vect_new_cats_struct();
  323. /* actually last 2 characters won't be read */
  324. buf = (char *)G_malloc(rowlen + 2);
  325. db_init_string(&sql);
  326. db_init_string(&val);
  327. if (skip_lines > 0) {
  328. sprintf(buf2, "HEADER: (%d lines)\n", skip_lines);
  329. Vect_hist_write(Map, buf2);
  330. }
  331. /* rowlen + 2 to read till the end of line on both UNIX and Windows */
  332. while (G_getl2(buf, rowlen + 2, ascii) != 0) {
  333. int i, len;
  334. double x, y, z;
  335. char **tokens;
  336. int ntokens; /* number of tokens */
  337. G_percent(row, nrows, 2);
  338. row++;
  339. if (row <= skip_lines) {
  340. G_debug(4, "writing skip line %d to hist : %d chars", row,
  341. (int)strlen(buf));
  342. Vect_hist_write(Map, buf);
  343. Vect_hist_write(Map, "\n");
  344. continue;
  345. }
  346. len = strlen(buf);
  347. if (len == 0)
  348. continue; /* should not happen */
  349. G_debug(4, "row: %s", buf);
  350. tokens = G_tokenize2(buf, fs, td);
  351. ntokens = G_number_of_tokens(tokens);
  352. G_chop(tokens[xcol]);
  353. G_chop(tokens[ycol]);
  354. if ((G_projection() == PROJECTION_LL)) {
  355. G_scan_easting(tokens[xcol], &x, window.proj);
  356. G_scan_northing(tokens[ycol], &y, window.proj);
  357. }
  358. else {
  359. x = atof(tokens[xcol]);
  360. y = atof(tokens[ycol]);
  361. }
  362. G_debug(4, "x: %f, y: %f", x, y);
  363. if (zcol >= 0) {
  364. G_chop(tokens[zcol]);
  365. z = atof(tokens[zcol]);
  366. }
  367. else
  368. z = 0.0;
  369. if (catcol >= 0) {
  370. G_chop(tokens[catcol]);
  371. cat = atof(tokens[catcol]);
  372. }
  373. else
  374. cat++;
  375. Vect_reset_line(Points);
  376. Vect_reset_cats(Cats);
  377. Vect_append_point(Points, x, y, z);
  378. Vect_cat_set(Cats, 1, cat);
  379. Vect_write_line(Map, GV_POINT, Points, Cats);
  380. /* Attributes */
  381. if (driver) {
  382. sprintf(buf2, "insert into %s values ( ", table);
  383. db_set_string(&sql, buf2);
  384. if (catcol < 0) {
  385. sprintf(buf2, "%d, ", cat);
  386. db_append_string(&sql, buf2);
  387. }
  388. for (i = 0; i < ntokens; i++) {
  389. G_chop(tokens[i]);
  390. if (i > 0)
  391. db_append_string(&sql, ", ");
  392. if (strlen(tokens[i]) > 0) {
  393. if (coltype[i] == DB_C_TYPE_INT ||
  394. coltype[i] == DB_C_TYPE_DOUBLE) {
  395. if (G_projection() == PROJECTION_LL &&
  396. (i == xcol || i == ycol)) {
  397. if (i == xcol)
  398. sprintf(buf2, "%.15g", x);
  399. else
  400. sprintf(buf2, "%.15g", y);
  401. }
  402. else
  403. sprintf(buf2, "%s", tokens[i]);
  404. }
  405. else {
  406. db_set_string(&val, tokens[i]);
  407. /* TODO: strip leading and trailing "quotes" from input string */
  408. db_double_quote_string(&val);
  409. sprintf(buf2, "'%s'", db_get_string(&val));
  410. }
  411. }
  412. else {
  413. sprintf(buf2, "null");
  414. }
  415. db_append_string(&sql, buf2);
  416. }
  417. db_append_string(&sql, ")");
  418. G_debug(3, "%s", db_get_string(&sql));
  419. if (db_execute_immediate(driver, &sql) != DB_OK) {
  420. G_fatal_error(_("Unable to insert new record: %s"),
  421. db_get_string(&sql));
  422. }
  423. }
  424. G_free_tokens(tokens);
  425. }
  426. G_percent(nrows, nrows, 2);
  427. return 0;
  428. }