token.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. /*!
  2. \file lib/gis/token.c
  3. \brief GIS Library - Tokenize strings
  4. (C) 2001-2008, 2011-2013 by the GRASS Development Team
  5. This program is free software under the GNU General Public License
  6. (>=v2). Read the file COPYING that comes with GRASS for details.
  7. \author USA CERL and others
  8. */
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <grass/gis.h>
  12. #include <grass/glocale.h>
  13. static char **tokenize(const char *, const char *, const char *);
  14. /*!
  15. \brief Tokenize string
  16. Given a string, <em>buf</em>, turn delimiter, <em>delim</em>, into
  17. '\0' (NULL) and place pointers to tokens in tokens. <em>buf</em>
  18. must not contain a new line (\n). <em>delim</em> may consist of more
  19. than one character. G_free_tokens() must be called when finished
  20. with tokens to release memory.
  21. Example:
  22. \code
  23. char **tokens;
  24. int ntok, i;
  25. tokens = G_tokenize(buf, " |:,");
  26. ntok = G_number_of_tokens(tokens);
  27. for (i=0; i < ntok; i++) {
  28. G_debug(1, "%d=[%s]", i, tokens[i]);
  29. }
  30. G_free_tokens(tokens);
  31. \endcode
  32. \param buf input string
  33. \param delim string delimiter
  34. \return pointer to string token
  35. */
  36. char **G_tokenize(const char *buf, const char *delim)
  37. {
  38. return tokenize(buf, delim, NULL);
  39. }
  40. /*!
  41. \brief Tokenize string
  42. This function behaves similarly to G_tokenize().
  43. It introduces <em>valchar</em> which defines borders of token. Within
  44. token <em>delim</em> is ignored.
  45. Example:
  46. \code
  47. char *str = "a,'b,c',d";
  48. char **tokens1, **tokens2;
  49. int ntok1, ntok2;
  50. tokens1 = G_tokenize(str, ",");
  51. ntok1 = G_number_of_tokens(tokens1);
  52. tokens1 = G_tokenize2(str, ",", "'");
  53. ntok2 = G_number_of_tokens(tokens2);
  54. \endcode
  55. In this example <em>ntok1</em> will be 4, <em>ntok2</em> only 3,
  56. i.e. { "a", "'b, c'", "d"}
  57. \param buf input string
  58. \param delim string delimiter
  59. \param valchar character defining border of token
  60. \return pointer to string token
  61. */
  62. char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
  63. {
  64. return tokenize(buf, delim, valchar);
  65. }
  66. char **tokenize(const char *buf, const char *delim, const char *inchar)
  67. {
  68. int i;
  69. char **tokens;
  70. const char *p;
  71. char *q;
  72. enum {
  73. S_START,
  74. S_IN_QUOTE,
  75. S_AFTER_QUOTE,
  76. };
  77. enum {
  78. A_NO_OP,
  79. A_ADD_CHAR,
  80. A_NEW_FIELD,
  81. A_END_RECORD,
  82. A_ERROR
  83. };
  84. int state;
  85. int quo = inchar ? *inchar : -1;
  86. /* do not modify buf, make a copy */
  87. p = q = G_store(buf);
  88. i = 0;
  89. tokens = (char **)G_malloc(2 * sizeof(char *));
  90. /* always one token */
  91. tokens[i++] = q;
  92. for (state = S_START; ; p++) {
  93. int c = *p;
  94. int action = A_NO_OP;
  95. switch (state) {
  96. case S_START:
  97. if (c == quo)
  98. state = S_IN_QUOTE;
  99. else if (c == '\0')
  100. action = A_END_RECORD;
  101. else if (strchr(delim, c))
  102. action = A_NEW_FIELD;
  103. else
  104. action = A_ADD_CHAR;
  105. break;
  106. case S_IN_QUOTE:
  107. if (c == quo)
  108. state = S_AFTER_QUOTE;
  109. else if (c == '\0')
  110. action = A_ERROR;
  111. else
  112. action = A_ADD_CHAR;
  113. break;
  114. case S_AFTER_QUOTE:
  115. if (c == quo)
  116. state = S_IN_QUOTE, action = A_ADD_CHAR;
  117. else if (c == '\0')
  118. action = A_END_RECORD;
  119. else if (strchr(delim, c))
  120. state = S_START, action = A_NEW_FIELD;
  121. else
  122. action = A_ERROR;
  123. break;
  124. }
  125. switch (action) {
  126. case A_NO_OP:
  127. break;
  128. case A_ADD_CHAR:
  129. *q++ = *p;
  130. break;
  131. case A_NEW_FIELD:
  132. *q++ = '\0';
  133. tokens[i++] = q;
  134. tokens = G_realloc(tokens, (i + 2) * sizeof(char *));
  135. break;
  136. case A_END_RECORD:
  137. *q++ = '\0';
  138. tokens[i++] = NULL;
  139. return tokens;
  140. case A_ERROR:
  141. G_warning(_("parse error"));
  142. *q++ = '\0';
  143. tokens[i++] = NULL;
  144. return tokens;
  145. }
  146. }
  147. }
  148. /*!
  149. \brief Return number of tokens
  150. \param tokens
  151. \return number of tokens
  152. */
  153. int G_number_of_tokens(char **tokens)
  154. {
  155. int n;
  156. n = 0;
  157. for (n = 0; tokens[n] != NULL; n++)
  158. ;
  159. return n;
  160. }
  161. /*!
  162. \brief Free memory allocated to tokens.
  163. <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
  164. tokens to release memory.
  165. \param[out] tokens
  166. */
  167. void G_free_tokens(char **tokens)
  168. {
  169. if (tokens[0] != NULL)
  170. G_free(tokens[0]);
  171. G_free(tokens);
  172. }