fml_parser.cc 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. /* Copyright 2016 Google Inc. All Rights Reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. ==============================================================================*/
  #include "syntaxnet/fml_parser.h"

  #include <ctype.h>

  #include <algorithm>
  #include <string>

  #include "syntaxnet/utils.h"
  #include "tensorflow/core/lib/strings/strcat.h"
  17. namespace syntaxnet {
  18. void FMLParser::Initialize(const string &source) {
  19. // Initialize parser state.
  20. source_ = source;
  21. current_ = source_.begin();
  22. item_start_ = line_start_ = current_;
  23. line_number_ = item_line_number_ = 1;
  24. // Read first input item.
  25. NextItem();
  26. }
  27. void FMLParser::Error(const string &error_message) {
  28. LOG(FATAL) << "Error in feature model, line " << item_line_number_
  29. << ", position " << (item_start_ - line_start_ + 1)
  30. << ": " << error_message
  31. << "\n " << string(line_start_, current_) << " <--HERE";
  32. }
  33. void FMLParser::Next() {
  34. // Move to the next input character. If we are at a line break update line
  35. // number and line start position.
  36. if (*current_ == '\n') {
  37. ++line_number_;
  38. ++current_;
  39. line_start_ = current_;
  40. } else {
  41. ++current_;
  42. }
  43. }
  44. void FMLParser::NextItem() {
  45. // Skip white space and comments.
  46. while (!eos()) {
  47. if (*current_ == '#') {
  48. // Skip comment.
  49. while (!eos() && *current_ != '\n') Next();
  50. } else if (isspace(*current_)) {
  51. // Skip whitespace.
  52. while (!eos() && isspace(*current_)) Next();
  53. } else {
  54. break;
  55. }
  56. }
  57. // Record start position for next item.
  58. item_start_ = current_;
  59. item_line_number_ = line_number_;
  60. // Check for end of input.
  61. if (eos()) {
  62. item_type_ = END;
  63. return;
  64. }
  65. // Parse number.
  66. if (isdigit(*current_) || *current_ == '+' || *current_ == '-') {
  67. string::iterator start = current_;
  68. Next();
  69. while (isdigit(*current_) || *current_ == '.') Next();
  70. item_text_.assign(start, current_);
  71. item_type_ = NUMBER;
  72. return;
  73. }
  74. // Parse string.
  75. if (*current_ == '"') {
  76. Next();
  77. string::iterator start = current_;
  78. while (*current_ != '"') {
  79. if (eos()) Error("Unterminated string");
  80. Next();
  81. }
  82. item_text_.assign(start, current_);
  83. item_type_ = STRING;
  84. Next();
  85. return;
  86. }
  87. // Parse identifier name.
  88. if (isalpha(*current_) || *current_ == '_' || *current_ == '/') {
  89. string::iterator start = current_;
  90. while (isalnum(*current_) || *current_ == '_' || *current_ == '-' ||
  91. *current_ == '/') Next();
  92. item_text_.assign(start, current_);
  93. item_type_ = NAME;
  94. return;
  95. }
  96. // Single character item.
  97. item_type_ = *current_;
  98. Next();
  99. }
  100. void FMLParser::Parse(const string &source,
  101. FeatureExtractorDescriptor *result) {
  102. // Initialize parser.
  103. Initialize(source);
  104. while (item_type_ != END) {
  105. // Parse either a parameter name or a feature.
  106. if (item_type_ != NAME) Error("Feature type name expected");
  107. string name = item_text_;
  108. NextItem();
  109. if (item_type_ == '=') {
  110. Error("Invalid syntax: feature expected");
  111. } else {
  112. // Parse feature.
  113. FeatureFunctionDescriptor *descriptor = result->add_feature();
  114. descriptor->set_type(name);
  115. ParseFeature(descriptor);
  116. }
  117. }
  118. }
  119. void FMLParser::ParseFeature(FeatureFunctionDescriptor *result) {
  120. // Parse argument and parameters.
  121. if (item_type_ == '(') {
  122. NextItem();
  123. ParseParameter(result);
  124. while (item_type_ == ',') {
  125. NextItem();
  126. ParseParameter(result);
  127. }
  128. if (item_type_ != ')') Error(") expected");
  129. NextItem();
  130. }
  131. // Parse feature name.
  132. if (item_type_ == ':') {
  133. NextItem();
  134. if (item_type_ != NAME && item_type_ != STRING) {
  135. Error("Feature name expected");
  136. }
  137. string name = item_text_;
  138. NextItem();
  139. // Set feature name.
  140. result->set_name(name);
  141. }
  142. // Parse sub-features.
  143. if (item_type_ == '.') {
  144. // Parse dotted sub-feature.
  145. NextItem();
  146. if (item_type_ != NAME) Error("Feature type name expected");
  147. string type = item_text_;
  148. NextItem();
  149. // Parse sub-feature.
  150. FeatureFunctionDescriptor *subfeature = result->add_feature();
  151. subfeature->set_type(type);
  152. ParseFeature(subfeature);
  153. } else if (item_type_ == '{') {
  154. // Parse sub-feature block.
  155. NextItem();
  156. while (item_type_ != '}') {
  157. if (item_type_ != NAME) Error("Feature type name expected");
  158. string type = item_text_;
  159. NextItem();
  160. // Parse sub-feature.
  161. FeatureFunctionDescriptor *subfeature = result->add_feature();
  162. subfeature->set_type(type);
  163. ParseFeature(subfeature);
  164. }
  165. NextItem();
  166. }
  167. }
  168. void FMLParser::ParseParameter(FeatureFunctionDescriptor *result) {
  169. if (item_type_ == NUMBER) {
  170. int argument =
  171. utils::ParseUsing<int>(item_text_, tensorflow::strings::safe_strto32);
  172. NextItem();
  173. // Set default argument for feature.
  174. result->set_argument(argument);
  175. } else if (item_type_ == NAME) {
  176. string name = item_text_;
  177. NextItem();
  178. if (item_type_ != '=') Error("= expected");
  179. NextItem();
  180. if (item_type_ >= END) Error("Parameter value expected");
  181. string value = item_text_;
  182. NextItem();
  183. // Add parameter to feature.
  184. Parameter *parameter;
  185. parameter = result->add_parameter();
  186. parameter->set_name(name);
  187. parameter->set_value(value);
  188. } else {
  189. Error("Syntax error in parameter list");
  190. }
  191. }
  192. void ToFMLFunction(const FeatureFunctionDescriptor &function, string *output) {
  193. output->append(function.type());
  194. if (function.argument() != 0 || function.parameter_size() > 0) {
  195. output->append("(");
  196. bool first = true;
  197. if (function.argument() != 0) {
  198. tensorflow::strings::StrAppend(output, function.argument());
  199. first = false;
  200. }
  201. for (int i = 0; i < function.parameter_size(); ++i) {
  202. if (!first) output->append(",");
  203. output->append(function.parameter(i).name());
  204. output->append("=");
  205. output->append("\"");
  206. output->append(function.parameter(i).value());
  207. output->append("\"");
  208. first = false;
  209. }
  210. output->append(")");
  211. }
  212. }
  213. void ToFML(const FeatureFunctionDescriptor &function, string *output) {
  214. ToFMLFunction(function, output);
  215. if (function.feature_size() == 1) {
  216. output->append(".");
  217. ToFML(function.feature(0), output);
  218. } else if (function.feature_size() > 1) {
  219. output->append(" { ");
  220. for (int i = 0; i < function.feature_size(); ++i) {
  221. if (i > 0) output->append(" ");
  222. ToFML(function.feature(i), output);
  223. }
  224. output->append(" } ");
  225. }
  226. }
  227. void ToFML(const FeatureExtractorDescriptor &extractor, string *output) {
  228. for (int i = 0; i < extractor.feature_size(); ++i) {
  229. ToFML(extractor.feature(i), output);
  230. output->append("\n");
  231. }
  232. }
  233. string AsFML(const FeatureFunctionDescriptor &function) {
  234. string str;
  235. ToFML(function, &str);
  236. return str;
  237. }
  238. string AsFML(const FeatureExtractorDescriptor &extractor) {
  239. string str;
  240. ToFML(extractor, &str);
  241. return str;
  242. }
  // Removes every double-quote character from *fml_string in place. All other
  // characters keep their relative order.
  void StripFML(string *fml_string) {
    // Erase-remove: std::remove compacts the characters to keep in a single
    // pass and returns the new logical end; erase() then drops the tail. This
    // avoids shifting the rest of the string once per quote.
    fml_string->erase(
        std::remove(fml_string->begin(), fml_string->end(), '"'),
        fml_string->end());
  }
  253. } // namespace syntaxnet