parser_features.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. /* Copyright 2016 Google Inc. All Rights Reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. ==============================================================================*/
  12. #include "syntaxnet/parser_features.h"
  13. #include <string>
  14. #include "syntaxnet/registry.h"
  15. #include "syntaxnet/sentence_features.h"
  16. #include "syntaxnet/workspace.h"
  17. namespace syntaxnet {
  18. // Registry for the parser feature functions.
  19. REGISTER_SYNTAXNET_CLASS_REGISTRY("parser feature function",
  20. ParserFeatureFunction);
  21. // Registry for the parser state + token index feature functions.
  22. REGISTER_SYNTAXNET_CLASS_REGISTRY("parser+index feature function",
  23. ParserIndexFeatureFunction);
  24. RootFeatureType::RootFeatureType(const string &name,
  25. const FeatureType &wrapped_type,
  26. int root_value)
  27. : FeatureType(name), wrapped_type_(wrapped_type), root_value_(root_value) {}
  28. string RootFeatureType::GetFeatureValueName(FeatureValue value) const {
  29. if (value == root_value_) return "<ROOT>";
  30. return wrapped_type_.GetFeatureValueName(value);
  31. }
  32. FeatureValue RootFeatureType::GetDomainSize() const {
  33. return wrapped_type_.GetDomainSize() + 1;
  34. }
  35. // Parser feature locator for accessing the remaining input tokens in the parser
  36. // state. It takes the offset relative to the current input token as argument.
  37. // Negative values represent tokens to the left, positive values to the right
  38. // and 0 (the default argument value) represents the current input token.
  39. class InputParserLocator : public ParserLocator<InputParserLocator> {
  40. public:
  41. // Gets the new focus.
  42. int GetFocus(const WorkspaceSet &workspaces, const ParserState &state) const {
  43. const int offset = argument();
  44. return state.Input(offset);
  45. }
  46. };
  47. REGISTER_PARSER_FEATURE_FUNCTION("input", InputParserLocator);
  48. // Parser feature locator for accessing the stack in the parser state. The
  49. // argument represents the position on the stack, 0 being the top of the stack.
  50. class StackParserLocator : public ParserLocator<StackParserLocator> {
  51. public:
  52. // Gets the new focus.
  53. int GetFocus(const WorkspaceSet &workspaces, const ParserState &state) const {
  54. const int position = argument();
  55. return state.Stack(position);
  56. }
  57. };
  58. REGISTER_PARSER_FEATURE_FUNCTION("stack", StackParserLocator);
  59. // Parser feature locator for locating the head of the focus token. The argument
  60. // specifies the number of times the head function is applied. Please note that
  61. // this operates on partially built dependency trees.
  62. class HeadFeatureLocator : public ParserIndexLocator<HeadFeatureLocator> {
  63. public:
  64. // Updates the current focus to a new location. If the initial focus is
  65. // outside the range of the sentence, returns -2.
  66. void UpdateArgs(const WorkspaceSet &workspaces, const ParserState &state,
  67. int *focus) const {
  68. if (*focus < -1 || *focus >= state.sentence().token_size()) {
  69. *focus = -2;
  70. return;
  71. }
  72. const int levels = argument();
  73. *focus = state.Parent(*focus, levels);
  74. }
  75. };
  76. REGISTER_PARSER_IDX_FEATURE_FUNCTION("head", HeadFeatureLocator);
  77. // Parser feature locator for locating children of the focus token. The argument
  78. // specifies the number of times the leftmost (when the argument is < 0) or
  79. // rightmost (when the argument > 0) child function is applied. Please note that
  80. // this operates on partially built dependency trees.
  81. class ChildFeatureLocator : public ParserIndexLocator<ChildFeatureLocator> {
  82. public:
  83. // Updates the current focus to a new location. If the initial focus is
  84. // outside the range of the sentence, returns -2.
  85. void UpdateArgs(const WorkspaceSet &workspaces, const ParserState &state,
  86. int *focus) const {
  87. if (*focus < -1 || *focus >= state.sentence().token_size()) {
  88. *focus = -2;
  89. return;
  90. }
  91. const int levels = argument();
  92. if (levels < 0) {
  93. *focus = state.LeftmostChild(*focus, -levels);
  94. } else {
  95. *focus = state.RightmostChild(*focus, levels);
  96. }
  97. }
  98. };
  99. REGISTER_PARSER_IDX_FEATURE_FUNCTION("child", ChildFeatureLocator);
  100. // Parser feature locator for locating siblings of the focus token. The argument
  101. // specifies the sibling position relative to the focus token: a negative value
  102. // triggers a search to the left, while a positive value one to the right.
  103. // Please note that this operates on partially built dependency trees.
  104. class SiblingFeatureLocator
  105. : public ParserIndexLocator<SiblingFeatureLocator> {
  106. public:
  107. // Updates the current focus to a new location. If the initial focus is
  108. // outside the range of the sentence, returns -2.
  109. void UpdateArgs(const WorkspaceSet &workspaces, const ParserState &state,
  110. int *focus) const {
  111. if (*focus < -1 || *focus >= state.sentence().token_size()) {
  112. *focus = -2;
  113. return;
  114. }
  115. const int position = argument();
  116. if (position < 0) {
  117. *focus = state.LeftSibling(*focus, -position);
  118. } else {
  119. *focus = state.RightSibling(*focus, position);
  120. }
  121. }
  122. };
  123. REGISTER_PARSER_IDX_FEATURE_FUNCTION("sibling", SiblingFeatureLocator);
  124. // Feature function for computing the label from focus token. Note that this
  125. // does not use the precomputed values, since we get the labels from the stack;
  126. // the reason it utilizes sentence_features::Label is to obtain the label map.
  127. class LabelFeatureFunction : public BasicParserSentenceFeatureFunction<Label> {
  128. public:
  129. // Computes the label of the relation between the focus token and its parent.
  130. // Valid focus values range from -1 to sentence->size() - 1, inclusively.
  131. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  132. int focus, const FeatureVector *result) const override {
  133. if (focus == -1) return RootValue();
  134. if (focus < -1 || focus >= state.sentence().token_size()) {
  135. return feature_.NumValues();
  136. }
  137. const int label = state.Label(focus);
  138. return label == -1 ? RootValue() : label;
  139. }
  140. };
  141. REGISTER_PARSER_IDX_FEATURE_FUNCTION("label", LabelFeatureFunction);
  142. typedef BasicParserSentenceFeatureFunction<Word> WordFeatureFunction;
  143. REGISTER_PARSER_IDX_FEATURE_FUNCTION("word", WordFeatureFunction);
  144. typedef BasicParserSentenceFeatureFunction<KnownWord> KnownWordFeatureFunction;
  145. REGISTER_PARSER_IDX_FEATURE_FUNCTION("known-word", KnownWordFeatureFunction);
  146. typedef BasicParserSentenceFeatureFunction<Char> CharFeatureFunction;
  147. REGISTER_PARSER_IDX_FEATURE_FUNCTION("char", CharFeatureFunction);
  148. typedef BasicParserSentenceFeatureFunction<Tag> TagFeatureFunction;
  149. REGISTER_PARSER_IDX_FEATURE_FUNCTION("tag", TagFeatureFunction);
  150. typedef BasicParserSentenceFeatureFunction<Digit> DigitFeatureFunction;
  151. REGISTER_PARSER_IDX_FEATURE_FUNCTION("digit", DigitFeatureFunction);
  152. typedef BasicParserSentenceFeatureFunction<Hyphen> HyphenFeatureFunction;
  153. REGISTER_PARSER_IDX_FEATURE_FUNCTION("hyphen", HyphenFeatureFunction);
  154. typedef BasicParserSentenceFeatureFunction<Capitalization>
  155. CapitalizationFeatureFunction;
  156. REGISTER_PARSER_IDX_FEATURE_FUNCTION("capitalization",
  157. CapitalizationFeatureFunction);
  158. typedef BasicParserSentenceFeatureFunction<PunctuationAmount>
  159. PunctuationAmountFeatureFunction;
  160. REGISTER_PARSER_IDX_FEATURE_FUNCTION("punctuation-amount",
  161. PunctuationAmountFeatureFunction);
  162. typedef BasicParserSentenceFeatureFunction<Quote>
  163. QuoteFeatureFunction;
  164. REGISTER_PARSER_IDX_FEATURE_FUNCTION("quote",
  165. QuoteFeatureFunction);
  166. typedef BasicParserSentenceFeatureFunction<PrefixFeature> PrefixFeatureFunction;
  167. REGISTER_PARSER_IDX_FEATURE_FUNCTION("prefix", PrefixFeatureFunction);
  168. typedef BasicParserSentenceFeatureFunction<SuffixFeature> SuffixFeatureFunction;
  169. REGISTER_PARSER_IDX_FEATURE_FUNCTION("suffix", SuffixFeatureFunction);
  170. // Parser feature function that can use nested sentence feature functions for
  171. // feature extraction.
  172. class ParserTokenFeatureFunction
  173. : public NestedFeatureFunction<FeatureFunction<Sentence, int>, ParserState,
  174. int> {
  175. public:
  176. void Preprocess(WorkspaceSet *workspaces, ParserState *state) const override {
  177. for (auto *function : nested_) {
  178. function->Preprocess(workspaces, state->mutable_sentence());
  179. }
  180. }
  181. void Evaluate(const WorkspaceSet &workspaces, const ParserState &state,
  182. int focus, FeatureVector *result) const override {
  183. for (auto *function : nested_) {
  184. function->Evaluate(workspaces, state.sentence(), focus, result);
  185. }
  186. }
  187. // Returns the first nested feature's computed value.
  188. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  189. int focus, const FeatureVector *result) const override {
  190. if (nested_.empty()) return kNone;
  191. return nested_[0]->Compute(workspaces, state.sentence(), focus, result);
  192. }
  193. };
  194. REGISTER_PARSER_IDX_FEATURE_FUNCTION("token", ParserTokenFeatureFunction);
  195. class ParserWholeSentenceFeatureFunction
  196. : public NestedFeatureFunction<FeatureFunction<Sentence>, ParserState> {
  197. public:
  198. void Preprocess(WorkspaceSet *workspaces, ParserState *state) const override {
  199. for (auto *function : nested_) {
  200. function->Preprocess(workspaces, state->mutable_sentence());
  201. }
  202. }
  203. void Evaluate(const WorkspaceSet &workspaces, const ParserState &state,
  204. FeatureVector *result) const override {
  205. for (auto *function : nested_) {
  206. function->Evaluate(workspaces, state.sentence(), result);
  207. }
  208. }
  209. // Returns the first nested feature's computed value.
  210. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  211. const FeatureVector *result) const override {
  212. if (nested_.empty()) return kNone;
  213. return nested_[0]->Compute(workspaces, state.sentence(), result);
  214. }
  215. };
  216. REGISTER_PARSER_FEATURE_FUNCTION("sentence",
  217. ParserWholeSentenceFeatureFunction);
  218. // Parser feature that always fetches the focus (position) of the token.
  219. class FocusFeatureFunction : public ParserIndexFeatureFunction {
  220. public:
  221. // Initializes the feature function.
  222. void Init(TaskContext *context) override {
  223. // Note: this feature can return up to N values, where N is the length of
  224. // the input sentence. Here, we give the arbitrary number 100 since it
  225. // is not used.
  226. set_feature_type(new NumericFeatureType(name(), 100));
  227. }
  228. void Evaluate(const WorkspaceSet &workspaces, const ParserState &object,
  229. int focus, FeatureVector *result) const override {
  230. FeatureValue value = focus;
  231. result->add(feature_type(), value);
  232. }
  233. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  234. int focus, const FeatureVector *result) const override {
  235. return focus;
  236. }
  237. };
  238. REGISTER_PARSER_IDX_FEATURE_FUNCTION("focus", FocusFeatureFunction);
  239. // Parser feature that returns the gold head of the token.
  240. class GoldHeadFeatureFunction : public FocusFeatureFunction {
  241. public:
  242. void Evaluate(const WorkspaceSet &workspaces, const ParserState &state,
  243. int focus, FeatureVector *result) const override {
  244. if (focus >= -1 && focus < state.NumTokens()) focus = state.GoldHead(focus);
  245. result->add(feature_type(), focus);
  246. }
  247. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  248. int focus, const FeatureVector *result) const override {
  249. if (focus >= -1 && focus < state.NumTokens()) focus = state.GoldHead(focus);
  250. return focus;
  251. }
  252. };
  253. REGISTER_PARSER_IDX_FEATURE_FUNCTION("gold-head", GoldHeadFeatureFunction);
  254. // Parser feature returning a previous predicted action.
  255. class LastActionFeatureFunction : public ParserFeatureFunction {
  256. public:
  257. void Init(TaskContext *context) override {
  258. // NB: The "100" here is totally bogus, but it doesn't matter if predicate
  259. // maps will be used.
  260. set_feature_type(new NumericFeatureType(name(), 100));
  261. }
  262. // Turn on history tracking for the parser state to get the history of
  263. // features.
  264. void Preprocess(WorkspaceSet *workspaces, ParserState *state) const override {
  265. state->set_keep_history(true);
  266. }
  267. // Returns '0' for no prior action, otherwise returns the action.
  268. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  269. const FeatureVector *result) const override {
  270. const int history_size = state.history().size();
  271. const int offset = history_size - argument() - 1;
  272. if (offset < 0 || offset >= history_size) return 0;
  273. return state.history().at(offset) + 1;
  274. }
  275. };
  276. REGISTER_PARSER_FEATURE_FUNCTION("last-action", LastActionFeatureFunction);
  277. class Constant : public ParserFeatureFunction {
  278. public:
  279. void Init(TaskContext *context) override {
  280. value_ = this->GetIntParameter("value", 0);
  281. this->set_feature_type(new NumericFeatureType(this->name(), value_ + 1));
  282. }
  283. // Returns the constant's value.
  284. FeatureValue Compute(const WorkspaceSet &workspaces, const ParserState &state,
  285. const FeatureVector *result) const override {
  286. return value_;
  287. }
  288. private:
  289. int value_ = 0;
  290. };
  291. REGISTER_PARSER_FEATURE_FUNCTION("constant", Constant);
  292. } // namespace syntaxnet