sentence_features.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669
  1. /* Copyright 2016 Google Inc. All Rights Reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. ==============================================================================*/
  12. // Features that operate on Sentence objects. Most features are defined
  13. // in this header so they may be re-used via composition into other more
  14. // advanced feature classes.
  15. #ifndef SYNTAXNET_SENTENCE_FEATURES_H_
  16. #define SYNTAXNET_SENTENCE_FEATURES_H_
  17. #include <map>
  18. #include <string>
  19. #include "syntaxnet/affix.h"
  20. #include "syntaxnet/char_ngram_string_extractor.h"
  21. #include "syntaxnet/feature_extractor.h"
  22. #include "syntaxnet/feature_types.h"
  23. #include "syntaxnet/segmenter_utils.h"
  24. #include "syntaxnet/shared_store.h"
  25. #include "syntaxnet/task_context.h"
  26. #include "syntaxnet/workspace.h"
  27. namespace syntaxnet {
  28. // Feature function for any component that processes Sentences, whose
  29. // focus is a token index into the sentence.
  30. typedef FeatureFunction<Sentence, int> SentenceFeature;
  31. // Alias for Locator type features that take (Sentence, int) signatures
  32. // and call other (Sentence, int) features.
  33. template <class DER>
  34. using Locator = FeatureLocator<DER, Sentence, int>;
  35. class TokenLookupFeature : public SentenceFeature {
  36. public:
  37. void Init(TaskContext *context) override {
  38. std::map<FeatureValue, string> special_values;
  39. if (use_outside_) {
  40. outside_value_ = NumValues();
  41. special_values[outside_value_] = "<OUTSIDE>";
  42. }
  43. set_feature_type(new ResourceBasedFeatureType<TokenLookupFeature>(
  44. name(), this, special_values));
  45. }
  46. // Given a position in a sentence and workspaces, looks up the corresponding
  47. // feature value. The index is relative to the start of the sentence.
  48. virtual FeatureValue ComputeValue(const Token &token) const = 0;
  49. // Number of unique values.
  50. virtual int64 NumValues() const = 0;
  51. // Convert the numeric value of the feature to a human readable string.
  52. virtual string GetFeatureValueName(FeatureValue value) const = 0;
  53. // Name of the shared workspace.
  54. virtual string WorkspaceName() const = 0;
  55. // Runs ComputeValue for each token in the sentence.
  56. void Preprocess(WorkspaceSet *workspaces,
  57. Sentence *sentence) const override {
  58. if (workspaces->Has<VectorIntWorkspace>(workspace_)) return;
  59. VectorIntWorkspace *workspace = new VectorIntWorkspace(
  60. sentence->token_size());
  61. for (int i = 0; i < sentence->token_size(); ++i) {
  62. const int value = ComputeValue(sentence->token(i));
  63. workspace->set_element(i, value);
  64. }
  65. workspaces->Set<VectorIntWorkspace>(workspace_, workspace);
  66. }
  67. // Requests a vector of int's to store in the workspace registry.
  68. void RequestWorkspaces(WorkspaceRegistry *registry) override {
  69. workspace_ = registry->Request<VectorIntWorkspace>(WorkspaceName());
  70. }
  71. // Returns the precomputed value, or |outside_value_| for features outside the
  72. // sentence.
  73. FeatureValue Compute(const WorkspaceSet &workspaces,
  74. const Sentence &sentence, int focus,
  75. const FeatureVector *result) const override {
  76. if (focus < 0 || focus >= sentence.token_size()) return outside_value_;
  77. return workspaces.Get<VectorIntWorkspace>(workspace_).element(focus);
  78. }
  79. int Workspace() const { return workspace_; }
  80. protected:
  81. // Sets whether this feature extracts a special value for outside tokens.
  82. // Should be called before Init().
  83. void set_use_outside(bool use) { use_outside_ = use; }
  84. private:
  85. int workspace_;
  86. bool use_outside_ = true;
  87. FeatureValue outside_value_ = kNone;
  88. };
  89. // A multi purpose specialization of the feature. Processes the tokens in a
  90. // Sentence by looking up a value set for each token and storing that in
  91. // a VectorVectorInt workspace. Given a set of base values of size Size(),
  92. // reserves an extra value for unknown tokens.
  93. class TokenLookupSetFeature : public SentenceFeature {
  94. public:
  95. void Init(TaskContext *context) override {
  96. set_feature_type(new ResourceBasedFeatureType<TokenLookupSetFeature>(
  97. name(), this, {{NumValues(), "<OUTSIDE>"}}));
  98. }
  99. // Number of unique values.
  100. virtual int64 NumValues() const = 0;
  101. // Given a position in a sentence and workspaces, looks up the corresponding
  102. // feature value set. The index is relative to the start of the sentence.
  103. virtual void LookupToken(const WorkspaceSet &workspaces,
  104. const Sentence &sentence, int index,
  105. std::vector<int> *values) const = 0;
  106. // Given a feature value, returns a string representation.
  107. virtual string GetFeatureValueName(int value) const = 0;
  108. // Name of the shared workspace.
  109. virtual string WorkspaceName() const = 0;
  110. // TokenLookupSetFeatures use VectorVectorIntWorkspaces by default.
  111. void RequestWorkspaces(WorkspaceRegistry *registry) override {
  112. workspace_ = registry->Request<VectorVectorIntWorkspace>(WorkspaceName());
  113. }
  114. // Default preprocessing: looks up a value set for each token in the Sentence.
  115. void Preprocess(WorkspaceSet *workspaces, Sentence *sentence) const override {
  116. // Default preprocessing: lookup a value set for each token in the Sentence.
  117. if (workspaces->Has<VectorVectorIntWorkspace>(workspace_)) return;
  118. VectorVectorIntWorkspace *workspace =
  119. new VectorVectorIntWorkspace(sentence->token_size());
  120. for (int i = 0; i < sentence->token_size(); ++i) {
  121. LookupToken(*workspaces, *sentence, i, workspace->mutable_elements(i));
  122. }
  123. workspaces->Set<VectorVectorIntWorkspace>(workspace_, workspace);
  124. }
  125. // Returns a pre-computed token value from the cache. This assumes the cache
  126. // is populated.
  127. const std::vector<int> &GetCachedValueSet(const WorkspaceSet &workspaces,
  128. const Sentence &sentence,
  129. int focus) const {
  130. // Do bounds checking on focus.
  131. CHECK_GE(focus, 0);
  132. CHECK_LT(focus, sentence.token_size());
  133. // Return value from cache.
  134. return workspaces.Get<VectorVectorIntWorkspace>(workspace_).elements(focus);
  135. }
  136. // Adds any precomputed features at the given focus, if present.
  137. void Evaluate(const WorkspaceSet &workspaces, const Sentence &sentence,
  138. int focus, FeatureVector *result) const override {
  139. if (focus >= 0 && focus < sentence.token_size()) {
  140. const std::vector<int> &elements =
  141. GetCachedValueSet(workspaces, sentence, focus);
  142. for (auto &value : elements) {
  143. result->add(this->feature_type(), value);
  144. }
  145. }
  146. }
  147. // Returns the precomputed value, or NumValues() for features outside
  148. // the sentence.
  149. FeatureValue Compute(const WorkspaceSet &workspaces, const Sentence &sentence,
  150. int focus, const FeatureVector *result) const override {
  151. if (focus < 0 || focus >= sentence.token_size()) return NumValues();
  152. return workspaces.Get<VectorIntWorkspace>(workspace_).element(focus);
  153. }
  154. private:
  155. int workspace_;
  156. };
  157. // Lookup feature that uses a TermFrequencyMap to store a string->int mapping.
  158. class TermFrequencyMapFeature : public TokenLookupFeature {
  159. public:
  160. explicit TermFrequencyMapFeature(const string &input_name)
  161. : input_name_(input_name), min_freq_(0), max_num_terms_(0) {}
  162. ~TermFrequencyMapFeature() override;
  163. // Requests the input map as a resource.
  164. void Setup(TaskContext *context) override;
  165. // Loads the input map into memory (using SharedStore to avoid redundancy.)
  166. void Init(TaskContext *context) override;
  167. // Number of unique values.
  168. int64 NumValues() const override { return term_map_->Size() + 1; }
  169. // Special value for strings not in the map.
  170. FeatureValue UnknownValue() const { return term_map_->Size(); }
  171. // Uses the TermFrequencyMap to lookup the string associated with a value.
  172. string GetFeatureValueName(FeatureValue value) const override;
  173. // Name of the shared workspace.
  174. string WorkspaceName() const override;
  175. protected:
  176. const TermFrequencyMap &term_map() const { return *term_map_; }
  177. private:
  178. // Shortcut pointer to shared map. Not owned.
  179. const TermFrequencyMap *term_map_ = nullptr;
  180. // Name of the input for the term map.
  181. string input_name_;
  182. // Filename of the underlying resource.
  183. string file_name_;
  184. // Minimum frequency for term map.
  185. int min_freq_;
  186. // Maximum number of terms for term map.
  187. int max_num_terms_;
  188. };
  189. // Specialization of the TokenLookupSetFeature class to use a TermFrequencyMap
  190. // to perform the mapping. This takes two options: "min_freq" (discard tokens
  191. // with less than this min frequency), and "max_num_terms" (only read in at most
  192. // these terms.)
  193. class TermFrequencyMapSetFeature : public TokenLookupSetFeature {
  194. public:
  195. // Initializes with an empty name, since we need the options to compute the
  196. // actual workspace name.
  197. explicit TermFrequencyMapSetFeature(const string &input_name)
  198. : input_name_(input_name), min_freq_(0), max_num_terms_(0) {}
  199. // Releases shared resources.
  200. ~TermFrequencyMapSetFeature() override;
  201. // Returns index of raw word text.
  202. virtual void GetTokenIndices(const Token &token,
  203. std::vector<int> *values) const = 0;
  204. // Requests the resource inputs.
  205. void Setup(TaskContext *context) override;
  206. // Obtains resources using the shared store. At this point options are known
  207. // so the full name can be computed.
  208. void Init(TaskContext *context) override;
  209. // Number of unique values.
  210. int64 NumValues() const override { return term_map_->Size(); }
  211. // Special value for strings not in the map.
  212. FeatureValue UnknownValue() const { return term_map_->Size(); }
  213. // Gets pointer to the underlying map.
  214. const TermFrequencyMap *term_map() const { return term_map_; }
  215. // Returns the term index or the unknown value. Used inside GetTokenIndex()
  216. // specializations for convenience.
  217. int LookupIndex(const string &term) const {
  218. return term_map_->LookupIndex(term, -1);
  219. }
  220. // Given a position in a sentence and workspaces, looks up the corresponding
  221. // feature value set. The index is relative to the start of the sentence.
  222. void LookupToken(const WorkspaceSet &workspaces, const Sentence &sentence,
  223. int index, std::vector<int> *values) const override {
  224. GetTokenIndices(sentence.token(index), values);
  225. }
  226. // Uses the TermFrequencyMap to lookup the string associated with a value.
  227. string GetFeatureValueName(int value) const override {
  228. if (value == UnknownValue()) return "<UNKNOWN>";
  229. if (value >= 0 && value < NumValues()) {
  230. return term_map_->GetTerm(value);
  231. }
  232. LOG(ERROR) << "Invalid feature value: " << value;
  233. return "<INVALID>";
  234. }
  235. // Name of the shared workspace.
  236. string WorkspaceName() const override;
  237. private:
  238. // Shortcut pointer to shared map. Not owned.
  239. const TermFrequencyMap *term_map_ = nullptr;
  240. // Name of the input for the term map.
  241. string input_name_;
  242. // Filename of the underlying resource.
  243. string file_name_;
  244. // Minimum frequency for term map.
  245. int min_freq_;
  246. // Maximum number of terms for term map.
  247. int max_num_terms_;
  248. };
  249. class Word : public TermFrequencyMapFeature {
  250. public:
  251. Word() : TermFrequencyMapFeature("word-map") {}
  252. FeatureValue ComputeValue(const Token &token) const override {
  253. const string &form = token.word();
  254. return term_map().LookupIndex(form, UnknownValue());
  255. }
  256. };
  257. // Like Word, but extracts nothing for outside or unknown words.
  258. class KnownWord : public TermFrequencyMapFeature {
  259. public:
  260. KnownWord() : TermFrequencyMapFeature("known-word-map") {
  261. set_use_outside(false);
  262. }
  263. // Returns the number of terms, with no room for additional feature values.
  264. int64 NumValues() const override { return term_map().Size(); }
  265. // Returns -1 for unknown words, so no features are extracted.
  266. FeatureValue ComputeValue(const Token &token) const override {
  267. const string &form = token.word();
  268. return term_map().LookupIndex(form, -1);
  269. }
  270. };
  271. class Char : public TermFrequencyMapFeature {
  272. public:
  273. Char() : TermFrequencyMapFeature("char-map") {}
  274. FeatureValue ComputeValue(const Token &token) const override {
  275. const string &form = token.word();
  276. if (SegmenterUtils::IsBreakChar(form)) return BreakCharValue();
  277. return term_map().LookupIndex(form, UnknownValue());
  278. }
  279. // Special value for breaks.
  280. FeatureValue BreakCharValue() const { return term_map().Size(); }
  281. // Special value for non-break strings not in the map.
  282. FeatureValue UnknownValue() const { return term_map().Size() + 1; }
  283. // Number of unique values.
  284. int64 NumValues() const override { return term_map().Size() + 2; }
  285. string GetFeatureValueName(FeatureValue value) const override {
  286. if (value == BreakCharValue()) return "<BREAK_CHAR>";
  287. if (value == UnknownValue()) return "<UNKNOWN>";
  288. if (value >= 0 && value < term_map().Size()) {
  289. return term_map().GetTerm(value);
  290. }
  291. LOG(ERROR) << "Invalid feature value: " << value;
  292. return "<INVALID>";
  293. }
  294. };
  295. class LowercaseWord : public TermFrequencyMapFeature {
  296. public:
  297. LowercaseWord() : TermFrequencyMapFeature("lc-word-map") {}
  298. FeatureValue ComputeValue(const Token &token) const override {
  299. const string lcword = utils::Lowercase(token.word());
  300. return term_map().LookupIndex(lcword, UnknownValue());
  301. }
  302. };
  303. class Tag : public TermFrequencyMapFeature {
  304. public:
  305. Tag() : TermFrequencyMapFeature("tag-map") {}
  306. FeatureValue ComputeValue(const Token &token) const override {
  307. return term_map().LookupIndex(token.tag(), UnknownValue());
  308. }
  309. };
  310. class Label : public TermFrequencyMapFeature {
  311. public:
  312. Label() : TermFrequencyMapFeature("label-map") {}
  313. FeatureValue ComputeValue(const Token &token) const override {
  314. return term_map().LookupIndex(token.label(), UnknownValue());
  315. }
  316. };
  317. class CharNgram : public TermFrequencyMapSetFeature {
  318. public:
  319. CharNgram() : TermFrequencyMapSetFeature("char-ngram-map") {}
  320. ~CharNgram() override {}
  321. void Setup(TaskContext *context) override;
  322. string WorkspaceName() const override;
  323. void GetTokenIndices(const Token &token,
  324. std::vector<int> *values) const override;
  325. private:
  326. // Extractor that implements the feature.
  327. CharNgramStringExtractor extractor_;
  328. };
  329. class MorphologySet : public TermFrequencyMapSetFeature {
  330. public:
  331. MorphologySet() : TermFrequencyMapSetFeature("morphology-map") {}
  332. ~MorphologySet() override {}
  333. void Setup(TaskContext *context) override {
  334. TermFrequencyMapSetFeature::Setup(context);
  335. }
  336. int64 NumValues() const override {
  337. return term_map()->Size() - 1;
  338. }
  339. // Returns index of raw word text.
  340. void GetTokenIndices(const Token &token,
  341. std::vector<int> *values) const override;
  342. };
  343. class LexicalCategoryFeature : public TokenLookupFeature {
  344. public:
  345. LexicalCategoryFeature(const string &name, int cardinality)
  346. : name_(name), cardinality_(cardinality) {}
  347. ~LexicalCategoryFeature() override {}
  348. FeatureValue NumValues() const override { return cardinality_; }
  349. // Returns the identifier for the workspace for this feature.
  350. string WorkspaceName() const override {
  351. return tensorflow::strings::StrCat(name_, ":", cardinality_);
  352. }
  353. private:
  354. // Name of the category type.
  355. const string name_;
  356. // Number of values.
  357. const int cardinality_;
  358. };
  359. // Feature that computes whether a word has a hyphen or not.
  360. class Hyphen : public LexicalCategoryFeature {
  361. public:
  362. // Enumeration of values.
  363. enum Category {
  364. NO_HYPHEN = 0,
  365. HAS_HYPHEN = 1,
  366. CARDINALITY = 2,
  367. };
  368. // Default constructor.
  369. Hyphen() : LexicalCategoryFeature("hyphen", CARDINALITY) {}
  370. // Returns a string representation of the enum value.
  371. string GetFeatureValueName(FeatureValue value) const override;
  372. // Returns the category value for the token.
  373. FeatureValue ComputeValue(const Token &token) const override;
  374. };
  375. // Feature that categorizes the capitalization of the word. If the option
  376. // utf8=true is specified, lowercase and uppercase checks are done with UTF8
  377. // compliant functions.
  378. class Capitalization : public LexicalCategoryFeature {
  379. public:
  380. // Enumeration of values.
  381. enum Category {
  382. LOWERCASE = 0, // normal word
  383. UPPERCASE = 1, // all-caps
  384. CAPITALIZED = 2, // has one cap and one non-cap
  385. CAPITALIZED_SENTENCE_INITIAL = 3, // same as above but sentence-initial
  386. NON_ALPHABETIC = 4, // contains no alphabetic characters
  387. CARDINALITY = 5,
  388. };
  389. // Default constructor.
  390. Capitalization() : LexicalCategoryFeature("capitalization", CARDINALITY) {}
  391. // Sets one of the options for the capitalization.
  392. void Setup(TaskContext *context) override;
  393. // Capitalization needs special preprocessing because token category can
  394. // depend on whether the token is at the start of the sentence.
  395. void Preprocess(WorkspaceSet *workspaces, Sentence *sentence) const override;
  396. // Returns a string representation of the enum value.
  397. string GetFeatureValueName(FeatureValue value) const override;
  398. // Returns the category value for the token.
  399. FeatureValue ComputeValue(const Token &token) const override {
  400. LOG(FATAL) << "Capitalization should use ComputeValueWithFocus.";
  401. return 0;
  402. }
  403. // Returns the category value for the token.
  404. FeatureValue ComputeValueWithFocus(const Token &token, int focus) const;
  405. private:
  406. // Whether to use UTF8 compliant functions to check capitalization.
  407. bool utf8_ = false;
  408. };
  409. // A feature for computing whether the focus token contains any punctuation
  410. // for ternary features.
  411. class PunctuationAmount : public LexicalCategoryFeature {
  412. public:
  413. // Enumeration of values.
  414. enum Category {
  415. NO_PUNCTUATION = 0,
  416. SOME_PUNCTUATION = 1,
  417. ALL_PUNCTUATION = 2,
  418. CARDINALITY = 3,
  419. };
  420. // Default constructor.
  421. PunctuationAmount()
  422. : LexicalCategoryFeature("punctuation-amount", CARDINALITY) {}
  423. // Returns a string representation of the enum value.
  424. string GetFeatureValueName(FeatureValue value) const override;
  425. // Returns the category value for the token.
  426. FeatureValue ComputeValue(const Token &token) const override;
  427. };
  428. // A feature for a feature that returns whether the word is an open or
  429. // close quotation mark, based on its relative position to other quotation marks
  430. // in the sentence.
  431. class Quote : public LexicalCategoryFeature {
  432. public:
  433. // Enumeration of values.
  434. enum Category {
  435. NO_QUOTE = 0,
  436. OPEN_QUOTE = 1,
  437. CLOSE_QUOTE = 2,
  438. UNKNOWN_QUOTE = 3,
  439. CARDINALITY = 4,
  440. };
  441. // Default constructor.
  442. Quote() : LexicalCategoryFeature("quote", CARDINALITY) {}
  443. // Returns a string representation of the enum value.
  444. string GetFeatureValueName(FeatureValue value) const override;
  445. // Returns the category value for the token.
  446. FeatureValue ComputeValue(const Token &token) const override;
  447. // Override preprocess to compute open and close quotes from prior context of
  448. // the sentence.
  449. void Preprocess(WorkspaceSet *workspaces, Sentence *instance) const override;
  450. };
  451. // Feature that computes whether a word has digits or not.
  452. class Digit : public LexicalCategoryFeature {
  453. public:
  454. // Enumeration of values.
  455. enum Category {
  456. NO_DIGIT = 0,
  457. SOME_DIGIT = 1,
  458. ALL_DIGIT = 2,
  459. CARDINALITY = 3,
  460. };
  461. // Default constructor.
  462. Digit() : LexicalCategoryFeature("digit", CARDINALITY) {}
  463. // Returns a string representation of the enum value.
  464. string GetFeatureValueName(FeatureValue value) const override;
  465. // Returns the category value for the token.
  466. FeatureValue ComputeValue(const Token &token) const override;
  467. };
  468. // TokenLookupFeature object to compute prefixes and suffixes of words. The
  469. // AffixTable is stored in the SharedStore. This is very similar to the
  470. // implementation of TermFrequencyMapFeature, but using an AffixTable to
  471. // perform the lookups. There are only two specializations, for prefixes and
  472. // suffixes.
  473. class AffixTableFeature : public TokenLookupFeature {
  474. public:
  475. // Explicit constructor to set the type of the table. This determines the
  476. // requested input.
  477. explicit AffixTableFeature(AffixTable::Type type);
  478. ~AffixTableFeature() override;
  479. // Requests inputs for the affix table.
  480. void Setup(TaskContext *context) override;
  481. // Loads the affix table from the SharedStore.
  482. void Init(TaskContext *context) override;
  483. // The workspace name is specific to which affix length we are computing.
  484. string WorkspaceName() const override;
  485. // Returns the total number of affixes in the table, regardless of specified
  486. // length.
  487. FeatureValue NumValues() const override { return affix_table_->size() + 1; }
  488. // Special value for strings not in the map.
  489. FeatureValue UnknownValue() const { return affix_table_->size(); }
  490. // Looks up the affix for a given word.
  491. FeatureValue ComputeValue(const Token &token) const override;
  492. // Returns the string associated with a value.
  493. string GetFeatureValueName(FeatureValue value) const override;
  494. private:
  495. // Size parameter for the affix table.
  496. int affix_length_;
  497. // Name of the input for the table.
  498. string input_name_;
  499. // The type of the affix table.
  500. const AffixTable::Type type_;
  501. // Affix table used for indexing. This comes from the shared store, and is not
  502. // owned directly.
  503. const AffixTable *affix_table_ = nullptr;
  504. };
  505. // Specific instantiation for computing prefixes. This requires the input
  506. // "prefix-table".
  507. class PrefixFeature : public AffixTableFeature {
  508. public:
  509. PrefixFeature() : AffixTableFeature(AffixTable::PREFIX) {}
  510. };
  511. // Specific instantiation for computing suffixes. Requires the input
  512. // "suffix-table."
  513. class SuffixFeature : public AffixTableFeature {
  514. public:
  515. SuffixFeature() : AffixTableFeature(AffixTable::SUFFIX) {}
  516. };
  517. // Offset locator. Simple locator: just changes the focus by some offset.
  518. class Offset : public Locator<Offset> {
  519. public:
  520. void UpdateArgs(const WorkspaceSet &workspaces,
  521. const Sentence &sentence, int *focus) const {
  522. *focus += argument();
  523. }
  524. };
  525. typedef FeatureExtractor<Sentence, int> SentenceExtractor;
  // Utility to register a feature function as a SentenceFeature. Forwards both
  // arguments unchanged to REGISTER_SYNTAXNET_FEATURE_FUNCTION.
  #define REGISTER_SENTENCE_IDX_FEATURE(feature_name, feature_class) \
    REGISTER_SYNTAXNET_FEATURE_FUNCTION(SentenceFeature, feature_name, \
                                        feature_class)
  529. } // namespace syntaxnet
  530. #endif // SYNTAXNET_SENTENCE_FEATURES_H_