// char_shift_transitions.h
  1. /* Copyright 2016 Google Inc. All Rights Reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. ==============================================================================*/
  12. // Character-level shift transition system.
  13. //
  14. // This transition system has one type of action:
  15. // - The SHIFT action advances the next input pointer to the next input
  16. // character.
  17. //
  18. // For this transition system, we need a simple TransitionState that keeps track
  19. // of an input pointer into characters.
  20. #ifndef SYNTAXNET_CHAR_SHIFT_TRANSITIONS_H_
  21. #define SYNTAXNET_CHAR_SHIFT_TRANSITIONS_H_
  22. #include "syntaxnet/base.h"
  23. #include "syntaxnet/parser_features.h"
  24. #include "syntaxnet/parser_state.h"
  25. #include "syntaxnet/parser_transitions.h"
  26. #include "syntaxnet/sentence_features.h"
  27. #include "syntaxnet/shared_store.h"
  28. #include "syntaxnet/task_context.h"
  29. #include "syntaxnet/term_frequency_map.h"
  30. #include "syntaxnet/utils.h"
  31. #include "tensorflow/core/lib/strings/strcat.h"
  32. namespace syntaxnet {
  33. // CharShiftTransitionState is similar to ParserState, but operates on
  34. // character-level instead of token-level. It contains of a pointer to the next
  35. // input character.
  36. class CharShiftTransitionState : public ParserTransitionState {
  37. public:
  38. explicit CharShiftTransitionState(bool left_to_right)
  39. : left_to_right_(left_to_right) {}
  40. ParserTransitionState *Clone() const override;
  41. // Set the initial value of next in ParserState.
  42. void Init(ParserState *state) override;
  43. // Returns the index of the next input character.
  44. int Next() const;
  45. // Returns the character index relative to the next input character. If no
  46. // such character exists, returns -2.
  47. int Input(int offset) const;
  48. // Returns the character at the given index i. Returns an empty string if the
  49. // index is out of range.
  50. string GetChar(const ParserState &state, int i) const;
  51. // Sets the next input character. Useful for transition systems that do not
  52. // necessarily process characters in order.
  53. void Advance(int next);
  54. // Returns true if all characters have been processed.
  55. bool EndOfInput() const;
  56. // Returns true if the character index i is at a token start.
  57. bool IsTokenStart(int i) const;
  58. // Returns true if the character index i is at a token end.
  59. bool IsTokenEnd(int i) const;
  60. int num_chars() const { return num_chars_; }
  61. // Whether a parsed token should be considered correct for evaluation.
  62. bool IsTokenCorrect(const ParserState &state, int index) const override {
  63. return true;
  64. }
  65. // Returns a human readable string representation of this state.
  66. string ToString(const ParserState &state) const override {
  67. return "";
  68. }
  69. private:
  70. // Number of characters in the sentence.
  71. int num_chars_;
  72. // Index of the next input character.
  73. int next_;
  74. // Whether the input characters are read from left to right.
  75. const bool left_to_right_;
  76. // Int vectors both of size num_chars_ for storing character positons and
  77. // lengths (in bytes).
  78. std::vector<int> char_pos_map_;
  79. std::vector<int> char_len_map_;
  80. // Boolean vectors both of size num_chars_. token_starts[i]/token_ends[i]
  81. // is true iff the character index i is a token start/end.
  82. std::vector<bool> token_starts_;
  83. std::vector<bool> token_ends_;
  84. };
  85. class CharShiftTransitionSystem : public ParserTransitionSystem {
  86. public:
  87. static const ParserAction kShiftAction = 0;
  88. CharShiftTransitionSystem() {}
  89. // Determines the direction of the system.
  90. void Setup(TaskContext *context) override;
  91. // The shift transition system doesn't actually look at the dependency tree,
  92. // so it does allow non-projective trees.
  93. bool AllowsNonProjective() const override { return true; }
  94. // Returns the number of action types.
  95. int NumActionTypes() const override { return 1; }
  96. // Returns the number of possible actions.
  97. int NumActions(int num_labels) const override { return 1; }
  98. ParserAction GetDefaultAction(const ParserState &state) const override {
  99. return kShiftAction;
  100. }
  101. // At any time, the gold action is to shift.
  102. ParserAction GetNextGoldAction(const ParserState &state) const override {
  103. return kShiftAction;
  104. }
  105. // Checks if the action is allowed in a given parser state.
  106. bool IsAllowedAction(ParserAction action,
  107. const ParserState &state) const override;
  108. // Performs a shift by pushing the next input token on the stack and moving to
  109. // the next position.
  110. void PerformActionWithoutHistory(ParserAction action,
  111. ParserState *state) const override;
  112. bool IsFinalState(const ParserState &state) const override;
  113. // Returns a string representation of a parser action.
  114. string ActionAsString(ParserAction action,
  115. const ParserState &state) const override;
  116. // All states are deterministic in this transition system.
  117. bool IsDeterministicState(const ParserState &state) const override {
  118. return true;
  119. }
  120. // Returns a new transition state.
  121. ParserTransitionState *NewTransitionState(bool training_mode) const override;
  122. bool left_to_right() const { return left_to_right_; }
  123. private:
  124. bool left_to_right_ = true;
  125. };
  126. } // namespace syntaxnet
  127. #endif // SYNTAXNET_CHAR_SHIFT_TRANSITIONS_H_