syntaxnet_component.h 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. // Copyright 2017 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // =============================================================================
  15. #ifndef NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_COMPONENT_H_
  16. #define NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_COMPONENT_H_
  17. #include <vector>
  18. #include "dragnn/components/syntaxnet/syntaxnet_link_feature_extractor.h"
  19. #include "dragnn/components/syntaxnet/syntaxnet_transition_state.h"
  20. #include "dragnn/components/util/bulk_feature_extractor.h"
  21. #include "dragnn/core/beam.h"
  22. #include "dragnn/core/input_batch_cache.h"
  23. #include "dragnn/core/interfaces/component.h"
  24. #include "dragnn/core/interfaces/transition_state.h"
  25. #include "dragnn/protos/data.pb.h"
  26. #include "dragnn/protos/spec.pb.h"
  27. #include "dragnn/protos/trace.pb.h"
  28. #include "syntaxnet/base.h"
  29. #include "syntaxnet/parser_transitions.h"
  30. #include "syntaxnet/registry.h"
  31. #include "syntaxnet/task_context.h"
  32. namespace syntaxnet {
  33. namespace dragnn {
  34. class SyntaxNetComponent : public Component {
  35. public:
  36. // Create a SyntaxNet-backed DRAGNN component.
  37. SyntaxNetComponent();
  38. // Initializes this component from the spec.
  39. void InitializeComponent(const ComponentSpec &spec) override;
  40. // Provides the previous beam to the component.
  41. void InitializeData(
  42. const std::vector<std::vector<const TransitionState *>> &states,
  43. int max_beam_size, InputBatchCache *input_data) override;
  44. // Returns true if the component has had InitializeData called on it since
  45. // the last time it was reset.
  46. bool IsReady() const override;
  47. // Returns the string name of this component.
  48. string Name() const override;
  49. // Returns the number of steps taken by the given batch in this component.
  50. int StepsTaken(int batch_index) const override;
  51. // Returns the current batch size of the component's underlying data.
  52. int BatchSize() const override;
  53. // Returns the maximum beam size of this component.
  54. int BeamSize() const override;
  55. // Return the beam index of the item which is currently at index
  56. // 'index', when the beam was at step 'step', for batch element 'batch'.
  57. int GetBeamIndexAtStep(int step, int current_index, int batch) const override;
  58. // Return the source index of the item which is currently at index 'index'
  59. // for batch element 'batch'. This index is into the final beam of the
  60. // Component that this Component was initialized from.
  61. int GetSourceBeamIndex(int current_index, int batch) const override;
  62. // Request a translation function based on the given method string.
  63. // The translation function will be called with arguments (batch, beam, value)
  64. // and should return the step index corresponding to the given value, for the
  65. // data in the given beam and batch.
  66. std::function<int(int, int, int)> GetStepLookupFunction(
  67. const string &method) override;
  68. // Advances this component from the given transition matrix.
  69. void AdvanceFromPrediction(const float transition_matrix[],
  70. int transition_matrix_length) override;
  71. // Advances this component from the state oracles.
  72. void AdvanceFromOracle() override;
  73. // Returns true if all states within this component are terminal.
  74. bool IsTerminal() const override;
  75. // Returns the current batch of beams for this component.
  76. std::vector<std::vector<const TransitionState *>> GetBeam() override;
  77. // Extracts and populates the vector of FixedFeatures for the specified
  78. // channel.
  79. int GetFixedFeatures(std::function<int32 *(int)> allocate_indices,
  80. std::function<int64 *(int)> allocate_ids,
  81. std::function<float *(int)> allocate_weights,
  82. int channel_id) const override;
  83. // Extracts and populates all FixedFeatures for all channels, advancing this
  84. // component via the oracle until it is terminal.
  85. int BulkGetFixedFeatures(const BulkFeatureExtractor &extractor) override;
  86. // Extracts and returns the vector of LinkFeatures for the specified
  87. // channel. Note: these are NOT translated.
  88. std::vector<LinkFeatures> GetRawLinkFeatures(int channel_id) const override;
  89. // Returns a vector of oracle labels for each element in the beam and
  90. // batch.
  91. std::vector<std::vector<int>> GetOracleLabels() const override;
  92. // Annotate the underlying data object with the results of this Component's
  93. // calculation.
  94. void FinalizeData() override;
  95. // Reset this component.
  96. void ResetComponent() override;
  97. // Initializes the component for tracing execution. This will typically have
  98. // the side effect of slowing down all subsequent Component calculations
  99. // and storing a trace in memory that can be returned by GetTraceProtos().
  100. void InitializeTracing() override;
  101. // Disables tracing, freeing any additional memory and avoiding triggering
  102. // additional computation in the future.
  103. void DisableTracing() override;
  104. std::vector<std::vector<ComponentTrace>> GetTraceProtos() const override;
  105. void AddTranslatedLinkFeaturesToTrace(
  106. const std::vector<LinkFeatures> &features, int channel_id) override;
  107. private:
  108. friend class SyntaxNetComponentTest;
  109. friend class SyntaxNetTransitionStateTest;
  110. // Permission function for this component.
  111. bool IsAllowed(SyntaxNetTransitionState *state, int action) const;
  112. // Returns true if this state is final
  113. bool IsFinal(SyntaxNetTransitionState *state) const;
  114. // Oracle function for this component.
  115. int GetOracleLabel(SyntaxNetTransitionState *state) const;
  116. // State advance function for this component.
  117. void Advance(SyntaxNetTransitionState *state, int action,
  118. Beam<SyntaxNetTransitionState> *beam);
  119. // Creates a new state for the given nlp_saft::SentenceExample.
  120. std::unique_ptr<SyntaxNetTransitionState> CreateState(
  121. SyntaxNetSentence *example);
  122. // Creates a newly initialized Beam.
  123. std::unique_ptr<Beam<SyntaxNetTransitionState>> CreateBeam(int max_size);
  124. // Transition system.
  125. std::unique_ptr<ParserTransitionSystem> transition_system_;
  126. // Label map for transition system.
  127. const TermFrequencyMap *label_map_;
  128. // Extractor for fixed features
  129. ParserEmbeddingFeatureExtractor feature_extractor_;
  130. // Extractor for linked features.
  131. SyntaxNetLinkFeatureExtractor link_feature_extractor_;
  132. // Internal workspace registry for use in feature extraction.
  133. WorkspaceRegistry workspace_registry_;
  134. // Switch for simulating legacy parser behaviour.
  135. bool rewrite_root_labels_;
  136. // The ComponentSpec used to initialize this component.
  137. ComponentSpec spec_;
  138. // State search beams
  139. std::vector<std::unique_ptr<Beam<SyntaxNetTransitionState>>> batch_;
  140. // Current max beam size.
  141. int max_beam_size_;
  142. // Underlying input data.
  143. InputBatchCache *input_data_;
  144. // Whether or not to trace for each batch and beam element.
  145. bool do_tracing_ = false;
  146. };
  147. } // namespace dragnn
  148. } // namespace syntaxnet
  149. #endif // NLP_SAFT_OPENSOURCE_DRAGNN_COMPONENTS_SYNTAXNET_SYNTAXNET_COMPONENT_H_