// Configuration schema for DRAGNN. Background and rationale are described in
// go/dragnn-design.

syntax = "proto2";

package syntaxnet.dragnn;
// Describes a collection of DRAGNN components (transition systems) that are
// trained and evaluated together, with one ComponentSpec per component.
//
// Component order matters: for now, a component may only link to components
// listed before it.
//
// NEXT ID: 6
message MasterSpec {
  reserved 2, 3, 5;

  // The jointly trained components, in order.
  repeated ComponentSpec component = 1;

  // When true, debug traces are extracted. Off by default.
  optional bool debug_tracing = 4 [default = false];
}
// Full specification of a single component (task).
//
// NEXT ID: 13
message ComponentSpec {
  // Component name. Linked features refer to it through their
  // "source_component" field.
  optional string name = 1;

  // The transition system this component runs.
  optional RegisteredModuleSpec transition_system = 2;

  // Resources required by this component; copied into TaskInputs when SAFT
  // code is invoked.
  repeated Resource resource = 3;

  // Feature space configuration: the fixed and linked feature channels.
  repeated FixedFeatureChannel fixed_feature = 4;
  repeated LinkedFeatureChannel linked_feature = 5;

  // Specification of the neural network builder.
  optional RegisteredModuleSpec network_unit = 6;

  // Registered C++ implementation of the dragnn::Component class, for example
  // "SyntaxNetComponent".
  optional RegisteredModuleSpec backend = 7;

  // How many actions are possible from every state.
  optional int32 num_actions = 8;

  // Name of the lower-level component that this component attends to.
  optional string attention_component = 9 [default = ""];

  // ComponentBuilder options. When empty, the regular tf.while_loop based
  // builder is assumed.
  optional RegisteredModuleSpec component_builder = 10;

  // Default cap on the number of active states during beam training.
  optional int32 training_beam_size = 11 [default = 1];

  // Default cap on the number of active states during beam inference.
  optional int32 inference_beam_size = 12 [default = 1];
}
// Generic container describing any registered sub-piece of DRAGNN.
message RegisteredModuleSpec {
  // The name under which the class is registered.
  optional string registered_name = 1;

  // Initialization parameters for the module. They are copied into the
  // Parameters of a TaskSpec when SAFT code is called, or passed as kwargs in
  // TF Python code.
  map<string, string> parameters = 2;
}
// A fixed resource; it is converted into a TaskInput when SAFT code is called.
message Resource {
  // Resource name.
  optional string name = 1;

  // The parts that make up this resource.
  repeated Part part = 2;
}
// A single part of a resource. Parts are intended to be more or less
// compatible with TaskInput.
//
// NOTE(review): the individual fields are not documented here; presumably
// they mirror the same-named fields of a TaskInput part. TODO: confirm.
message Part {
  optional string file_pattern = 1;

  optional string file_format = 2;

  optional string record_format = 3;
}
// ------------------------------------------------------------------------
// Feature specifications.
//
// A *feature channel* is a named collection of feature templates that share
// an embedding matrix. All features in a channel are therefore assumed to use
// the same vocabulary: e.g., words, POS tags, or hidden layer activations.
// The features are extracted, embedded, and then concatenated together as a
// group.

// A feature channel that is a *fixed* function of the input.
//
// NEXT ID: 10
message FixedFeatureChannel {
  // Human-interpretable channel name. NN builders may rely on it to decide
  // how to wire the different channels together internally.
  optional string name = 1;

  // The FML string describing this feature channel.
  optional string fml = 2;

  // --- Parameter sizes for this space ---

  // Dimensionality of the embedding space, or -1 if the feature should not be
  // embedded.
  optional int32 embedding_dim = 3;

  // Number of possible values returned.
  optional int32 vocabulary_size = 4;

  // Number of feature templates in the channel, i.e. how many features are
  // concatenated while sharing this channel's embedding.
  optional int32 size = 5;

  // If true, the channel's embeddings stay fixed at their pretrained values
  // rather than being trained. Pretrained embeddings are required in that
  // case.
  optional bool is_constant = 9;

  // --- Resources for this space ---

  // Predicate map used to compact feature values.
  optional string predicate_map = 6;

  // Reference to a pretrained embedding matrix for this feature set.
  optional Resource pretrained_embedding_matrix = 7;

  // Vocabulary file listing every vocabulary word, one per line.
  optional Resource vocab = 8;
}
// A feature channel that *links* to component activations. The "vocabulary"
// of such features is the set of activations they are linked to, so it is
// determined by the other components in the spec.
message LinkedFeatureChannel {
  // Human-interpretable channel name. NN builders may rely on it to decide
  // how to wire the different channels together internally.
  optional string name = 1;

  // Feature function specification. All functions should be of type
  // LinkedFeatureType.
  optional string fml = 2;

  // Embedding dimension, or -1 if the link should not be embedded.
  optional int32 embedding_dim = 3;

  // Number of feature templates in the channel, i.e. how many features are
  // concatenated while sharing this channel's embedding.
  optional int32 size = 4;

  // The component used for translation, e.g. "tagger".
  optional string source_component = 5;

  // Translator target (e.g. "token" or "last_action") that maps raw feature
  // values to indices. The Component named by source_component must be able
  // to interpret it.
  optional string source_translator = 6;

  // The layer that these features connect to.
  optional string source_layer = 7;
}
// A list of hyperparameter configurations to search over.
message TrainingGridSpec {
  // The grid points covered by the search.
  repeated GridPoint grid_point = 1;

  // Training targets that are created during the graph builder stage.
  repeated TrainTarget target = 2;
}
// Hyperparameter configuration for a single training run.
//
// NEXT ID: 22
message GridPoint {
  reserved 5, 6;

  // Describes training that uses multiple optimization methods.
  message CompositeOptimizerSpec {
    // The first optimizer.
    optional GridPoint method1 = 1;

    // The second optimizer.
    optional GridPoint method2 = 2;

    // Number of steps after which training switches from the first optimizer
    // to the second.
    optional int32 switch_after_steps = 3;
  }

  // Initial value of the global learning rate.
  optional double learning_rate = 1 [default = 0.1];

  // Momentum coefficient; applies when MomentumOptimizer is used.
  optional double momentum = 2 [default = 0.9];

  // Base and period of the global learning rate decay: the learning rate is
  // reduced by a factor of |decay_base| every |decay_steps|.
  optional double decay_base = 16 [default = 0.96];
  optional int32 decay_steps = 3 [default = 1000];

  // Selects "staircase" decay. If true, the rate is adjusted exactly once
  // every |decay_steps|. Otherwise it is adjusted in smaller increments on
  // every step, such that the overall decay is still |decay_base| every
  // |decay_steps|.
  optional bool decay_staircase = 17 [default = true];

  // Random seed used to initialize parameters.
  optional int32 seed = 4 [default = 0];

  // Optimizer used for training; the default is MomentumOptimizer.
  optional string learning_method = 7 [default = "momentum"];

  // Whether inference uses a moving average of the weights.
  optional bool use_moving_average = 8 [default = false];

  // Update coefficient of the rolling average.
  optional double average_weight = 9 [default = 0.9999];

  // Dropout *keep* probability used in the model (note: a keep probability,
  // despite the "rate" in the field name). 1.0 = no dropout.
  optional double dropout_rate = 10 [default = 1.0];

  // Dropout *keep* probability for recurrent connections. If < 0.0, recurrent
  // connections should use |dropout_rate| instead. 1.0 = no dropout.
  optional double recurrent_dropout_rate = 20 [default = -1.0];

  // Gradient clipping threshold; applied only if greater than zero. A value
  // in the range 1-20 seems to work well at keeping large learning rates from
  // causing problems for updates at the start of training.
  optional double gradient_clip_norm = 11 [default = 0.0];

  // Settings for using multiple optimization methods.
  optional CompositeOptimizerSpec composite_optimizer_spec = 12;

  // Parameters for Adam training.
  // NOTE(review): an adam_beta1 default of 0.01 is far below the 0.9 that
  // Adam implementations commonly default to -- confirm this is intentional
  // before relying on the default. It is left unchanged here because
  // existing configs may depend on it.
  optional double adam_beta1 = 13 [default = 0.01];
  optional double adam_beta2 = 14 [default = 0.9999];
  optional double adam_eps = 15 [default = 1e-8];

  // Coefficient for global L2 regularization.
  optional double l2_regularization_coefficient = 18 [default = 1e-4];

  // Coefficient for global self normalization regularization. Zero turns it
  // off.
  optional double self_norm_alpha = 19 [default = 0.0];

  // Comma separated list of components to which self_norm_alpha should be
  // restricted; typically a single component. If left empty, no filtering
  // takes place.
  optional string self_norm_components_filter = 21;
}
// A training target that gets built into the graph.
message TrainTarget {
  // Target name; should be unique across all targets.
  optional string name = 1;

  // Per-component weights. Either empty (defaults to equal weights) or one
  // entry per component in the spec. Weights are normalized across the
  // components being trained so that they sum to one.
  repeated double component_weights = 2;

  // Per-component flag selecting whether the component is trained using a
  // supervised signal. Either empty (defaults to all true) or one entry per
  // component in the spec.
  repeated bool unroll_using_oracle = 3;

  // Maximum length of the pipeline to train. For example, with max_index = 1
  // only the first component is trained via this target.
  // NOTE(review): the meaning of the default (-1) is not documented here;
  // presumably it means "no limit" -- confirm against the trainer code.
  optional int32 max_index = 4 [default = -1];
}