|
@@ -0,0 +1,362 @@
|
|
|
+Parameter {
|
|
|
+ name: "brain_tokenizer_embedding_dims"
|
|
|
+ value: "16;16;16"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tokenizer_embedding_names"
|
|
|
+ value: "chars;digits;puncts"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tokenizer_features"
|
|
|
+ value: "input.char "
|
|
|
+ "input(-1).char "
|
|
|
+ "input(1).char; "
|
|
|
+ "input.digit "
|
|
|
+ "input(-1).digit "
|
|
|
+ "input(1).digit; "
|
|
|
+ "input.punctuation-amount "
|
|
|
+ "input(-1).punctuation-amount "
|
|
|
+ "input(1).punctuation-amount "
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tokenizer_transition_system"
|
|
|
+ value: "binary-segment-transitions"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_morpher_embedding_dims"
|
|
|
+ value: "2;16;8;16;16;16;16;16;64"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_morpher_embedding_names"
|
|
|
+ value: "capitalization;char_ngram;other;prefix2;prefix3;suffix2;suffix3;tags;words"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_morpher_features"
|
|
|
+ value: "input.capitalization "
|
|
|
+ "input(1).capitalization "
|
|
|
+ "input(2).capitalization "
|
|
|
+ "input(3).capitalization "
|
|
|
+ "input(-1).capitalization "
|
|
|
+ "input(-2).capitalization "
|
|
|
+ "input(-3).capitalization "
|
|
|
+ "input(-4).capitalization; "
|
|
|
+ "input.token.char-ngram "
|
|
|
+ "input(1).token.char-ngram "
|
|
|
+ "input(2).token.char-ngram "
|
|
|
+ "input(3).token.char-ngram "
|
|
|
+ "input(-1).token.char-ngram "
|
|
|
+ "input(-2).token.char-ngram "
|
|
|
+ "input(-3).token.char-ngram "
|
|
|
+ "input(-4).token.char-ngram; "
|
|
|
+ "input.digit "
|
|
|
+ "input.hyphen "
|
|
|
+ "input.token.punctuation-amount "
|
|
|
+ "input.token.quote; "
|
|
|
+ "input.token.prefix(length=2) "
|
|
|
+ "input(1).token.prefix(length=2) "
|
|
|
+ "input(2).token.prefix(length=2) "
|
|
|
+ "input(3).token.prefix(length=2) "
|
|
|
+ "input(-1).token.prefix(length=2) "
|
|
|
+ "input(-2).token.prefix(length=2) "
|
|
|
+ "input(-3).token.prefix(length=2) "
|
|
|
+ "input(-4).token.prefix(length=2); "
|
|
|
+ "input.token.prefix(length=3) "
|
|
|
+ "input(1).token.prefix(length=3) "
|
|
|
+ "input(2).token.prefix(length=3) "
|
|
|
+ "input(3).token.prefix(length=3) "
|
|
|
+ "input(-1).token.prefix(length=3) "
|
|
|
+ "input(-2).token.prefix(length=3) "
|
|
|
+ "input(-3).token.prefix(length=3) "
|
|
|
+ "input(-4).token.prefix(length=3); "
|
|
|
+ "input.token.suffix(length=2) "
|
|
|
+ "input(1).token.suffix(length=2) "
|
|
|
+ "input(2).token.suffix(length=2) "
|
|
|
+ "input(3).token.suffix(length=2) "
|
|
|
+ "input(-1).token.suffix(length=2) "
|
|
|
+ "input(-2).token.suffix(length=2) "
|
|
|
+ "input(-3).token.suffix(length=2) "
|
|
|
+ "input(-4).token.suffix(length=2); "
|
|
|
+ "input.token.suffix(length=3) "
|
|
|
+ "input(1).token.suffix(length=3) "
|
|
|
+ "input(2).token.suffix(length=3) "
|
|
|
+ "input(3).token.suffix(length=3) "
|
|
|
+ "input(-1).token.suffix(length=3) "
|
|
|
+ "input(-2).token.suffix(length=3) "
|
|
|
+ "input(-3).token.suffix(length=3) "
|
|
|
+ "input(-4).token.suffix(length=3); "
|
|
|
+ "input(-1).pred-morph-tag "
|
|
|
+ "input(-2).pred-morph-tag "
|
|
|
+ "input(-3).pred-morph-tag "
|
|
|
+ "input(-4).pred-morph-tag; "
|
|
|
+ "input.token.word "
|
|
|
+ "input(1).token.word "
|
|
|
+ "input(2).token.word "
|
|
|
+ "input(3).token.word "
|
|
|
+ "input(-1).token.word "
|
|
|
+ "input(-2).token.word "
|
|
|
+ "input(-3).token.word "
|
|
|
+ "input(-4).token.word"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_morpher_transition_system"
|
|
|
+ value: "morpher"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tagger_embedding_dims"
|
|
|
+ value: "2;16;8;16;16;16;16;16;64"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tagger_embedding_names"
|
|
|
+ value: "capitalization;char_ngram;other;prefix2;prefix3;suffix2;suffix3;tags;words"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tagger_features"
|
|
|
+ value: "input.capitalization "
|
|
|
+ "input(1).capitalization "
|
|
|
+ "input(2).capitalization "
|
|
|
+ "input(3).capitalization "
|
|
|
+ "input(-1).capitalization "
|
|
|
+ "input(-2).capitalization "
|
|
|
+ "input(-3).capitalization "
|
|
|
+ "input(-4).capitalization; "
|
|
|
+ "input.token.char-ngram "
|
|
|
+ "input(1).token.char-ngram "
|
|
|
+ "input(2).token.char-ngram "
|
|
|
+ "input(3).token.char-ngram "
|
|
|
+ "input(-1).token.char-ngram "
|
|
|
+ "input(-2).token.char-ngram "
|
|
|
+ "input(-3).token.char-ngram "
|
|
|
+ "input(-4).token.char-ngram; "
|
|
|
+ "input.digit "
|
|
|
+ "input.hyphen "
|
|
|
+ "input.token.punctuation-amount "
|
|
|
+ "input.token.quote; "
|
|
|
+ "input.token.prefix(length=2) "
|
|
|
+ "input(1).token.prefix(length=2) "
|
|
|
+ "input(2).token.prefix(length=2) "
|
|
|
+ "input(3).token.prefix(length=2) "
|
|
|
+ "input(-1).token.prefix(length=2) "
|
|
|
+ "input(-2).token.prefix(length=2) "
|
|
|
+ "input(-3).token.prefix(length=2) "
|
|
|
+ "input(-4).token.prefix(length=2); "
|
|
|
+ "input.token.prefix(length=3) "
|
|
|
+ "input(1).token.prefix(length=3) "
|
|
|
+ "input(2).token.prefix(length=3) "
|
|
|
+ "input(3).token.prefix(length=3) "
|
|
|
+ "input(-1).token.prefix(length=3) "
|
|
|
+ "input(-2).token.prefix(length=3) "
|
|
|
+ "input(-3).token.prefix(length=3) "
|
|
|
+ "input(-4).token.prefix(length=3); "
|
|
|
+ "input.token.suffix(length=2) "
|
|
|
+ "input(1).token.suffix(length=2) "
|
|
|
+ "input(2).token.suffix(length=2) "
|
|
|
+ "input(3).token.suffix(length=2) "
|
|
|
+ "input(-1).token.suffix(length=2) "
|
|
|
+ "input(-2).token.suffix(length=2) "
|
|
|
+ "input(-3).token.suffix(length=2) "
|
|
|
+ "input(-4).token.suffix(length=2); "
|
|
|
+ "input.token.suffix(length=3) "
|
|
|
+ "input(1).token.suffix(length=3) "
|
|
|
+ "input(2).token.suffix(length=3) "
|
|
|
+ "input(3).token.suffix(length=3) "
|
|
|
+ "input(-1).token.suffix(length=3) "
|
|
|
+ "input(-2).token.suffix(length=3) "
|
|
|
+ "input(-3).token.suffix(length=3) "
|
|
|
+ "input(-4).token.suffix(length=3); "
|
|
|
+ "input(-1).pred-tag "
|
|
|
+ "input(-2).pred-tag "
|
|
|
+ "input(-3).pred-tag "
|
|
|
+ "input(-4).pred-tag; "
|
|
|
+ "input.token.word "
|
|
|
+ "input(1).token.word "
|
|
|
+ "input(2).token.word "
|
|
|
+ "input(3).token.word "
|
|
|
+ "input(-1).token.word "
|
|
|
+ "input(-2).token.word "
|
|
|
+ "input(-3).token.word "
|
|
|
+ "input(-4).token.word"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_tagger_transition_system"
|
|
|
+ value: "tagger"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_parser_embedding_dims"
|
|
|
+ value: "32;32;32;64"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_parser_embedding_names"
|
|
|
+ value: "labels;morphology;tags;words"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_parser_features"
|
|
|
+ value: "stack.child(1).label "
|
|
|
+ "stack.child(1).sibling(-1).label "
|
|
|
+ "stack.child(-1).label "
|
|
|
+ "stack.child(-1).sibling(1).label "
|
|
|
+ "stack.child(2).label "
|
|
|
+ "stack.child(-2).label "
|
|
|
+ "stack(1).child(1).label "
|
|
|
+ "stack(1).child(1).sibling(-1).label "
|
|
|
+ "stack(1).child(-1).label "
|
|
|
+ "stack(1).child(-1).sibling(1).label "
|
|
|
+ "stack(1).child(2).label "
|
|
|
+ "stack(1).child(-2).label; "
|
|
|
+ "input.token.morphology-set "
|
|
|
+ "input(1).token.morphology-set "
|
|
|
+ "input(2).token.morphology-set "
|
|
|
+ "input(3).token.morphology-set "
|
|
|
+ "stack.token.morphology-set "
|
|
|
+ "stack.child(1).token.morphology-set "
|
|
|
+ "stack.child(1).sibling(-1).token.morphology-set "
|
|
|
+ "stack.child(-1).token.morphology-set "
|
|
|
+ "stack.child(-1).sibling(1).token.morphology-set "
|
|
|
+ "stack.child(2).token.morphology-set "
|
|
|
+ "stack.child(-2).token.morphology-set "
|
|
|
+ "stack(1).token.morphology-set "
|
|
|
+ "stack(1).child(1).token.morphology-set "
|
|
|
+ "stack(1).child(1).sibling(-1).token.morphology-set "
|
|
|
+ "stack(1).child(-1).token.morphology-set "
|
|
|
+ "stack(1).child(-1).sibling(1).token.morphology-set "
|
|
|
+ "stack(1).child(2).token.morphology-set "
|
|
|
+ "stack(1).child(-2).token.morphology-set "
|
|
|
+ "stack(2).token.morphology-set "
|
|
|
+ "stack(3).token.morphology-set; "
|
|
|
+ "input.token.tag "
|
|
|
+ "input(1).token.tag "
|
|
|
+ "input(2).token.tag "
|
|
|
+ "input(3).token.tag "
|
|
|
+ "stack.token.tag "
|
|
|
+ "stack.child(1).token.tag "
|
|
|
+ "stack.child(1).sibling(-1).token.tag "
|
|
|
+ "stack.child(-1).token.tag "
|
|
|
+ "stack.child(-1).sibling(1).token.tag "
|
|
|
+ "stack.child(2).token.tag "
|
|
|
+ "stack.child(-2).token.tag "
|
|
|
+ "stack(1).token.tag "
|
|
|
+ "stack(1).child(1).token.tag "
|
|
|
+ "stack(1).child(1).sibling(-1).token.tag "
|
|
|
+ "stack(1).child(-1).token.tag "
|
|
|
+ "stack(1).child(-1).sibling(1).token.tag "
|
|
|
+ "stack(1).child(2).token.tag "
|
|
|
+ "stack(1).child(-2).token.tag "
|
|
|
+ "stack(2).token.tag "
|
|
|
+ "stack(3).token.tag; "
|
|
|
+ "input.token.word "
|
|
|
+ "input(1).token.word "
|
|
|
+ "input(2).token.word "
|
|
|
+ "input(3).token.word "
|
|
|
+ "stack.token.word "
|
|
|
+ "stack.child(1).token.word "
|
|
|
+ "stack.child(1).sibling(-1).token.word "
|
|
|
+ "stack.child(-1).token.word "
|
|
|
+ "stack.child(-1).sibling(1).token.word "
|
|
|
+ "stack.child(2).token.word "
|
|
|
+ "stack.child(-2).token.word "
|
|
|
+ "stack(1).token.word "
|
|
|
+ "stack(1).child(1).token.word "
|
|
|
+ "stack(1).child(1).sibling(-1).token.word "
|
|
|
+ "stack(1).child(-1).token.word "
|
|
|
+ "stack(1).child(-1).sibling(1).token.word "
|
|
|
+ "stack(1).child(2).token.word "
|
|
|
+ "stack(1).child(-2).token.word "
|
|
|
+ "stack(2).token.word "
|
|
|
+ "stack(3).token.word "
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "brain_parser_transition_system"
|
|
|
+ value: "arc-standard"
|
|
|
+}
|
|
|
+Parameter {
|
|
|
+ name: "join_category_to_pos"
|
|
|
+ value: "true"
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "word-map"
|
|
|
+ Part {
|
|
|
+ file_pattern: "word-map"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "char-map"
|
|
|
+ Part {
|
|
|
+ file_pattern: "char-map"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "tag-map"
|
|
|
+ Part {
|
|
|
+ file_pattern: "tag-map"
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+input {
|
|
|
+ name: "tag-to-category"
|
|
|
+ Part {
|
|
|
+ file_pattern: "tag-to-category"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "label-map"
|
|
|
+ Part {
|
|
|
+ file_pattern: "label-map"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "char-ngram-map"
|
|
|
+ Part {
|
|
|
+ file_pattern: "char-ngram-map"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "prefix-table"
|
|
|
+ Part {
|
|
|
+ file_pattern: "prefix-table"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "suffix-table"
|
|
|
+ Part {
|
|
|
+ file_pattern: "suffix-table"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "morph-label-set"
|
|
|
+ Part {
|
|
|
+ file_pattern: "morph-label-set"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: "morphology-map"
|
|
|
+ Part {
|
|
|
+ file_pattern: "morphology-map"
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: 'stdin'
|
|
|
+ record_format: 'tokenized-text'
|
|
|
+ Part {
|
|
|
+ file_pattern: '-'
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: 'stdin-conll'
|
|
|
+ record_format: 'conll-sentence'
|
|
|
+ Part {
|
|
|
+ file_pattern: '-'
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: 'stdin-untoken'
|
|
|
+ record_format: 'untokenized-text'
|
|
|
+ Part {
|
|
|
+ file_pattern: '-'
|
|
|
+ }
|
|
|
+}
|
|
|
+input {
|
|
|
+ name: 'stdout-conll'
|
|
|
+ record_format: 'conll-sentence'
|
|
|
+ Part {
|
|
|
+ file_pattern: '-'
|
|
|
+ }
|
|
|
+}
|