// K-best part-of-speech and dependency annotations for tokens.

syntax = "proto2";

import "syntaxnet/sentence.proto";

package syntaxnet;

// A list of alternative (k-best) syntax analyses, grouped by sentences.
message KBestSyntaxAnalyses {
  // Declares this message as an extension of Sentence (extension field
  // number 60366242), so that k-best analyses can be attached to a Sentence.
  extend Sentence {
    optional KBestSyntaxAnalyses extension = 60366242;
  }

  // Alternative analyses for each sentence. Sentences are listed in the
  // order visited by a SentenceIterator.
  repeated KBestSyntaxAnalysesForSentence sentence = 1;

  // Alternative analyses for each token.
  repeated KBestSyntaxAnalysesForToken token = 2;
}

// A list of alternative (k-best) analyses for a sentence spanning from a start
// token index to an end token index. The alternative analyses are ordered by
// decreasing model score from best to worst. The first analysis is the 1-best
// analysis, which is typically also stored in the document tokens.
message KBestSyntaxAnalysesForSentence {
  // Index of the first token of the sentence. Defaults to -1 when unset.
  optional int32 start = 1 [default = -1];

  // Index of the last token of the sentence. Defaults to -1 when unset.
  optional int32 end = 2 [default = -1];

  // K-best analyses for the tokens in this sentence. All of the analyses in
  // the list have the same "type"; e.g., k-best taggings,
  // k-best {tagging+parse}s, etc.
  //
  // Note that the type of analysis stored in this list can change depending
  // on where we are in the document processing pipeline; e.g., the list may
  // initially hold taggings and later switch to parses.
  //
  // The first token_analysis is the 1-best analysis, which is typically also
  // stored in the document. Note: some post-processors will update the
  // document's syntax trees, but will leave these analyses unchanged.
  repeated AlternativeTokenAnalysis token_analysis = 3;
}

// A list of scored alternative (k-best) analyses for a particular token. These
// are all distinct from each other and ordered by decreasing model score. The
// first is the 1-best analysis, which may or may not match the document tokens
// depending on how the k-best analyses are selected.
message KBestSyntaxAnalysesForToken {
  // All token analyses in this repeated field refer to the same token.
  // Each alternative analysis will contain a single entry for repeated fields
  // such as head, tag, category and label.
  repeated AlternativeTokenAnalysis token_analysis = 3;
}

// An alternative analysis of tokens in the document. The repeated fields
// are indexed relative to the beginning of a sentence. Fields not
// represented in the alternative analysis are assumed to be unchanged.
// Currently only alternatives for tags, categories and (labeled) dependency
// heads are supported.
//
// Each repeated field should either have length=0 or length=number of tokens.
message AlternativeTokenAnalysis {
  // Head of this token in the dependency tree: the id of the token which has
  // an arc going to this one. If it is the root token of a sentence, then it
  // is set to -1.
  repeated int32 head = 1;

  // Part-of-speech tag for token.
  repeated string tag = 2;

  // Coarse-grained word category for token.
  repeated string category = 3;

  // Label for dependency relation between this token and its head.
  repeated string label = 4;

  // The score of this analysis. Bigger values typically indicate better
  // quality, but there are no guarantees and there is no pre-defined range.
  optional double score = 5;
}