// File: kbest_syntax.proto
  // K-best part-of-speech and dependency annotations for tokens.

  syntax = "proto2";

  import "syntaxnet/sentence.proto";

  package syntaxnet;

  // Container for alternative (k-best) syntax analyses, grouped by sentence
  // and by token.
  message KBestSyntaxAnalyses {
    // Registers this message as an extension of Sentence so the k-best
    // analyses can be attached to a Sentence.
    extend Sentence {
      optional KBestSyntaxAnalyses extension = 60366242;
    }

    // Alternative analyses, one entry per sentence. Sentences appear in the
    // order in which a SentenceIterator visits them.
    repeated KBestSyntaxAnalysesForSentence sentence = 1;

    // Alternative analyses, one entry per token.
    repeated KBestSyntaxAnalysesForToken token = 2;
  }

  // Alternative (k-best) analyses for one sentence, identified by the span
  // from a start token index to an end token index. Analyses are sorted by
  // decreasing model score (best first). The first entry is the 1-best
  // analysis, which is typically also stored in the document tokens.
  message KBestSyntaxAnalysesForSentence {
    // First token of the sentence. Defaults to -1 when unset.
    optional int32 start = 1 [default = -1];

    // Last token of the sentence. Defaults to -1 when unset.
    optional int32 end = 2 [default = -1];

    // K-best analyses for the tokens in this sentence.
    //
    // Every analysis in the list has the same "type", e.g. k-best taggings or
    // k-best {tagging+parse}s. The type stored here can change depending on
    // the stage of the document processing pipeline: the list may initially
    // hold taggings and later switch to parses.
    //
    // The first token_analysis is the 1-best analysis, which is typically
    // also stored in the document. Note: some post-processors update the
    // document's syntax trees but leave these analyses unchanged.
    repeated AlternativeTokenAnalysis token_analysis = 3;
  }

  // Scored alternative (k-best) analyses for a single token. The analyses are
  // all distinct from each other and sorted by decreasing model score. The
  // first entry is the 1-best analysis; whether it matches the document
  // tokens depends on how the k-best analyses were selected.
  message KBestSyntaxAnalysesForToken {
    // Every entry in this field refers to the same token. Each alternative
    // analysis contains a single entry for repeated fields such as head, tag,
    // category and label.
    repeated AlternativeTokenAnalysis token_analysis = 3;
  }

  // One alternative analysis of tokens in the document.
  //
  // The repeated fields are indexed relative to the beginning of a sentence.
  // Any field not represented in the alternative analysis is assumed to be
  // unchanged. Currently only alternatives for tags, categories and (labeled)
  // dependency heads are supported.
  //
  // Each repeated field should have either length 0 or length equal to the
  // number of tokens.
  message AlternativeTokenAnalysis {
    // Dependency head of each token: the id of the token that has an arc
    // going to this one. Set to -1 for the root token of a sentence.
    repeated int32 head = 1;

    // Part-of-speech tag for each token.
    repeated string tag = 2;

    // Coarse-grained word category for each token.
    repeated string category = 3;

    // Label of the dependency relation between each token and its head.
    repeated string label = 4;

    // Score of this analysis. Bigger values typically indicate better
    // quality, but there are no guarantees and no pre-defined range.
    optional double score = 5;
  }