// dictionary.proto (1.5 KB)
  // Protocol buffers for serializing string<=>index dictionaries.
  syntax = "proto2";

  package syntaxnet;
  // Serializable representation of a single string=>string pair.
  message StringToStringPair {
    // String representing the key.
    required string key = 1;

    // String representing the value.
    required string value = 2;
  }
  // Serializable representation of a string=>string mapping, stored as a list
  // of StringToStringPair entries.
  message StringToStringMap {
    // Key=>value pairs.
    repeated StringToStringPair pair = 1;
  }
  // Affix table entry, for serialization of the affix tables.
  message AffixTableEntry {
    // Nested message for serializing a single affix.
    message AffixEntry {
      // The affix as a string.
      required string form = 1;

      // The length of the affix (this is non-trivial to compute due to UTF-8).
      required int32 length = 2;

      // The ID of the affix that is one character shorter, or -1 if none
      // exists.
      required int32 shorter_id = 3;
    }

    // The type of affix table, as a string.
    required string type = 1;

    // The maximum affix length.
    required int32 max_length = 2;

    // The list of affixes, in order of affix ID.
    repeated AffixEntry affix = 3;
  }
  // A light-weight proto to store vectors in binary format.
  message TokenEmbedding {
    // The token itself; can be word or phrase, or URL, etc.
    required bytes token = 1;

    // If available, raw count of this token in the training corpus.
    optional int64 count = 3;

    // Container for the float components, stored with packed encoding.
    message Vector {
      repeated float values = 1 [packed = true];
    }

    // The vector stored for this token; optional, so it may be absent.
    optional Vector vector = 2;
  }