| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- // Protocol buffers for serializing string<=>index dictionaries.
- syntax = "proto2";
- package syntaxnet;
// A single key/value entry in which both the key and the value are strings.
message StringToStringPair {
  // The key of the entry.
  required string key = 1;

  // The value associated with the key.
  required string value = 2;
}
// A string-to-string mapping, serialized as a list of key/value entries.
message StringToStringMap {
  // The entries of the mapping, one StringToStringPair per key/value pair.
  repeated StringToStringPair pair = 1;
}
// Serialized form of an affix table: the table's type and maximum affix
// length, followed by the affixes it contains.
message AffixTableEntry {
  // Serialized form of one affix in the table.
  message AffixEntry {
    // The affix itself, as a string.
    required string form = 1;

    // Length of the affix. Stored explicitly because it is non-trivial to
    // compute from the UTF-8 encoded form.
    required int32 length = 2;

    // ID of the affix that is one character shorter than this one, or -1 when
    // no such affix exists.
    required int32 shorter_id = 3;
  }

  // The type of the affix table, given as a string.
  required string type = 1;

  // The maximum length of an affix.
  required int32 max_length = 2;

  // The affixes of the table, listed in order of affix ID.
  repeated AffixEntry affix = 3;
}
// A light-weight proto for storing a token together with its vector in binary
// format.
message TokenEmbedding {
  // Container for the float values of a vector.
  message Vector {
    // The values of the vector, written with packed encoding.
    repeated float values = 1 [packed = true];
  }

  // The token; can be a word or phrase, or a URL, etc.
  required bytes token = 1;

  // If available, the raw count of this token in the training corpus.
  // Note: field numbers intentionally do not follow declaration order here;
  // they identify the fields on the wire and must not be renumbered.
  optional int64 count = 3;

  // The vector stored for this token.
  optional Vector vector = 2;
}
|