// Protocol buffers for serializing string<=>index dictionaries.

syntax = "proto2";

package syntaxnet;

// One key/value entry of a string=>string mapping, in serializable form.
message StringToStringPair {
  // The key of the entry.
  required string key = 1;

  // The value associated with the key.
  required string value = 2;
}

// A string=>string mapping, serialized as a list of key/value entries.
message StringToStringMap {
  // The entries of the mapping, one per key=>value pair.
  repeated StringToStringPair pair = 1;
}

// Serialized form of an affix table.
message AffixTableEntry {
  // Serialized form of a single affix within the table.
  message AffixEntry {
    // The affix itself, as a string.
    required string form = 1;

    // Length of the affix (non-trivial to compute because of UTF-8).
    required int32 length = 2;

    // ID of the affix that is one character shorter than this one, or -1
    // when no such affix exists.
    required int32 shorter_id = 3;
  }

  // The type of the affix table, given as a string.
  required string type = 1;

  // The maximum length of an affix.
  required int32 max_length = 2;

  // All affixes, listed in order of affix ID.
  repeated AffixEntry affix = 3;
}

// A light-weight proto for storing vectors in binary format.
message TokenEmbedding {
  // The token; can be a word or phrase, or a URL, etc.
  required bytes token = 1;

  // Raw count of this token in the training corpus, if available.
  optional int64 count = 3;

  // The vector components, stored as packed floats.
  message Vector {
    repeated float values = 1 [packed = true];
  }

  // The vector stored for this token.
  optional Vector vector = 2;
}