|
@@ -0,0 +1,65 @@
|
|
|
+"""Example of Converting TextSum model data.
|
|
|
+Usage:
|
|
|
+python data_convert_example.py --command binary_to_text --in_file data/data --out_file data/text_data
|
|
|
+python data_convert_example.py --command text_to_binary --in_file data/text_data --out_file data/binary_data
|
|
|
+python data_convert_example.py --command binary_to_text --in_file data/binary_data --out_file data/text_data2
|
|
|
+diff data/text_data2 data/text_data
|
|
|
+"""
|
|
|
+
|
|
|
+import struct
|
|
|
+import sys
|
|
|
+
|
|
|
+import tensorflow as tf
|
|
|
+from tensorflow.core.example import example_pb2
|
|
|
+
|
|
|
+FLAGS = tf.app.flags.FLAGS
|
|
|
+tf.app.flags.DEFINE_string('command', 'binary_to_text',
|
|
|
+ 'Either binary_to_text or text_to_binary.'
|
|
|
+ 'Specify FLAGS.in_file accordingly.')
|
|
|
+tf.app.flags.DEFINE_string('in_file', '', 'path to file')
|
|
|
+tf.app.flags.DEFINE_string('out_file', '', 'path to file')
|
|
|
+
|
|
|
+def _binary_to_text():
|
|
|
+ reader = open(FLAGS.in_file, 'rb')
|
|
|
+ writer = open(FLAGS.out_file, 'w')
|
|
|
+ while True:
|
|
|
+ len_bytes = reader.read(8)
|
|
|
+ if not len_bytes:
|
|
|
+ sys.stderr.write('Done reading\n')
|
|
|
+ return
|
|
|
+ str_len = struct.unpack('q', len_bytes)[0]
|
|
|
+ tf_example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0]
|
|
|
+ tf_example = example_pb2.Example.FromString(tf_example_str)
|
|
|
+ examples = []
|
|
|
+ for key in tf_example.features.feature:
|
|
|
+ examples.append('%s=%s' % (key, tf_example.features.feature[key].bytes_list.value[0]))
|
|
|
+ writer.write('%s\n' % '\t'.join(examples))
|
|
|
+ reader.close()
|
|
|
+ writer.close()
|
|
|
+
|
|
|
+
|
|
|
+def _text_to_binary():
|
|
|
+ inputs = open(FLAGS.in_file, 'r').readlines()
|
|
|
+ writer = open(FLAGS.out_file, 'wb')
|
|
|
+ for inp in inputs:
|
|
|
+ tf_example = example_pb2.Example()
|
|
|
+ for feature in inp.strip().split('\t'):
|
|
|
+ (k, v) = feature.split('=')
|
|
|
+ tf_example.features.feature[k].bytes_list.value.extend([v])
|
|
|
+ tf_example_str = tf_example.SerializeToString()
|
|
|
+ str_len = len(tf_example_str)
|
|
|
+ writer.write(struct.pack('q', str_len))
|
|
|
+ writer.write(struct.pack('%ds' % str_len, tf_example_str))
|
|
|
+ writer.close()
|
|
|
+
|
|
|
+
|
|
|
+def main(unused_argv):
|
|
|
+ assert FLAGS.command and FLAGS.in_file and FLAGS.out_file
|
|
|
+ if FLAGS.command == 'binary_to_text':
|
|
|
+ _binary_to_text()
|
|
|
+ elif FLAGS.command == 'text_to_binary':
|
|
|
+ _text_to_binary()
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+ tf.app.run()
|