# data_convert_example.py (2.2 KB)
  1. """Example of Converting TextSum model data.
  2. Usage:
  3. python data_convert_example.py --command binary_to_text --in_file data/data --out_file data/text_data
  4. python data_convert_example.py --command text_to_binary --in_file data/text_data --out_file data/binary_data
  5. python data_convert_example.py --command binary_to_text --in_file data/binary_data --out_file data/text_data2
  6. diff data/text_data2 data/text_data
  7. """
  8. import struct
  9. import sys
  10. import tensorflow as tf
  11. from tensorflow.core.example import example_pb2
  12. FLAGS = tf.app.flags.FLAGS
  13. tf.app.flags.DEFINE_string('command', 'binary_to_text',
  14. 'Either binary_to_text or text_to_binary.'
  15. 'Specify FLAGS.in_file accordingly.')
  16. tf.app.flags.DEFINE_string('in_file', '', 'path to file')
  17. tf.app.flags.DEFINE_string('out_file', '', 'path to file')
  18. def _binary_to_text():
  19. reader = open(FLAGS.in_file, 'rb')
  20. writer = open(FLAGS.out_file, 'w')
  21. while True:
  22. len_bytes = reader.read(8)
  23. if not len_bytes:
  24. sys.stderr.write('Done reading\n')
  25. return
  26. str_len = struct.unpack('q', len_bytes)[0]
  27. tf_example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0]
  28. tf_example = example_pb2.Example.FromString(tf_example_str)
  29. examples = []
  30. for key in tf_example.features.feature:
  31. examples.append('%s=%s' % (key, tf_example.features.feature[key].bytes_list.value[0]))
  32. writer.write('%s\n' % '\t'.join(examples))
  33. reader.close()
  34. writer.close()
  35. def _text_to_binary():
  36. inputs = open(FLAGS.in_file, 'r').readlines()
  37. writer = open(FLAGS.out_file, 'wb')
  38. for inp in inputs:
  39. tf_example = example_pb2.Example()
  40. for feature in inp.strip().split('\t'):
  41. (k, v) = feature.split('=')
  42. tf_example.features.feature[k].bytes_list.value.extend([v])
  43. tf_example_str = tf_example.SerializeToString()
  44. str_len = len(tf_example_str)
  45. writer.write(struct.pack('q', str_len))
  46. writer.write(struct.pack('%ds' % str_len, tf_example_str))
  47. writer.close()
  48. def main(unused_argv):
  49. assert FLAGS.command and FLAGS.in_file and FLAGS.out_file
  50. if FLAGS.command == 'binary_to_text':
  51. _binary_to_text()
  52. elif FLAGS.command == 'text_to_binary':
  53. _text_to_binary()
# Script entry point: runs only when this file is executed directly, not when
# it is imported.
# NOTE(review): tf.app.run() is expected to parse the command-line flags and
# then call main() -- confirm against the installed TensorFlow version.
if __name__ == '__main__':
    tf.app.run()