process.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function
  4. import shutil
  5. import fileinput
  6. import math
  7. def main(filename, bins, maximum):
  8. with open(filename) as f:
  9. content = f.read().split("\n")
  10. numbers = []
  11. for line in content:
  12. line = line.strip()
  13. if line != "":
  14. numbers.append(float(line))
  15. numbers = sorted(numbers)
  16. minimum = min(numbers)
  17. bin_counter = [0 for i in range(bins+1)]
  18. xticklabels = []
  19. for i, number in enumerate(numbers):
  20. if number >= minimum + (maximum - minimum)/bins*(bins+1):
  21. bin_counter[bins] += 1
  22. elif number < minimum:
  23. bin_counter[0] += 1
  24. else:
  25. for b in range(bins):
  26. lower = minimum + (maximum - minimum)/bins*b
  27. upper = minimum + (maximum - minimum)/bins*(b+1)
  28. if lower <= number < upper:
  29. bin_counter[b] += 1
  30. break
  31. for b in range(bins):
  32. lower = minimum + (maximum - minimum)/bins*b
  33. xticklabels.append(get_xticklabel(lower))
  34. # Get labels for y-axis
  35. ytickslabels = []
  36. maxylabel = int(10**math.floor(math.log(max(bin_counter), 10)))
  37. ylabelsteps = maxylabel / 10
  38. for i in range(0, maxylabel+1, ylabelsteps):
  39. ytickslabels.append(get_yticklabel(i, True))
  40. xticklabels.append("\infty")
  41. return bin_counter, xticklabels, ytickslabels
  42. def get_xticklabel(value):
  43. return str(int(value/1000))
  44. def get_yticklabel(value, si_suffix):
  45. if si_suffix:
  46. divide_by, suffix = get_si_suffix(value)
  47. new_value = (value / divide_by)
  48. if int(new_value) == new_value:
  49. return ("%i" % int(new_value)) + suffix
  50. else:
  51. return ("%0.2f" % new_value) + suffix
  52. else:
  53. return str(value)
  54. def get_si_suffix(value):
  55. if value >= 10**3:
  56. return (10**3, "K")
  57. elif value >= 10**6:
  58. return (10**6, "M")
  59. else:
  60. return (1, "")
  61. def modify_template(bin_counter, xticklabels, yticklabels):
  62. shutil.copyfile("histogram-large-1d-dataset.template.tex",
  63. "histogram-large-1d-dataset.tex")
  64. xticklabels = ", ".join(map(lambda n: "$%s$" % n, xticklabels))
  65. yticklabels = ", ".join(yticklabels)
  66. coordinates = ""
  67. for i, value in enumerate(bin_counter):
  68. coordinates += "(%i, %i) " % (i, value)
  69. for line in fileinput.input("histogram-large-1d-dataset.tex",
  70. inplace=True):
  71. line = line.replace("{{xticklabels}}", xticklabels)
  72. line = line.replace("{{yticklabels}}", yticklabels)
  73. line = line.replace("{{coordinates}}", coordinates)
  74. print(line, end='')
  75. if __name__ == '__main__':
  76. from argparse import ArgumentParser
  77. parser = ArgumentParser()
  78. parser.add_argument("-f", "--file", dest="filename",
  79. default="1ddata.txt",
  80. help="use FILE as input data", metavar="FILE")
  81. parser.add_argument("-b", "--bins", dest="bins", type=int,
  82. default=15,
  83. help="how many bins should be used")
  84. parser.add_argument("-m", "--max", dest="max", type=float,
  85. default=15000,
  86. help=("what is the maximum number "
  87. "that should get binned?"))
  88. args = parser.parse_args()
  89. bin_counter, xticklabels, yticklabels = main(args.filename,
  90. args.bins,
  91. args.max)
  92. modify_template(bin_counter, xticklabels, yticklabels)