process.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function
  4. import shutil
  5. import fileinput
  6. import math
  7. def main(filename, bins, maximum, yticks_number):
  8. with open(filename) as f:
  9. content = f.read().split("\n")
  10. numbers = []
  11. for line in content:
  12. line = line.strip()
  13. if line != "":
  14. numbers.append(float(line))
  15. numbers = sorted(numbers)
  16. minimum = min(numbers)
  17. bin_counter = [0 for i in range(bins+1)]
  18. xticklabels = []
  19. for i, number in enumerate(numbers):
  20. if number >= minimum + (maximum - minimum)/bins*(bins+1):
  21. bin_counter[bins] += 1
  22. elif number < minimum:
  23. bin_counter[0] += 1
  24. else:
  25. for b in range(bins):
  26. lower = minimum + (maximum - minimum)/bins*b
  27. upper = minimum + (maximum - minimum)/bins*(b+1)
  28. if lower <= number < upper:
  29. bin_counter[b] += 1
  30. break
  31. for b in range(bins):
  32. lower = minimum + (maximum - minimum)/bins*b
  33. xticklabels.append(get_xticklabel(lower))
  34. # Get labels for y-axis
  35. yticks = []
  36. ytickslabels = []
  37. maxy = max(bin_counter)
  38. maxylabel = int(10**math.floor(math.log(maxy, 10)))*int(str(maxy)[0])
  39. ylabelsteps = maxylabel / yticks
  40. for i in range(0, maxylabel+1, ylabelsteps):
  41. print("i: %i, %i" % (i, maxylabel))
  42. print("label: %i%s" % get_si_suffix(i))
  43. yticks.append(str(i))
  44. ytickslabels.append(get_yticklabel(i, True))
  45. xticklabels.append("\infty")
  46. return bin_counter, xticklabels, ytickslabels, yticks
  47. def get_xticklabel(value):
  48. return str(int(value))
  49. def get_yticklabel(value, si_suffix):
  50. value = float(value)
  51. if si_suffix:
  52. divide_by, suffix = get_si_suffix(value)
  53. new_value = (value / divide_by)
  54. if int(new_value) == new_value:
  55. return ("%i" % int(new_value)) + suffix
  56. else:
  57. return ("%0.2f" % new_value) + suffix
  58. else:
  59. return str(value)
  60. def get_si_suffix(value):
  61. if value >= 10**3:
  62. return (10**3, "K")
  63. elif value >= 10**6:
  64. return (10**6, "M")
  65. else:
  66. return (1, "")
  67. def modify_template(bin_counter, xticklabels, yticklabels, yticks):
  68. shutil.copyfile("histogram-large-1d-dataset.template.tex",
  69. "histogram-large-1d-dataset.tex")
  70. xticklabels = ", ".join(map(lambda n: "$%s$" % n, xticklabels))
  71. yticklabels = ", ".join(yticklabels)
  72. yticks = ",".join(yticks)
  73. coordinates = ""
  74. for i, value in enumerate(bin_counter):
  75. coordinates += "(%i, %i) " % (i, value)
  76. for line in fileinput.input("histogram-large-1d-dataset.tex",
  77. inplace=True):
  78. line = line.replace("{{xticklabels}}", xticklabels)
  79. line = line.replace("{{yticklabels}}", yticklabels)
  80. line = line.replace("{{yticks}}", yticks)
  81. line = line.replace("{{coordinates}}", coordinates)
  82. print(line, end='')
  83. if __name__ == '__main__':
  84. from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
  85. parser = ArgumentParser(description=__doc__,
  86. formatter_class=ArgumentDefaultsHelpFormatter)
  87. parser.add_argument("-f", "--file", dest="filename",
  88. default="1ddata.txt",
  89. help="use FILE as input data", metavar="FILE")
  90. parser.add_argument("-b", "--bins", dest="bins", type=int,
  91. default=15,
  92. help="how many bins should be used")
  93. parser.add_argument("-m", "--max", dest="max", type=float,
  94. default=15000,
  95. help=("what is the maximum number "
  96. "that should get binned?"))
  97. parser.add_argument("--yticks", dest="yticks", type=int,
  98. default=5,
  99. help=("How many y-ticks should be used?"))
  100. args = parser.parse_args()
  101. bin_counter, xticklabels, yticklabels, yticks = main(args.filename,
  102. args.bins,
  103. args.max,
  104. args.yticks)
  105. modify_template(bin_counter, xticklabels, yticklabels, yticks)