process.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import print_function
  4. import shutil
  5. import fileinput
  6. import math
  7. def main(filename, bins, maximum, yticks_number):
  8. with open(filename) as f:
  9. content = f.read().split("\n")
  10. numbers = []
  11. for line in content:
  12. line = line.strip()
  13. if line != "":
  14. numbers.append(float(line))
  15. numbers = sorted(numbers)
  16. minimum = min(numbers)
  17. bin_counter = [0 for i in range(bins+1)]
  18. xticklabels = []
  19. for i, number in enumerate(numbers):
  20. if number >= minimum + (maximum - minimum)/bins*(bins+1):
  21. bin_counter[bins] += 1
  22. elif number < minimum:
  23. bin_counter[0] += 1
  24. else:
  25. for b in range(bins):
  26. lower = minimum + (maximum - minimum)/bins*b
  27. upper = minimum + (maximum - minimum)/bins*(b+1)
  28. if lower <= number < upper:
  29. bin_counter[b] += 1
  30. break
  31. minimum = 0
  32. for b in range(bins):
  33. lower = minimum + (maximum - minimum)/bins*b
  34. xticklabels.append(get_xticklabel(lower))
  35. # Get labels for y-axis
  36. yticks = []
  37. ytickslabels = []
  38. maxy = max(bin_counter)
  39. maxylabel = int(10**math.floor(math.log(maxy, 10)))*int(str(maxy)[0])
  40. ylabelsteps = int(math.ceil(maxylabel / yticks_number))
  41. if ylabelsteps == 0:
  42. ylabelsteps = 1
  43. for i in range(0, maxylabel+1, ylabelsteps):
  44. print("i: %i, %i" % (i, maxylabel))
  45. print("label: %i%s" % get_si_suffix(i))
  46. yticks.append(str(i))
  47. ytickslabels.append(get_yticklabel(i, True))
  48. xticklabels.append("\infty")
  49. return bin_counter, xticklabels, ytickslabels, yticks
  50. def get_xticklabel(value):
  51. return str(int(value))
  52. def get_yticklabel(value, si_suffix):
  53. value = float(value)
  54. if si_suffix:
  55. divide_by, suffix = get_si_suffix(value)
  56. new_value = (value / divide_by)
  57. if int(new_value) == new_value:
  58. return ("%i" % int(new_value)) + suffix
  59. else:
  60. return ("%0.2f" % new_value) + suffix
  61. else:
  62. return str(value)
  63. def get_si_suffix(value):
  64. if value >= 10**3:
  65. return (10**3, "K")
  66. elif value >= 10**6:
  67. return (10**6, "M")
  68. else:
  69. return (1, "")
  70. def modify_template(bin_counter, xticklabels, yticklabels, yticks):
  71. shutil.copyfile("histogram-large-1d-dataset.template.tex",
  72. "histogram-large-1d-dataset.tex")
  73. xticklabels = ", ".join(map(lambda n: "$%s$" % n, xticklabels))
  74. yticklabels = ", ".join(yticklabels)
  75. yticks = ",".join(yticks)
  76. coordinates = ""
  77. for i, value in enumerate(bin_counter):
  78. coordinates += "(%i, %i) " % (i, value)
  79. for line in fileinput.input("histogram-large-1d-dataset.tex",
  80. inplace=True):
  81. line = line.replace("{{xticklabels}}", xticklabels)
  82. line = line.replace("{{yticklabels}}", yticklabels)
  83. line = line.replace("{{yticks}}", yticks)
  84. line = line.replace("{{coordinates}}", coordinates)
  85. print(line, end='')
  86. if __name__ == '__main__':
  87. from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
  88. parser = ArgumentParser(description=__doc__,
  89. formatter_class=ArgumentDefaultsHelpFormatter)
  90. parser.add_argument("-f", "--file", dest="filename",
  91. default="1ddata.txt",
  92. help="use FILE as input data", metavar="FILE")
  93. parser.add_argument("-b", "--bins", dest="bins", type=int,
  94. default=15,
  95. help="how many bins should be used")
  96. parser.add_argument("-m", "--max", dest="max", type=float,
  97. default=15000,
  98. help=("what is the maximum number "
  99. "that should get binned?"))
  100. parser.add_argument("--yticks", dest="yticks", type=int,
  101. default=5,
  102. help=("How many y-ticks should be used?"))
  103. args = parser.parse_args()
  104. bin_counter, xticklabels, yticklabels, yticks = main(args.filename,
  105. args.bins,
  106. args.max,
  107. args.yticks)
  108. modify_template(bin_counter, xticklabels, yticklabels, yticks)