123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- from __future__ import print_function
- import shutil
- import fileinput
- import math
def main(filename, bins, maximum, yticks_number):
    """Bin the numbers found in a text file and build the axis labels.

    The file is expected to contain one number per line; blank lines are
    skipped.  The range from the smallest number up to ``maximum`` is split
    into ``bins`` equally wide bins, and one extra overflow bin collects
    every number that is greater than or equal to ``maximum``.

    Args:
        filename: Path of the text file to read.
        bins: Number of regular bins (must be at least 1).
        maximum: Upper bound of the binned range; larger values are counted
            in the overflow bin.
        yticks_number: Number of steps the y-axis is divided into (must be
            at least 1).

    Returns:
        A tuple ``(bin_counter, xticklabels, ytickslabels, yticks)``:
        ``bin_counter`` holds ``bins + 1`` counts (the last one is the
        overflow bin), ``xticklabels`` holds the lower edge of every regular
        bin followed by ``\\infty``, ``yticks`` holds the y tick positions as
        strings and ``ytickslabels`` their SI-suffixed labels.

    Raises:
        ValueError: If ``bins`` or ``yticks_number`` is smaller than 1, if
            the file contains no numbers, or if a line is not a number.
    """
    import bisect  # local import: only needed for the bin lookup below

    if bins < 1:
        raise ValueError("bins must be at least 1")
    if yticks_number < 1:
        raise ValueError("yticks_number must be at least 1")

    with open(filename) as f:
        numbers = [float(line) for line in f if line.strip()]
    if not numbers:
        raise ValueError("no numbers found in %r" % filename)

    minimum = min(numbers)
    width = (maximum - minimum) / bins
    # Lower edge of every regular bin; also used for the x tick labels.
    edges = [minimum + width * b for b in range(bins)]

    bin_counter = [0] * (bins + 1)
    for number in numbers:
        if number >= maximum:
            # Everything at or above the maximum goes to the overflow bin.
            bin_counter[bins] += 1
        else:
            # Index of the last edge that is <= number.  No number is below
            # edges[0] (the minimum), so the index is never negative.
            bin_counter[bisect.bisect_right(edges, number) - 1] += 1

    xticklabels = [get_xticklabel(edge) for edge in edges]
    xticklabels.append(r"\infty")

    # Get labels for y-axis: round the highest count down to its leading
    # digit (e.g. 4321 -> 4000).  Working on the decimal string avoids the
    # rounding error of math.log(x, 10) at exact powers of ten.
    maxy = max(bin_counter)
    digits = str(maxy)
    maxylabel = int(digits[0]) * 10 ** (len(digits) - 1)
    # Integer step of at least 1 so that range() always gets a valid step.
    ylabelsteps = max(1, maxylabel // yticks_number)

    yticks = []
    ytickslabels = []
    for tick in range(0, maxylabel + 1, ylabelsteps):
        yticks.append(str(tick))
        ytickslabels.append(get_yticklabel(tick, True))

    return bin_counter, xticklabels, ytickslabels, yticks
def get_xticklabel(value):
    """Return the x-axis label for ``value``: its integer part as a string.

    The fractional part is truncated towards zero, not rounded.
    """
    whole = int(value)
    return "%d" % whole
def get_yticklabel(value, si_suffix):
    """Format a y-axis tick value as a label string.

    Args:
        value: The tick value; it is converted with ``float()`` first.
        si_suffix: If true, the value is divided by the divisor returned by
            ``get_si_suffix`` and the matching suffix is appended.  If
            false, the plain float representation is returned.

    Returns:
        The label.  With a suffix, whole numbers are printed without
        decimals and all other values with two decimals.
    """
    number = float(value)
    if not si_suffix:
        return str(number)

    divisor, unit = get_si_suffix(number)
    scaled = number / divisor
    if scaled == int(scaled):
        text = "%i" % int(scaled)
    else:
        text = "%0.2f" % scaled
    return text + unit
def get_si_suffix(value):
    """Return the divisor and suffix used to abbreviate ``value``.

    Args:
        value: The number that should be abbreviated.

    Returns:
        A tuple ``(divide_by, suffix)``: ``(10**6, "M")`` for values of at
        least one million, ``(10**3, "K")`` for values of at least one
        thousand and ``(1, "")`` for everything else.
    """
    # The largest threshold has to be tested first; otherwise every value
    # of at least one million is already caught by the "K" check.
    if value >= 10**6:
        return (10**6, "M")
    if value >= 10**3:
        return (10**3, "K")
    return (1, "")
def modify_template(bin_counter, xticklabels, yticklabels, yticks):
    """Create the histogram LaTeX file from the template.

    The template ``histogram-large-1d-dataset.template.tex`` is copied to
    ``histogram-large-1d-dataset.tex`` (both in the current directory) and
    the placeholders in the copy are replaced in place.

    Args:
        bin_counter: Count per bin; written as ``(index, count)`` pairs.
        xticklabels: Labels for the x-axis; each one is wrapped in ``$...$``.
        yticklabels: Labels for the y-axis (strings).
        yticks: Tick positions for the y-axis (strings).
    """
    template_path = "histogram-large-1d-dataset.template.tex"
    output_path = "histogram-large-1d-dataset.tex"
    shutil.copyfile(template_path, output_path)

    # Placeholder / replacement pairs, applied to every line in this order.
    substitutions = [
        ("{{xticklabels}}",
         ", ".join(["$%s$" % label for label in xticklabels])),
        ("{{yticklabels}}", ", ".join(yticklabels)),
        ("{{yticks}}", ",".join(yticks)),
        ("{{coordinates}}",
         "".join(["(%i, %i) " % (index, count)
                  for index, count in enumerate(bin_counter)])),
    ]

    # With inplace=True everything printed inside the loop is written back
    # to the file instead of the terminal.
    for line in fileinput.input(output_path, inplace=True):
        for placeholder, replacement in substitutions:
            line = line.replace(placeholder, replacement)
        print(line, end='')
if __name__ == '__main__':
    # Command line entry point: parse the options, bin the data and fill
    # the LaTeX template with the result.
    import argparse

    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument(
        "-f", "--file",
        dest="filename",
        default="1ddata.txt",
        help="use FILE as input data",
        metavar="FILE")
    cli.add_argument(
        "-b", "--bins",
        dest="bins",
        type=int,
        default=15,
        help="how many bins should be used")
    cli.add_argument(
        "-m", "--max",
        dest="max",
        type=float,
        default=15000,
        help=("what is the maximum number "
              "that should get binned?"))
    cli.add_argument(
        "--yticks",
        dest="yticks",
        type=int,
        default=5,
        help=("How many y-ticks should be used?"))
    options = cli.parse_args()

    # main() returns the values in exactly the order modify_template() takes.
    histogram = main(options.filename, options.bins, options.max,
                     options.yticks)
    modify_template(*histogram)
|