浏览代码

Add the resampling support to labours.py

Vadim Markovtsev 8 年之前
父节点
当前提交
2ab12f7f1f
共有 1 个文件被更改，包括 38 次插入和 8 次删除
  1. 38 8
      labours.py

+ 38 - 8
labours.py

@@ -1,6 +1,7 @@
 import argparse
 import argparse
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
 import sys
 import sys
+import warnings
 
 
 import numpy
 import numpy
 
 
@@ -17,6 +18,12 @@ def parse_args():
     parser.add_argument("--text-size", default=12,
     parser.add_argument("--text-size", default=12,
                         help="Size of the labels and legend.")
                         help="Size of the labels and legend.")
     parser.add_argument("--backend", help="Matplotlib backend to use.")
     parser.add_argument("--backend", help="Matplotlib backend to use.")
+    parser.add_argument(
+        "--resample", default="year",
+        help="The way to resample the time series. Possible values are: "
+             "\"month\", \"year\", \"no\", \"raw\" and pandas offset aliases ("
+             "http://pandas.pydata.org/pandas-docs/stable/timeseries.html"
+             "#offset-aliases).")
     args = parser.parse_args()
     args = parser.parse_args()
     return args
     return args
 
 
@@ -29,7 +36,6 @@ def main():
         matplotlib.use(args.backend)
         matplotlib.use(args.backend)
     import matplotlib.pyplot as pyplot
     import matplotlib.pyplot as pyplot
     import pandas
     import pandas
-    import seaborn  # to get nice colors, he-he
 
 
     start, granularity, sampling = input().split()
     start, granularity, sampling = input().split()
     start = datetime.fromtimestamp(int(start))
     start = datetime.fromtimestamp(int(start))
@@ -37,14 +43,38 @@ def main():
     sampling = int(sampling)
     sampling = int(sampling)
     matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
     matrix = numpy.array([numpy.fromstring(line, dtype=int, sep=" ")
                           for line in sys.stdin.read().split("\n")[:-1]]).T
                           for line in sys.stdin.read().split("\n")[:-1]]).T
-    pyplot.stackplot(
-        pandas.date_range(start, periods=matrix.shape[1], freq="%dD" % sampling),
-        matrix,
-        labels=["%s - %s" % ((start + timedelta(days=i * granularity)).date(),
-                             (start + timedelta(days=(i + 1) * granularity)).date())
-                for i in range(matrix.shape[0])])
+    date_range_granularity = pandas.date_range(
+        start, periods=matrix.shape[0], freq="%dD" % granularity)
+    date_range_sampling = pandas.date_range(
+        start, periods=matrix.shape[1],  freq="%dD" % sampling)
+    df = pandas.DataFrame({
+        dr: pandas.Series(row, index=date_range_sampling)
+        for dr, row in zip(date_range_granularity, matrix)
+    }).T
+    if args.resample not in ("no", "raw"):
+        aliases = {
+            "year": "A",
+            "month": "M"
+        }
+        df = df.resample(aliases.get(args.resample, args.resample)).mean()
+        matrix = df.as_matrix()
+        if args.resample in ("year", "A"):
+            labels = [dt.year for dt in df.index]
+        elif args.resample in ("month", "M"):
+            labels = [dt.strftime("%Y %B") for dt in df.index]
+        else:
+            labels = [dt.date() for dt in df.index]
+    else:
+        labels = [
+            "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
+                         (start + timedelta(days=(i + 1) * granularity)).date())
+            for i in range(matrix.shape[0])]
+        if len(labels) > 18:
+            warnings.warn("Too many labels - consider resampling.")
+    pyplot.stackplot(date_range_sampling, matrix, labels=labels)
     pyplot.legend(loc=2, fontsize=args.text_size)
     pyplot.legend(loc=2, fontsize=args.text_size)
     pyplot.ylabel("Lines of code", fontsize=args.text_size)
     pyplot.ylabel("Lines of code", fontsize=args.text_size)
+    pyplot.ylabel("Time", fontsize=args.text_size)
     pyplot.tick_params(labelsize=args.text_size)
     pyplot.tick_params(labelsize=args.text_size)
     pyplot.gcf().set_size_inches(12, 9)
     pyplot.gcf().set_size_inches(12, 9)
     if not args.output:
     if not args.output:
@@ -54,7 +84,7 @@ def main():
         pyplot.show()
         pyplot.show()
     else:
     else:
         pyplot.tight_layout()
         pyplot.tight_layout()
-        pyplot.savefig(args.output)
+        pyplot.savefig(args.output, transparent=True)
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     sys.exit(main())
     sys.exit(main())