瀏覽代碼

Update the readme with Linux image

Vadim Markovtsev 7 年之前
父節點
當前提交
396aa0e8fb
共有 4 個文件被更改,包括 47 次插入36 次删除
  1. 18 13
      README.md
  2. 二進制
      git-git.png
  3. 29 23
      labours.py
  4. 二進制
      linux.png

+ 18 - 13
README.md

@@ -15,19 +15,24 @@ algorithm, only the last modification date is recorded.
 
 There are two tools: `hercules` and `labours.py`. The first is the program
 written in Go which collects the burnout stats from a Git repository.
-The second is the Python script which draws the stack area plot and optionally resamples the time series. These two tools
-are normally used together through the pipe. `hercules` prints
-results in plain text. The first line is three numbers: UNIX timestamp which
-corresponds to the time the repository was created, *granularity* and *sampling*.
-Granularity is the number of days each band in the stack consists of. For example,
-to generate the annual burnout plot, set granularity to 365. Sampling is the
-frequency with which the burnout is snapshotted. The smaller the value,
-the more smooth is the plot but the more work is done.
+The second is the Python script which draws the stack area plot and optionally
+resamples the time series. These two tools are normally used together through
+a pipe. `hercules` prints results in plain text. The first line contains four numbers:
+UNIX timestamp which corresponds to the time the repository was created,
+UNIX timestamp of the last commit, *granularity* and *sampling*.
+Granularity is the number of days each band in the stack consists of. Sampling
+is the frequency with which the burnout state is snapshotted. The smaller the
+value, the smoother the plot, but the more work is done.
 
-![git/git image](git-git.png)
-<p align="center">git/git burndown (granularity 365, sampling 30, no resampling)</p>
+![git/git image](linux.png)
+<p align="center">torvalds/linux burndown (granularity 30, sampling 30, resampled by year)</p>
 
-There is an option to resample the bands inside `labours.py`, so that you can define very precise distribution and visualize it differently. Besides, resampling aligns the bands across the year (month, week) boundaries.
+There is an option to resample the bands inside `labours.py`, so that you can
+define a very precise distribution and visualize it in different ways. Besides,
+resampling aligns the bands across periodic boundaries, e.g. months or years.
+Unresampled bands are not aligned to such boundaries and start from the project's birth date.
+
+There is a [presentation](http://vmarkovtsev.github.io/techtalks-2017-moscow-lightning/) available.
 
 ### Installation
 You are going to need Go and Python 2 or 3.
@@ -43,8 +48,8 @@ wget https://github.com/src-d/hercules/raw/master/labours.py
 hercules https://github.com/src-d/go-git | python3 labours.py --resample month
 # Use "file system" go-git backend and print the raw data.
 hercules /path/to/cloned/go-git
-# Use "file system" go-git backend, cache the cloned repository to /tmp/repo-cache and display the plot.
-hercules https://github.com/git/git /tmp/repo-cache | python3 labours.py --resample month
+# Use "file system" go-git backend, cache the cloned repository to /tmp/repo-cache and display the unresampled plot.
+hercules https://github.com/git/git /tmp/repo-cache | python3 labours.py --resample raw
 
 # Now something fun
 # Get the linear history from git rev-list, reverse it

二進制
git-git.png


+ 29 - 23
labours.py

@@ -74,35 +74,43 @@ def load_matrix(args):
                             daily_matrix[suby, subx] = matrix[
                                                            y, x] / granularity
         daily_matrix[(last - start).days:] = 0
-        # Resample the time interval
+        # Resample the bands
         aliases = {
             "year": "A",
             "month": "M"
         }
         args.resample = aliases.get(args.resample, args.resample)
         periods = 0
-        date_range_sampling = [start]
-        while date_range_sampling[-1] < finish:
+        date_granularity_sampling = [start]
+        while date_granularity_sampling[-1] < finish:
             periods += 1
-            date_range_sampling = pandas.date_range(
+            date_granularity_sampling = pandas.date_range(
                 start, periods=periods, freq=args.resample)
+        date_range_sampling = pandas.date_range(
+            date_granularity_sampling[0],
+            periods=(finish - date_granularity_sampling[0]).days,
+            freq="1D")
         # Fill the new square matrix
-        matrix = numpy.zeros((len(date_range_sampling),) * 2,
-                             dtype=numpy.float32)
-        for i, gdt in enumerate(date_range_sampling):
-            istart = (date_range_sampling[i - 1] - start).days if i > 0 else 0
+        matrix = numpy.zeros(
+            (len(date_granularity_sampling), len(date_range_sampling)),
+            dtype=numpy.float32)
+        for i, gdt in enumerate(date_granularity_sampling):
+            istart = (date_granularity_sampling[i - 1] - start).days \
+                if i > 0 else 0
             ifinish = (gdt - start).days
-            for j, sdt in enumerate(date_range_sampling[i:]):
-                jfinish = min((date_range_sampling[i + j] - start).days,
-                              daily_matrix.shape[1] - 1)
-                matrix[i, i + j] = daily_matrix[istart:ifinish, jfinish].sum()
+
+            for j, sdt in enumerate(date_range_sampling):
+                if (sdt - start).days >= istart:
+                    break
+            matrix[i, j:] = \
+                daily_matrix[istart:ifinish, (sdt - start).days:].sum(axis=0)
         # Hardcode some cases to improve labels' readability
         if args.resample in ("year", "A"):
-            labels = [dt.year for dt in date_range_sampling]
+            labels = [dt.year for dt in date_granularity_sampling]
         elif args.resample in ("month", "M"):
-            labels = [dt.strftime("%Y %B") for dt in date_range_sampling]
+            labels = [dt.strftime("%Y %B") for dt in date_granularity_sampling]
         else:
-            labels = [dt.date() for dt in date_range_sampling]
+            labels = [dt.date() for dt in date_granularity_sampling]
     else:
         labels = [
             "%s - %s" % ((start + timedelta(days=i * granularity)).date(),
@@ -167,27 +175,25 @@ def plot_matrix(args, matrix, date_range_sampling, labels, granularity,
         del locs[0]
     endindex = -1
     if len(locs) >= 2 and \
-            pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 3:
+            pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
         locs.append(pyplot.xlim()[1])
         endindex = len(locs) - 1
     startindex = -1
     if len(locs) >= 2 and \
-            locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 3:
+            locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
         locs.append(pyplot.xlim()[0])
         startindex = len(locs) - 1
     pyplot.gca().set_xticks(locs)
     # hacking time!
     labels = pyplot.gca().get_xticklabels()
     if startindex >= 0:
-        if "M" in args.resample:
-            labels[startindex].set_text(date_range_sampling[0].date())
-            labels[startindex].set_text = lambda _: None
+        labels[startindex].set_text(date_range_sampling[0].date())
+        labels[startindex].set_text = lambda _: None
         labels[startindex].set_rotation(30)
         labels[startindex].set_ha("right")
     if endindex >= 0:
-        if "M" in args.resample:
-            labels[endindex].set_text(date_range_sampling[-1].date())
-            labels[endindex].set_text = lambda _: None
+        labels[endindex].set_text(date_range_sampling[-1].date())
+        labels[endindex].set_text = lambda _: None
         labels[endindex].set_rotation(30)
         labels[endindex].set_ha("right")
     if not args.output:

二進制
linux.png