Browse Source

Adding dataset about git history of PyData projects

Marc Garcia 6 years ago
parent
commit
31ed5ce4ee

+ 32 - 0
00_Empty.ipynb

@@ -0,0 +1,32 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

File diff suppressed because it is too large
+ 5007 - 0
data/git_history/female.txt


+ 10 - 0
data/git_history/fix_csv.py

@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+import sys
+
+
+result = []
+for line in sys.stdin:
+    values = line.split(',', maxsplit=int(sys.argv[1]) - 1)
+    values[-1] = '"{}"\n'.format(values[-1].rstrip('\n')
+                                           .replace('"', '""'))
+    sys.stdout.write(','.join(values))

+ 5 - 0
data/git_history/git_info.sh

@@ -0,0 +1,5 @@
+#!/bin/sh
+FIX_CSV_CMD="$(pwd)/fix_csv.py"  # resolved before cd; expects fix_csv.py in the starting directory
+cd "$1" || exit 1  # $1: repository directory; abort rather than log whatever repo we started in
+echo "commit_hash,author_name,author_email,author_date,committer_name,committer_email,committer_date,subject"
+git log --pretty=format:%H,%an,%ae,%ai,%cn,%ce,%ci,%s | "${FIX_CSV_CMD}" 8  # 8 = column count; last one (subject) gets quoted

BIN
data/git_history/jupyter.csv.gz


File diff suppressed because it is too large
+ 2949 - 0
data/git_history/male.txt


BIN
data/git_history/matplotlib.csv.gz


BIN
data/git_history/pandas.csv.gz


BIN
data/git_history/sklearn.csv.gz


+ 1 - 1
environment.yml

@@ -4,6 +4,6 @@ channels:
   - defaults
 dependencies:
   - python=3.7
-  - pandas=0.23.3
+  - pandas=0.25
   - matplotlib=2.2
   - jupyter=1.0