john lockman 5 years ago
parent
commit
2346c454ff
50 changed files with 1692 additions and 2 deletions
  1. README.md (+55 −2)
  2. build-kubernetes-cluster.yml (+35 −0)
  3. create_users/create_user.sh (+54 −0)
  4. create_users/users (+6 −0)
  5. device-plugin.yaml (+43 −0)
  6. example.yaml (+30 −0)
  7. host_inventory_file (+25 −0)
  8. k8s-rdma-sriov-node-config.yaml (+12 −0)
  9. k8s-sriov-cni-installer.yaml (+38 −0)
  10. roles/common/files/k8s.conf (+3 −0)
  11. roles/common/files/kubernetes.repo (+8 −0)
  12. roles/common/files/nvidia (+3 −0)
  13. roles/common/handlers/main.yml (+21 −0)
  14. roles/common/tasks/main.yml (+134 −0)
  15. roles/common/vars/main.yml (+10 −0)
  16. roles/computeGPU/files/k8s.conf (+3 −0)
  17. roles/computeGPU/files/kubernetes.repo (+8 −0)
  18. roles/computeGPU/files/nvidia (+3 −0)
  19. roles/computeGPU/handlers/main.yml (+21 −0)
  20. roles/computeGPU/tasks/main.yml (+64 −0)
  21. roles/computeGPU/vars/main.yml (+10 −0)
  22. roles/master/files/k8s.conf (+3 −0)
  23. roles/master/files/kubernetes.repo (+8 −0)
  24. roles/master/files/nvidia (+3 −0)
  25. roles/master/tasks/main.yml (+31 −0)
  26. roles/startmaster/files/create_admin_user.yaml (+5 −0)
  27. roles/startmaster/files/create_clusterRoleBinding.yaml (+12 −0)
  28. roles/startmaster/files/data-pv.yaml (+20 −0)
  29. roles/startmaster/files/data2-pv.yaml (+20 −0)
  30. roles/startmaster/files/data3-pv.yaml (+20 −0)
  31. roles/startmaster/files/data4-pv.yaml (+20 −0)
  32. roles/startmaster/files/enable_gpu_k8s.sh (+1 −0)
  33. roles/startmaster/files/flannel_net.sh (+3 −0)
  34. roles/startmaster/files/katib-pv.yaml (+16 −0)
  35. roles/startmaster/files/kube-flannel.yaml (+536 −0)
  36. roles/startmaster/files/kubeflow_persistent_volumes.yaml (+51 −0)
  37. roles/startmaster/files/minio-pvc.yaml (+16 −0)
  38. roles/startmaster/files/mysql-pv.yaml (+17 −0)
  39. roles/startmaster/files/nfs-class.yaml (+7 −0)
  40. roles/startmaster/files/nfs-deployment.yaml (+32 −0)
  41. roles/startmaster/files/nfs-serviceaccount.yaml (+4 −0)
  42. roles/startmaster/files/nfs_clusterrole.yaml (+20 −0)
  43. roles/startmaster/files/nfs_clusterrolebinding.yaml (+12 −0)
  44. roles/startmaster/files/notebook-pv.yaml (+17 −0)
  45. roles/startmaster/files/persistent_volumes.yaml (+20 −0)
  46. roles/startmaster/files/pvc.yaml (+12 −0)
  47. roles/startmaster/files/tiller_config.sh (+3 −0)
  48. roles/startmaster/tasks/main.yml (+145 −0)
  49. roles/startworkers/tasks/main.yml (+33 −0)
  50. scuttle (+19 −0)

+ 55 - 2
README.md

@@ -1,2 +1,55 @@
-# omnia
-Software tools for standing up Slurm/Kubernetes clusters on Dell EMC PowerEdge servers from factory OS images
+Dancing to the beat of a different drum.
+
+# Short Version:
+
+Install Kubernetes and all dependencies
+```
+ansible-playbook -i host_inventory_file build-kubernetes-cluster.yml
+```
+
+Initialize K8S cluster
+```
+ansible-playbook -i host_inventory_file build-kubernetes-cluster.yml --tags "init"
+```
+
+
+# What this does:
+
+## Build/Install
+
+### Add additional repositories:
+
+- Kubernetes (Google)
+- El Repo (nvidia drivers)
+- Nvidia (nvidia-docker)
+- EPEL (Extra Packages for Enterprise Linux)
+
+### Install common packages
+ - gcc
+ - python-pip
+ - docker
+ - kubelet
+ - kubeadm
+ - kubectl
+ - nvidia-detect
+ - kmod-nvidia
+ - nvidia-x11-drv
+ - nvidia-container-runtime
+ - ksonnet (CLI framework for K8S configs)
+
+### Enable GPU Device Plugins (nvidia-container-runtime-hook)
+
+### Modify kubeadm config to allow GPUs as schedulable resource 
+
+### Start and enable services
+ - Docker
+ - Kubelet
+
+## Initialize Cluster
+### Head/master
+- Start K8S, pass startup token to compute/slaves
+- Initialize networking (currently using Flannel)
+- Set up K8S Dashboard
+- Create dynamic/persistent volumes
+### Compute/slaves
+- Join k8s cluster
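
A quick way to confirm the cluster came up with GPUs schedulable (a hedged check, assuming `kubectl` is configured on the master and the NVIDIA device plugin has registered its `nvidia.com/gpu` resource):

```
kubectl get nodes
kubectl describe nodes | grep -i 'nvidia.com/gpu'
```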

+ 35 - 0
build-kubernetes-cluster.yml

@@ -0,0 +1,35 @@
+---
+# Playbook for building a Kubernetes cluster
+
+# Collect facts from all hosts
+- hosts: all
+
+# Apply Common Installation and Config
+- hosts: cluster
+  gather_facts: false
+  roles:
+    - common
+
+# Apply GPU Node Config
+- hosts: gpus
+  gather_facts: false
+  roles:
+    - computeGPU
+
+# Apply Master Config
+- hosts: master
+  gather_facts: false
+  roles:
+    - master
+
+# Start K8s on master server
+- hosts: master
+  gather_facts: false
+  roles:
+    - startmaster
+
+# Start K8s worker servers
+- hosts: compute,gpus
+  gather_facts: false
+  roles:
+    - startworkers
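
Because each play is bound to an inventory group, a single group can be re-run in isolation with Ansible's standard `--limit` flag; for example, to reapply only the GPU node configuration (a sketch, assuming the inventory shown below):

```
ansible-playbook -i host_inventory_file build-kubernetes-cluster.yml --limit gpus
```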

+ 54 - 0
create_users/create_user.sh

@@ -0,0 +1,54 @@
+#!/bin/bash
+
+SLURM=0
+FILENAME=''
+DEFAULT=''
+while [[ $# -gt 1 ]]
+do
+key="$1"
+
+case $key in
+    -s|--slurm)
+    SLURM=1
+    ;;
+    -f|--file)
+    FILENAME="$2"
+    shift # past argument
+    ;;
+    --default)
+    DEFAULT=YES
+    ;;
+    *)
+            # unknown option
+    ;;
+esac
+shift # past argument or value
+done
+echo Add Slurm Account   = "${SLURM}"
+echo FILENAME      = "${FILENAME}"
+
+#input file is in the form:
+#username First Last
+INFILE=${FILENAME}
+
+while IFS='' read -r line; do
+   IFS=" " read -ra ACCOUNT <<< "$line"
+   user=${ACCOUNT[0]}
+   password="changeme"
+   pass=$(perl -e 'print crypt($ARGV[0], "password")' $password)
+
+  echo "Creating account for $user"
+  useradd -m -p $pass $user
+  #propagate the account to the other nodes (pdsh target list comes from site config, e.g. WCOLL)
+  pdsh "useradd -m -p $pass $user"
+  #force reset on login
+  chage -d 0 $user
+  #useradd -m -p $pass $user
+
+  #become user once to initialize the home directory environment
+  sudo su - $user -c "exit"
+  #generate ssh-keys
+  sudo -u $user  ssh-keygen -N "" -t rsa -f /home/$user/.ssh/id_rsa
+  sudo -u $user  cat /home/$user/.ssh/id_rsa.pub > /home/$user/.ssh/authorized_keys
+  chown $user:$user /home/$user/.ssh/authorized_keys
+  sudo -u $user  chmod 0644 /home/$user/.ssh/authorized_keys
+
+done < $INFILE
+
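
Typical invocation (a sketch; the script must run as root, and although the comment mentions `username First Last`, only the first whitespace-separated field of each line is used, so the bare usernames in `create_users/users` work as-is):

```
sudo ./create_user.sh --slurm -f create_users/users
```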

+ 6 - 0
create_users/users

@@ -0,0 +1,6 @@
+john.lockman
+don_smith2
+lwilson
+gundev1
+pei_yang
+srinivas

+ 43 - 0
device-plugin.yaml

@@ -0,0 +1,43 @@
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: rdma-sriov-dp-ds
+  namespace: kube-system
+spec:
+  template:
+    metadata:
+      # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler
+      # reserves resources for critical add-on pods so that they can be rescheduled after
+      # a failure.  This annotation works in tandem with the toleration below.
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ""
+      labels:
+        name: rdma-sriov-dp-ds
+    spec:
+      hostNetwork: true
+      tolerations:
+      # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
+      # This, along with the annotation above marks this pod as a critical add-on.
+      - key: CriticalAddonsOnly
+        operator: Exists
+      containers:
+      - image: rdma/k8s-rdma-sriov-dev-plugin
+        name: k8s-rdma-sriov-dp-ds
+        imagePullPolicy: IfNotPresent
+        securityContext:
+          privileged: true
+        volumeMounts:
+          - name: device-plugin
+            mountPath: /var/lib/kubelet/device-plugins
+          - name: config
+            mountPath: /k8s-rdma-sriov-dev-plugin
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
+        - name: config
+          configMap:
+            name: rdma-devices
+            items:
+            - key: config.json
+              path: config.json

+ 30 - 0
example.yaml

@@ -0,0 +1,30 @@
+apiVersion: "kubeflow.org/v1alpha2"
+kind: "TFJob"
+metadata:
+  name: "example-job"
+spec:
+  replicaSpecs:
+    - replicas: 1
+      tfReplicaType: MASTER
+      template:
+        spec:
+          containers:
+            - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+              name: tensorflow
+          restartPolicy: OnFailure
+    - replicas: 1
+      tfReplicaType: WORKER
+      template:
+        spec:
+          containers:
+            - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+              name: tensorflow
+          restartPolicy: OnFailure
+    - replicas: 2
+      tfReplicaType: PS
+      template:
+        spec:
+          containers:
+            - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
+              name: tensorflow
+          restartPolicy: OnFailure
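
The job can be submitted like any other manifest once the Kubeflow `TFJob` CRD is installed (hedged example; the `tfjobs` resource name assumes the v1alpha2 tf-operator is running):

```
kubectl create -f example.yaml
kubectl get tfjobs
```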

+ 25 - 0
host_inventory_file

@@ -0,0 +1,25 @@
+[master]
+friday
+
+[compute]
+#compute001
+#compute002
+compute000
+compute[002:005]
+#compute[201:204]
+#compute[301:304]
+#compute[401:404]
+
+[gpus]
+#compute001
+compute002
+compute004
+compute005
+
+[workers:children]
+compute
+gpus
+
+[cluster:children]
+master
+workers
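
The nested groups (`workers`, `cluster`) can be sanity-checked before running the playbook, e.g. with Ansible's ping module (assuming SSH access to all hosts):

```
ansible -i host_inventory_file cluster -m ping
```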

+ 12 - 0
k8s-rdma-sriov-node-config.yaml

@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: rdma-devices
+  namespace: kube-system
+data:
+  config.json: |
+    {
+        "mode" : "sriov",
+        #"pfNetdevices": [ "eth0" , "eth1" ]
+        "pfNetdevices": [ "ib0" ]
+    }
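
The ConfigMap must exist before the device-plugin DaemonSet starts, since the pod mounts it; a hedged apply order:

```
kubectl create -f k8s-rdma-sriov-node-config.yaml
kubectl create -f device-plugin.yaml
```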

+ 38 - 0
k8s-sriov-cni-installer.yaml

@@ -0,0 +1,38 @@
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-sriov-cni-ds-installer
+  namespace: kube-system
+spec:
+  template:
+    metadata:
+      labels:
+        name: sriov-cni-ds
+    spec:
+      hostNetwork: true
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        operator: Exists
+        effect: NoSchedule
+      initContainers:
+      - name: install-cni
+        image: rdma/k8s-sriov-cni-installer
+        imagePullPolicy: IfNotPresent
+        command: [ "/installer/installer.sh" ]
+        volumeMounts:
+        - name: host-cni-etc
+          mountPath: /host-cni-etc
+        - name: host-cni-bin
+          mountPath: /host-cni-bin
+      containers:      
+      - name: install-cni-sleep
+        image: rdma/k8s-sriov-cni-installer
+        imagePullPolicy: IfNotPresent
+        command: [ "/installer/installer_sleep.sh" ]
+      volumes:
+      - name: host-cni-etc
+        hostPath:
+              path: /etc/cni/net.d/
+      - name: host-cni-bin
+        hostPath:
+              path: /opt/cni/bin

+ 3 - 0
roles/common/files/k8s.conf

@@ -0,0 +1,3 @@
+net.bridge.bridge-nf-call-ip6tables = 1
+net.bridge.bridge-nf-call-iptables = 1
+

+ 8 - 0
roles/common/files/kubernetes.repo

@@ -0,0 +1,8 @@
+[kubernetes]
+name=Kubernetes
+baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+

+ 3 - 0
roles/common/files/nvidia

@@ -0,0 +1,3 @@
+#!/bin/sh
+PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
+

+ 21 - 0
roles/common/handlers/main.yml

@@ -0,0 +1,21 @@
+---
+
+#- name: Enable docker service
+  #service:
+    #name: docker
+    #enabled: yes
+#
+- name: Start and Enable docker service
+  service:
+    name: docker
+    state: restarted
+    enabled: yes
+  #tags: install
+
+- name: Start and Enable Kubernetes - kubelet
+  service:
+    name: kubelet
+    state: started
+    enabled: yes
+  #tags: install
+

+ 134 - 0
roles/common/tasks/main.yml

@@ -0,0 +1,134 @@
+---
+
+- name: add kubernetes repo
+  copy: src=kubernetes.repo dest=/etc/yum.repos.d/ owner=root group=root mode=644
+  tags: install
+
+# add ElRepo GPG Key
+- rpm_key:
+    state: present
+    key: https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
+  tags: install
+
+- name: add ElRepo (Nvidia kmod drivers)
+  yum:
+    name: http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
+    state: present
+  tags: install
+
+- name: update sysctl to handle incorrectly routed traffic when iptables is bypassed
+  copy: src=k8s.conf dest=/etc/sysctl.d/ owner=root group=root mode=644
+  tags: install
+
+- name: update sysctl
+  command: /sbin/sysctl --system
+  tags: install
+
+- name: Install EPEL Repository
+  yum: name=epel-release state=present
+  tags: install
+
+#likely need to add a reboot hook in here
+#- name: update kernel and all other system packages
+  #yum: name=* state=latest
+  #tags: install
+
+- name: disable swap
+  command: /sbin/swapoff -a
+  tags: install
+
+# Disable selinux
+- selinux:
+    state: disabled
+  tags: install
+
+- name: install common packages
+  yum: 
+    name:
+      - gcc
+      - nfs-utils
+      - python-pip
+      - docker
+      - bash-completion
+      - kubelet
+      - kubeadm
+      - kubectl
+      - nvidia-detect
+    state: present
+  tags: install
+
+- name: install InfiniBand Support
+  yum:
+    name: "@Infiniband Support"
+    state: present
+
+- name: Install KSonnet
+  unarchive:
+    src: https://github.com/ksonnet/ksonnet/releases/download/v0.13.1/ks_0.13.1_linux_amd64.tar.gz
+    dest: /usr/bin/
+    extra_opts: [--strip-components=1]
+    remote_src: yes
+    exclude:
+      - ks_0.13.1_linux_amd64/CHANGELOG.md
+      - ks_0.13.1_linux_amd64/CODE-OF-CONDUCT.md
+      - ks_0.13.1_linux_amd64/CONTRIBUTING.md
+      - ks_0.13.1_linux_amd64/LICENSE
+      - ks_0.13.1_linux_amd64/README.md
+  tags: install
+
+- name: upgrade pip
+  command: /bin/pip install --upgrade pip
+  tags: install
+
+#- name: Enable DevicePlugins for all GPU nodes (nvidia-container-runtime-hook)
+  #copy: src=nvidia dest=/usr/libexec/oci/hooks.d/ owner=root group=root mode=755
+  #tags: install
+
+- name: Add KUBELET_EXTRA_ARGS to enable GPUs
+  lineinfile:
+    path: /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
+    line: 'Environment="KUBELET_EXTRA_ARGS=--feature-gates=DevicePlugins=true"'
+    insertbefore: 'KUBELET_KUBECONFIG_ARGS='
+  tags: install
+
+- name: Start and Enable docker service
+  service:
+    name: docker
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Start and Enable Kubernetes - kubelet
+  service:
+    name: kubelet
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Start and Enable rpcbind service
+  service:
+    name: rpcbind
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Start and Enable nfs-server service
+  service:
+    name: nfs-server
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Start and Enable nfs-lock service
+  service:
+    name: nfs-lock
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Start and Enable nfs-idmap service
+  service:
+    name: nfs-idmap
+    state: restarted
+    enabled: yes
+  tags: install
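
After this role runs, the kubelet drop-in edit and service state can be verified on any cluster node (a minimal sketch, assuming systemd hosts):

```
systemctl status docker kubelet
grep KUBELET_EXTRA_ARGS /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
```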

+ 10 - 0
roles/common/vars/main.yml

@@ -0,0 +1,10 @@
+---
+
+common_packages:
+  - epel-release
+  - python-pip
+  - docker
+  - bash-completion
+  - kubelet 
+  - kubeadm
+  - kubectl

+ 3 - 0
roles/computeGPU/files/k8s.conf

@@ -0,0 +1,3 @@
+net.bridge.bridge-nf-call-ip6tables = 1
+net.bridge.bridge-nf-call-iptables = 1
+

+ 8 - 0
roles/computeGPU/files/kubernetes.repo

@@ -0,0 +1,8 @@
+[kubernetes]
+name=Kubernetes
+baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+

+ 3 - 0
roles/computeGPU/files/nvidia

@@ -0,0 +1,3 @@
+#!/bin/sh
+PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
+

+ 21 - 0
roles/computeGPU/handlers/main.yml

@@ -0,0 +1,21 @@
+---
+
+#- name: Enable docker service
+  #service:
+    #name: docker
+    #enabled: yes
+#
+- name: Start and Enable docker service
+  service:
+    name: docker
+    state: restarted
+    enabled: yes
+  #tags: install
+
+- name: Start and Enable Kubernetes - kubelet
+  service:
+    name: kubelet
+    state: started
+    enabled: yes
+  #tags: install
+

+ 64 - 0
roles/computeGPU/tasks/main.yml

@@ -0,0 +1,64 @@
+---
+- name: install Nvidia driver
+  yum: 
+    name: 
+      - kmod-nvidia
+      #- nvidia-x11-drv
+    state: present
+  tags: install
+
+#- name: add Nvidia container runtime support
+  #get_url:
+    #url: https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo
+    #dest: /etc/yum.repos.d/nvidia-docker.repo
+  #tags: install
+
+- name: add Nvidia container runtime support
+  get_url:
+    url: https://nvidia.github.io/nvidia-container-runtime/centos7/nvidia-container-runtime.repo
+    dest: /etc/yum.repos.d/nvidia-container-runtime.repo
+  tags: install, testing
+
+# disable repository GPG check (metadata signature verification fails for this repo)
+- replace:
+    path: /etc/yum.repos.d/nvidia-container-runtime.repo
+    regexp: 'repo_gpgcheck=1'
+    replace: 'repo_gpgcheck=0'
+    backup: yes
+  tags: testing
+
+- name: install Nvidia-container-runtime-hook
+  yum: 
+    name: 
+      #- nvidia-detect
+      #- kmod-nvidia-410.73-1.el7_5.elrepo
+      - nvidia-container-runtime-hook
+    state: present
+  tags: install
+
+
+# This needs to be done on GPU nodes 
+#- name: Enable DevicePlugins for all GPU nodes (nvidia-container-runtime-hook)
+  #copy: src=nvidia dest=/usr/libexec/oci/hooks.d/ owner=root group=root mode=755
+  #tags: install
+
+#- name: Add KUBELET_EXTRA_ARGS to enable Plugins (GPU support) -- already done in common (III)
+  #lineinfile:
+    #path: /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
+    #line: 'Environment="KUBELET_EXTRA_ARGS=--feature-gates=DevicePlugins=true"'
+    #insertbefore: 'KUBELET_KUBECONFIG_ARGS='
+  #tags: install
+
+- name: Restart and Enable docker service
+  service:
+    name: docker
+    state: restarted
+    enabled: yes
+  tags: install
+
+- name: Restart and Enable Kubernetes - kubelet
+  service:
+    name: kubelet
+    state: restarted
+    enabled: yes
+  tags: install
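
A common smoke test for the runtime hook is running `nvidia-smi` inside a CUDA container (hedged: assumes the node can pull `nvidia/cuda` and that the hook injects the GPU based on the image's `NVIDIA_VISIBLE_DEVICES`):

```
docker run --rm nvidia/cuda:10.0-base nvidia-smi
```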

+ 10 - 0
roles/computeGPU/vars/main.yml

@@ -0,0 +1,10 @@
+---
+
+common_packages:
+  - epel-release
+  - python-pip
+  - docker
+  - bash-completion
+  - kubelet 
+  - kubeadm
+  - kubectl

+ 3 - 0
roles/master/files/k8s.conf

@@ -0,0 +1,3 @@
+net.bridge.bridge-nf-call-ip6tables = 1
+net.bridge.bridge-nf-call-iptables = 1
+

+ 8 - 0
roles/master/files/kubernetes.repo

@@ -0,0 +1,8 @@
+[kubernetes]
+name=Kubernetes
+baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+

+ 3 - 0
roles/master/files/nvidia

@@ -0,0 +1,3 @@
+#!/bin/sh
+PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" exec nvidia-container-runtime-hook "$@"
+

+ 31 - 0
roles/master/tasks/main.yml

@@ -0,0 +1,31 @@
+---
+- name: Firewall Rule K8s:6443/tcp
+  command: firewall-cmd  --zone=internal --add-port=6443/tcp --permanent
+  tags: master
+
+- name: Firewall Rule K8s:10250/tcp
+  command: firewall-cmd  --zone=internal --add-port=10250/tcp --permanent
+  tags: master
+
+- name: Firewall Reload
+  command: firewall-cmd  --reload
+  tags: master
+
+- name: Create /root/bin (if it doesn't exist)
+  file:
+    path: /root/bin
+    state: directory
+    mode: 0755
+
+- name: Get Helm Installer
+  get_url:
+    url: https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get 
+    dest: /root/bin/get_helm.sh
+    mode: 0700
+  tags: master
+
+- name: Install Helm
+  command: /root/bin/get_helm.sh
+  tags: master
+
+# TODO: install and start up OpenSM (III)
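
The opened ports and Helm client install can be checked afterwards (a sketch; `helm version --client` matches the Helm v2 CLI that `get_helm.sh` installed at the time):

```
firewall-cmd --zone=internal --list-ports
helm version --client
```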

+ 5 - 0
roles/startmaster/files/create_admin_user.yaml

@@ -0,0 +1,5 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: admin-user
+  namespace: kube-system

+ 12 - 0
roles/startmaster/files/create_clusterRoleBinding.yaml

@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: admin-user
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-admin
+subjects:
+- kind: ServiceAccount
+  name: admin-user
+  namespace: kube-system

+ 20 - 0
roles/startmaster/files/data-pv.yaml

@@ -0,0 +1,20 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data-pv
+spec:
+  capacity:
+    storage: 10Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data1
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  #hostPath:
+    #path: /home/k8s/data1
+ 

+ 20 - 0
roles/startmaster/files/data2-pv.yaml

@@ -0,0 +1,20 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data2-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data2
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  #hostPath:
+    #path: /home/k8s/
+ 

+ 20 - 0
roles/startmaster/files/data3-pv.yaml

@@ -0,0 +1,20 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data3-pv
+spec:
+  capacity:
+    storage: 50Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data3
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  #hostPath:
+    #path: /home/k8s/
+ 

+ 20 - 0
roles/startmaster/files/data4-pv.yaml

@@ -0,0 +1,20 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data4-pv
+spec:
+  capacity:
+    storage: 50Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data4
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  #hostPath:
+    #path: /home/k8s/
+ 

+ 1 - 0
roles/startmaster/files/enable_gpu_k8s.sh

@@ -0,0 +1 @@
+kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml

+ 3 - 0
roles/startmaster/files/flannel_net.sh

@@ -0,0 +1,3 @@
+kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/v0.10.0/Documentation/kube-flannel.yml
+kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/k8s-manifests/kube-flannel-rbac.yml
+

+ 16 - 0
roles/startmaster/files/katib-pv.yaml

@@ -0,0 +1,16 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: katib-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  hostPath:
+    path: /home/k8s/katibsql

+ 536 - 0
roles/startmaster/files/kube-flannel.yaml

@@ -0,0 +1,536 @@
+---
+apiVersion: extensions/v1beta1
+kind: PodSecurityPolicy
+metadata:
+  name: psp.flannel.unprivileged
+  annotations:
+    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
+    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
+    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
+    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
+spec:
+  privileged: false
+  volumes:
+    - configMap
+    - secret
+    - emptyDir
+    - hostPath
+  allowedHostPaths:
+    - pathPrefix: "/etc/cni/net.d"
+    - pathPrefix: "/etc/kube-flannel"
+    - pathPrefix: "/run/flannel"
+  readOnlyRootFilesystem: false
+  # Users and groups
+  runAsUser:
+    rule: RunAsAny
+  supplementalGroups:
+    rule: RunAsAny
+  fsGroup:
+    rule: RunAsAny
+  # Privilege Escalation
+  allowPrivilegeEscalation: false
+  defaultAllowPrivilegeEscalation: false
+  # Capabilities
+  allowedCapabilities: ['NET_ADMIN']
+  defaultAddCapabilities: []
+  requiredDropCapabilities: []
+  # Host namespaces
+  hostPID: false
+  hostIPC: false
+  hostNetwork: true
+  hostPorts:
+  - min: 0
+    max: 65535
+  # SELinux
+  seLinux:
+    # SELinux is unused in CaaSP
+    rule: 'RunAsAny'
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1beta1
+metadata:
+  name: flannel
+rules:
+  - apiGroups: ['extensions']
+    resources: ['podsecuritypolicies']
+    verbs: ['use']
+    resourceNames: ['psp.flannel.unprivileged']
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+    verbs:
+      - list
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - nodes/status
+    verbs:
+      - patch
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1beta1
+metadata:
+  name: flannel
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: flannel
+subjects:
+- kind: ServiceAccount
+  name: flannel
+  namespace: kube-system
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: flannel
+  namespace: kube-system
+---
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: kube-flannel-cfg
+  namespace: kube-system
+  labels:
+    tier: node
+    app: flannel
+data:
+  cni-conf.json: |
+    {
+      "name": "cbr0",
+      "plugins": [
+        {
+          "type": "flannel",
+          "delegate": {
+            "hairpinMode": true,
+            "isDefaultGateway": true
+          }
+        },
+        {
+          "type": "portmap",
+          "capabilities": {
+            "portMappings": true
+          }
+        }
+      ]
+    }
+  net-conf.json: |
+    {
+      "Network": "10.244.0.0/16",
+      "Backend": {
+        "Type": "vxlan"
+      }
+    }
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-flannel-ds-amd64
+  namespace: kube-system
+  labels:
+    tier: node
+    app: flannel
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: flannel
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: amd64
+      tolerations:
+      - operator: Exists
+        effect: NoSchedule
+      serviceAccountName: flannel
+      initContainers:
+      - name: install-cni
+        image: quay.io/coreos/flannel:v0.11.0-amd64
+        command:
+        - cp
+        args:
+        - -f
+        - /etc/kube-flannel/cni-conf.json
+        - /etc/cni/net.d/10-flannel.conflist
+        volumeMounts:
+        - name: cni
+          mountPath: /etc/cni/net.d
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      containers:
+      - name: kube-flannel
+        image: quay.io/coreos/flannel:v0.11.0-amd64
+        command:
+        - /opt/bin/flanneld
+        args:
+        - --ip-masq
+        - --kube-subnet-mgr
+        - --iface=ib0
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        securityContext:
+          privileged: false
+          capabilities:
+             add: ["NET_ADMIN"]
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        volumeMounts:
+        - name: run
+          mountPath: /run/flannel
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      volumes:
+        - name: run
+          hostPath:
+            path: /run/flannel
+        - name: cni
+          hostPath:
+            path: /etc/cni/net.d
+        - name: flannel-cfg
+          configMap:
+            name: kube-flannel-cfg
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-flannel-ds-arm64
+  namespace: kube-system
+  labels:
+    tier: node
+    app: flannel
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: flannel
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: arm64
+      tolerations:
+      - operator: Exists
+        effect: NoSchedule
+      serviceAccountName: flannel
+      initContainers:
+      - name: install-cni
+        image: quay.io/coreos/flannel:v0.11.0-arm64
+        command:
+        - cp
+        args:
+        - -f
+        - /etc/kube-flannel/cni-conf.json
+        - /etc/cni/net.d/10-flannel.conflist
+        volumeMounts:
+        - name: cni
+          mountPath: /etc/cni/net.d
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      containers:
+      - name: kube-flannel
+        image: quay.io/coreos/flannel:v0.11.0-arm64
+        command:
+        - /opt/bin/flanneld
+        args:
+        - --ip-masq
+        - --kube-subnet-mgr
+        - --iface=ib0
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        securityContext:
+          privileged: false
+          capabilities:
+             add: ["NET_ADMIN"]
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        volumeMounts:
+        - name: run
+          mountPath: /run/flannel
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      volumes:
+        - name: run
+          hostPath:
+            path: /run/flannel
+        - name: cni
+          hostPath:
+            path: /etc/cni/net.d
+        - name: flannel-cfg
+          configMap:
+            name: kube-flannel-cfg
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-flannel-ds-arm
+  namespace: kube-system
+  labels:
+    tier: node
+    app: flannel
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: flannel
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: arm
+      tolerations:
+      - operator: Exists
+        effect: NoSchedule
+      serviceAccountName: flannel
+      initContainers:
+      - name: install-cni
+        image: quay.io/coreos/flannel:v0.11.0-arm
+        command:
+        - cp
+        args:
+        - -f
+        - /etc/kube-flannel/cni-conf.json
+        - /etc/cni/net.d/10-flannel.conflist
+        volumeMounts:
+        - name: cni
+          mountPath: /etc/cni/net.d
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      containers:
+      - name: kube-flannel
+        image: quay.io/coreos/flannel:v0.11.0-arm
+        command:
+        - /opt/bin/flanneld
+        args:
+        - --ip-masq
+        - --kube-subnet-mgr
+        - --iface=ib0
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        securityContext:
+          privileged: false
+          capabilities:
+             add: ["NET_ADMIN"]
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        volumeMounts:
+        - name: run
+          mountPath: /run/flannel
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      volumes:
+        - name: run
+          hostPath:
+            path: /run/flannel
+        - name: cni
+          hostPath:
+            path: /etc/cni/net.d
+        - name: flannel-cfg
+          configMap:
+            name: kube-flannel-cfg
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-flannel-ds-ppc64le
+  namespace: kube-system
+  labels:
+    tier: node
+    app: flannel
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: flannel
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: ppc64le
+      tolerations:
+      - operator: Exists
+        effect: NoSchedule
+      serviceAccountName: flannel
+      initContainers:
+      - name: install-cni
+        image: quay.io/coreos/flannel:v0.11.0-ppc64le
+        command:
+        - cp
+        args:
+        - -f
+        - /etc/kube-flannel/cni-conf.json
+        - /etc/cni/net.d/10-flannel.conflist
+        volumeMounts:
+        - name: cni
+          mountPath: /etc/cni/net.d
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      containers:
+      - name: kube-flannel
+        image: quay.io/coreos/flannel:v0.11.0-ppc64le
+        command:
+        - /opt/bin/flanneld
+        args:
+        - --ip-masq
+        - --kube-subnet-mgr
+        - --iface=ib0
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        securityContext:
+          privileged: false
+          capabilities:
+             add: ["NET_ADMIN"]
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        volumeMounts:
+        - name: run
+          mountPath: /run/flannel
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      volumes:
+        - name: run
+          hostPath:
+            path: /run/flannel
+        - name: cni
+          hostPath:
+            path: /etc/cni/net.d
+        - name: flannel-cfg
+          configMap:
+            name: kube-flannel-cfg
+---
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: kube-flannel-ds-s390x
+  namespace: kube-system
+  labels:
+    tier: node
+    app: flannel
+spec:
+  template:
+    metadata:
+      labels:
+        tier: node
+        app: flannel
+    spec:
+      hostNetwork: true
+      nodeSelector:
+        beta.kubernetes.io/arch: s390x
+      tolerations:
+      - operator: Exists
+        effect: NoSchedule
+      serviceAccountName: flannel
+      initContainers:
+      - name: install-cni
+        image: quay.io/coreos/flannel:v0.11.0-s390x
+        command:
+        - cp
+        args:
+        - -f
+        - /etc/kube-flannel/cni-conf.json
+        - /etc/cni/net.d/10-flannel.conflist
+        volumeMounts:
+        - name: cni
+          mountPath: /etc/cni/net.d
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      containers:
+      - name: kube-flannel
+        image: quay.io/coreos/flannel:v0.11.0-s390x
+        command:
+        - /opt/bin/flanneld
+        args:
+        - --ip-masq
+        - --kube-subnet-mgr
+        - --iface=ib0
+        resources:
+          requests:
+            cpu: "100m"
+            memory: "50Mi"
+          limits:
+            cpu: "100m"
+            memory: "50Mi"
+        securityContext:
+          privileged: false
+          capabilities:
+             add: ["NET_ADMIN"]
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        volumeMounts:
+        - name: run
+          mountPath: /run/flannel
+        - name: flannel-cfg
+          mountPath: /etc/kube-flannel/
+      volumes:
+        - name: run
+          hostPath:
+            path: /run/flannel
+        - name: cni
+          hostPath:
+            path: /etc/cni/net.d
+        - name: flannel-cfg
+          configMap:
+            name: kube-flannel-cfg
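
Note that every per-architecture DaemonSet above pins flannel to the InfiniBand interface with `--iface=ib0`. Rollout can be checked from the master:

```
kubectl -n kube-system get ds -l app=flannel
```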

+ 51 - 0
roles/startmaster/files/kubeflow_persistent_volumes.yaml

@@ -0,0 +1,51 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data1-pv
+spec:
+  capacity:
+    storage: 10Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data1
+  persistentVolumeReclaimPolicy: Recycle
+ 
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data2-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data2
+  persistentVolumeReclaimPolicy: Recycle
+ 
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: data3-pv
+spec:
+  capacity:
+    storage: 50Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s/data3
+  persistentVolumeReclaimPolicy: Recycle
+ 

+ 16 - 0
roles/startmaster/files/minio-pvc.yaml

@@ -0,0 +1,16 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: minio-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  hostPath:
+    path: /home/k8s

+ 17 - 0
roles/startmaster/files/mysql-pv.yaml

@@ -0,0 +1,17 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: mysql-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  hostPath:
+    path: /home/k8s/
+ 

+ 7 - 0
roles/startmaster/files/nfs-class.yaml

@@ -0,0 +1,7 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: managed-nfs-storage
+provisioner: fuseim.pri/ifs # or choose another name; must match the deployment's PROVISIONER_NAME env var
+parameters:
+  archiveOnDelete: "false"

+ 32 - 0
roles/startmaster/files/nfs-deployment.yaml

@@ -0,0 +1,32 @@
+kind: Deployment
+apiVersion: extensions/v1beta1
+metadata:
+  name: nfs-client-provisioner
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: nfs-client-provisioner
+    spec:
+      serviceAccountName: nfs-client-provisioner
+      containers:
+        - name: nfs-client-provisioner
+          image: quay.io/external_storage/nfs-client-provisioner:latest
+          volumeMounts:
+            - name: nfs-client-root
+              mountPath: /persistentvolumes
+          env:
+            - name: PROVISIONER_NAME
+              value: fuseim.pri/ifs
+            - name: NFS_SERVER
+              value: 10.0.0.1
+            - name: NFS_PATH
+              value: /work/k8s
+      volumes:
+        - name: nfs-client-root
+          nfs:
+            server: 10.0.0.1
+            path: /work/k8s

+ 4 - 0
roles/startmaster/files/nfs-serviceaccount.yaml

@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: nfs-client-provisioner

+ 20 - 0
roles/startmaster/files/nfs_clusterrole.yaml

@@ -0,0 +1,20 @@
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: nfs-client-provisioner-runner
+rules:
+  - apiGroups: [""]
+    resources: ["persistentvolumes"]
+    verbs: ["get", "list", "watch", "create", "delete"]
+  - apiGroups: [""]
+    resources: ["persistentvolumeclaims"]
+    verbs: ["get", "list", "watch", "update"]
+  - apiGroups: ["storage.k8s.io"]
+    resources: ["storageclasses"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["events"]
+    verbs: ["create", "update", "patch"]
+  - apiGroups: [""]
+    resources: ["endpoints"]
+    verbs: ["get", "list", "watch", "create", "update", "patch"]

+ 12 - 0
roles/startmaster/files/nfs_clusterrolebinding.yaml

@@ -0,0 +1,12 @@
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: run-nfs-client-provisioner
+subjects:
+  - kind: ServiceAccount
+    name: nfs-client-provisioner
+    namespace: default
+roleRef:
+  kind: ClusterRole
+  name: nfs-client-provisioner-runner
+  apiGroup: rbac.authorization.k8s.io

+ 17 - 0
roles/startmaster/files/notebook-pv.yaml

@@ -0,0 +1,17 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: notebooks-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  - ReadWriteOnce
+  - ReadOnlyMany
+  - ReadWriteMany
+  #persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  hostPath:
+    path: /home/k8s/
+ 

+ 20 - 0
roles/startmaster/files/persistent_volumes.yaml

@@ -0,0 +1,20 @@
+# yaml file contents
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: nfs-pv
+spec:
+  capacity:
+    storage: 20Gi
+  accessModes:
+  #- ReadWriteOnce
+  #- ReadOnlyMany
+  - ReadWriteMany
+  nfs:
+    server: 10.0.0.1
+    path: /work/k8s
+  #persistentVolumeReclaimPolicy: Recycle
+  #storageClassName: local-storage
+  #hostPath:
+    #path: /home/k8s
+ 

+ 12 - 0
roles/startmaster/files/pvc.yaml

@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: pets-pvc
+  namespace: kubeflow
+spec:
+  accessModes:
+  - ReadWriteMany
+  storageClassName: ""
+  resources:
+    requests:
+      storage: 20Gi

+ 3 - 0
roles/startmaster/files/tiller_config.sh

@@ -0,0 +1,3 @@
+kubectl create serviceaccount --namespace kube-system tiller
+kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
+kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
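
The patch step assumes `helm init` has already deployed `tiller-deploy`; alternatively (Helm v2), the service account can be set at init time, which makes the patch unnecessary:

```
helm init --service-account tiller
```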

+ 145 - 0
roles/startmaster/tasks/main.yml

@@ -0,0 +1,145 @@
+---
+- name: Turn Swap OFF (if not already disabled)
+  command: /usr/sbin/swapoff -a
+  tags: init
+
+- name: Initialize kubeadm
+  command: /bin/kubeadm init --pod-network-cidr=10.244.0.0/16 --apiserver-advertise-address=10.0.0.1
+  #command: /bin/kubeadm init 
+  register: init_output 
+  tags: init
+
+- name: Setup Directory for Kubernetes environment for root
+  file: path=/root/.kube state=directory
+  tags: init
+
+- name: Copy Kubernetes Config for root #do this for other users too?
+  copy: src=/etc/kubernetes/admin.conf dest=/root/.kube/config owner=root group=root mode=644
+  tags: init
+
+- name: Cluster token
+  shell: kubeadm token list | cut -d ' ' -f1 | sed -n '2p'
+  register: K8S_TOKEN
+  tags: init
+
+- name: CA Hash
+  shell: openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
+  register: K8S_MASTER_CA_HASH
+  tags: init
+
+- name: Add K8S Master IP, Token, and Hash to dummy host
+  add_host:
+    name:   "K8S_TOKEN_HOLDER"
+    token:  "{{ K8S_TOKEN.stdout }}"
+    hash:   "{{ K8S_MASTER_CA_HASH.stdout }}"
+    #ip:     "{{ ansible_ib0.ipv4.address }}"
+    ip:     "{{ ansible_p3p1.ipv4.address }}"
+  tags: init
+
+- name: Show K8S token
+  debug:
+    msg: "[Master] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}"
+  tags: init
+
+- name: Show K8S CA hash
+  debug:
+    msg: "[Master] K8S_TOKEN_HOLDER K8S Hash is  {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}"
+  tags: init
+
+- name: Show K8S master IP
+  debug:
+    msg: "[Master] K8S_MASTER_IP is  {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}"
+  tags: init
+
+  
+- name: Setup Flannel SDN network
+  shell: kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
+  tags: init
+
+- name: Enable GPU support in Kubernetes
+  #script: enable_gpu_k8s.sh
+  shell: kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta4/nvidia-device-plugin.yml
+                           #https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml
+  register: gpu_enable
+  tags: init
+
+
+- name: Create yaml repo for setup
+  file: 
+    path: /root/k8s 
+    state: directory
+  tags: init
+
+
+#- name: Persistent Volume Setup Files
+  #copy: src=persistent_volumes.yaml dest=/root/k8s/persistent_volumes.yaml owner=root group=root mode=655
+  #tags: init
+#
+#- name: Persistent Volume Setup - Apply 
+  #shell: kubectl apply -f /root/k8s/persistent_volumes.yaml
+  #tags: init
+  #
+
+#- name: Copy Service Account (NFS Setup)
+  #copy: src=nfs-serviceaccount.yaml dest=/root/k8s/nfs-serviceaccount.yaml owner=root group=root mode=655
+  #tags: init
+#
+#- name: Copy Cluster Role (NFS Setup)
+  #copy: src=nfs_clusterrole.yaml dest=/root/k8s/nfs_clusterrole.yaml owner=root group=root mode=655
+  #tags: init
+#
+#- name: Copy Cluster Role Binding (NFS Setup)
+  #copy: src=nfs_clusterrolebinding.yaml dest=/root/k8s/nfs_clusterrolebinding.yaml owner=root group=root mode=655
+  #tags: init
+#
+#- name: Copy NFS Storage Deployment (NFS Setup)
+  #copy: src=nfs-deployment.yaml dest=/root/k8s/nfs-deployment.yaml owner=root group=root mode=655
+  #tags: init
+#
+#- name: Copy NFS Storage Class (NFS Setup)
+  #copy: src=nfs-class.yaml dest=/root/k8s/nfs-class.yaml owner=root group=root mode=655
+  #tags: init
+#
+#- name: Deploy NFS (NFS Setup)
+  #shell: kubectl create -f /root/k8s/nfs-deployment.yaml -f /root/k8s/nfs-class.yaml -f /root/k8s/nfs-serviceaccount.yaml -f /root/k8s/nfs_clusterrole.yaml -f /root/k8s/nfs_clusterrolebinding.yaml 
+  #tags: init
+
+#- name: Patch NFS Setup (NFS Setup)
+  #shell: kubectl patch deployment nfs-client-provisioner -p '{"spec":{"template":{"spec":{"serviceAccount":"nfs-client-provisioner"}}}}'
+  #tags: init
+
+#- name: Patch NFS Setup (NFS Setup)
+  #shell: "kubectl patch storageclass managed-nfs-storage -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'"
+  #tags: init
+
+  
+- name: Create Service Account (K8S Dashboard) Files
+  copy: src=create_admin_user.yaml dest=/root/k8s/create_admin_user.yaml owner=root group=root mode=0644
+  tags: init
+
+- name: Create Service Account (K8S Dashboard) - Create
+  shell: kubectl create -f /root/k8s/create_admin_user.yaml
+  tags: init
+
+- name: Create ClusterRoleBinding (K8S Dashboard) Files
+  copy: src=create_clusterRoleBinding.yaml dest=/root/k8s/create_clusterRoleBinding.yaml owner=root group=root mode=0644
+  tags: init
+
+- name: Create ClusterRoleBinding (K8S Dashboard) - Apply
+  shell: kubectl create -f /root/k8s/create_clusterRoleBinding.yaml
+  tags: init
+
+- name: Start K8S Dashboard
+  shell: kubectl create -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0-beta6/aio/deploy/recommended.yaml
+  tags: init
+
+- name: Dump Bearer Token for K8S Dashboard Login
+  shell: kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}') > /root/k8s/token
+  tags: init
+
+#- name: Proxy K8S Dashboard to 8001 on localhost
+  #shell: nohup kubectl proxy </dev/null >/dev/null 2>&1 & 
+  #tags: init
+
+#- debug: var=init_output.stdout_lines
+  #tags: init
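
After the play completes, the dashboard is reachable through `kubectl proxy` using the bearer token dumped above (hedged; the URL below is the standard proxy path for dashboard v2.0.0-beta6):

```
cat /root/k8s/token
kubectl proxy &
# http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/
```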

+ 33 - 0
roles/startworkers/tasks/main.yml

@@ -0,0 +1,33 @@
+---
+
+- name: Turn Swap OFF (if not already disabled)
+  command: /usr/sbin/swapoff -a
+  tags: init
+
+#- name:
+  #debug:
+    #msg: "[Worker] K8S_TOKEN_HOLDER K8S token is {{ hostvars['K8S_TOKEN_HOLDER']['token'] }}"
+  #tags: init
+
+#- name:
+  #debug:
+    #msg: "[Worker] K8S_TOKEN_HOLDER K8S Hash is  {{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}"
+  #tags: init
+
+#- name:
+  #debug:
+    #msg: "[Worker] K8S_MASTER_IP is  {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}"
+  #tags: init
+
+- name: "Kubeadm join"
+  shell: >
+    kubeadm join --token={{ hostvars['K8S_TOKEN_HOLDER']['token'] }}
+    --discovery-token-ca-cert-hash sha256:{{ hostvars['K8S_TOKEN_HOLDER']['hash'] }}
+    {{ hostvars['K8S_TOKEN_HOLDER']['ip'] }}:6443
+  tags: init
+
+
+#- name: Join Computes to pool
+#   command: "{{ kubeJoinCommand }}"
+# tags: init
+
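
Joins can be confirmed from the master once the play finishes:

```
kubectl get nodes -o wide
```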

+ 19 - 0
scuttle

@@ -0,0 +1,19 @@
+#!/bin/bash
+# Tear the cluster down: reset kubeadm on every node and remove CNI/flannel state.
+
+kubeadm reset -f
+clush -ab "kubeadm reset -f"
+rm -rf /var/lib/etcd/*
+clush -ab "rm -rf /var/lib/etcd/*"
+rm -rf /var/lib/cni/
+clush -ab "rm -rf /var/lib/cni/"
+rm -rf /run/flannel/
+clush -ab "rm -rf /run/flannel/"
+rm -rf /etc/cni/
+clush -ab "rm -rf /etc/cni/"
+ifconfig cni0 down
+clush -ab "ifconfig cni0 down"
+ifconfig flannel.1 down
+clush -ab "ifconfig flannel.1 down"
+brctl delbr flannel.1
+clush -ab "brctl delbr flannel.1"
+clush -ab "brctl delbr cni0"
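
`scuttle` is destructive and meant to run from the master; the `clush -ab` calls broadcast each command to all nodes, which assumes ClusterShell is installed and a node group is configured (a hedged setup sketch; the group file path and `all:` syntax are ClusterShell defaults):

```
yum install -y clustershell
echo "all: compute[000,002-005]" > /etc/clustershell/groups.d/local.cfg
./scuttle
```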