|
@@ -17,38 +17,10 @@
|
|
|
include_vars: ../../slurm_exporter/vars/main.yml
|
|
|
|
|
|
- name: Wait for CoreDNS to restart
|
|
|
- block:
|
|
|
- - name: Wait for CoreDNS to restart
|
|
|
- command: kubectl rollout status deployment/coredns -n kube-system --timeout=4m
|
|
|
- changed_when: false
|
|
|
- tags: init
|
|
|
- rescue:
|
|
|
- - name: Get K8s pods
|
|
|
- command: kubectl get pods --all-namespaces
|
|
|
- register: k8s_pods
|
|
|
- tags: init
|
|
|
-
|
|
|
- - name: Pull docker images
|
|
|
- command: docker pull {{ item }}
|
|
|
- with_items: "{{ kube_system_docker_images }}"
|
|
|
- when:
|
|
|
- - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
|
|
|
- - "'ImagePullBackOff' in k8s_pods.stdout"
|
|
|
- register: docker_image_pull_result
|
|
|
- until: docker_image_pull_result is not failed
|
|
|
- retries: 5
|
|
|
-
|
|
|
- - name: Wait for CoreDNS to restart
|
|
|
- command: kubectl rollout status deployment/coredns -n kube-system
|
|
|
- when: hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
|
|
|
- tags: init
|
|
|
-
|
|
|
- - name: Fail message
|
|
|
- fail:
|
|
|
- msg: "{{ docker_pull_limit_msg }}"
|
|
|
- when:
|
|
|
- - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
|
|
|
- - not hostvars['127.0.0.1']['docker_username'] and not hostvars['127.0.0.1']['docker_password']
|
|
|
+ command: kubectl rollout status deployment/coredns -n kube-system --timeout=5m
|
|
|
+ changed_when: false
|
|
|
+ failed_when: false
|
|
|
+ tags: init
|
|
|
|
|
|
- name: Get K8s pods
|
|
|
command: kubectl get pods --all-namespaces
|
|
@@ -139,7 +111,7 @@
|
|
|
- name: Set NFS-Client Provisioner as DEFAULT StorageClass
|
|
|
shell: >
|
|
|
kubectl patch storageclasses.storage.k8s.io nfs-client \
|
|
|
- -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
|
|
|
+ -p '{ "metadata": { "annotations":{ "storageclass.kubernetes.io/is-default-class":"true" }}}'
|
|
|
changed_when: true
|
|
|
tags: init
|
|
|
|
|
@@ -224,35 +196,52 @@
|
|
|
when: "'amdgpu-device-plugin' not in k8s_pods.stdout"
|
|
|
tags: init
|
|
|
|
|
|
+- name: Deploy Volcano Scheduling
|
|
|
+ command: "kubectl apply -f '{{ volcano_scheduling_yaml_url }}'"
|
|
|
+ changed_when: true
|
|
|
+ when: "'volcano-system' not in k8s_pods.stdout"
|
|
|
+ tags: init
|
|
|
+
|
|
|
- name: Install Spark Operator
|
|
|
command: "helm repo add spark-operator '{{ spark_operator_repo }}'"
|
|
|
changed_when: true
|
|
|
tags: init
|
|
|
|
|
|
- name: Install Spark Operator Namespace
|
|
|
- command: "helm install my-release spark-operator/spark-operator --namespace spark-operator --create-namespace"
|
|
|
+ command: helm install my-release spark-operator/spark-operator --set image.tag={{ operator_image_tag }} --namespace spark-operator --create-namespace
|
|
|
changed_when: true
|
|
|
when: "'spark-operator' not in k8s_pods.stdout"
|
|
|
tags: init
|
|
|
|
|
|
-- name: Deploy Volcano Scheduling
|
|
|
- command: "kubectl apply -f '{{ volcano_scheduling_yaml_url }}'"
|
|
|
- changed_when: true
|
|
|
- when: "'volcano-system' not in k8s_pods.stdout"
|
|
|
- tags: init
|
|
|
+- name: Wait for k8s pod to come to ready state
|
|
|
+ block:
|
|
|
+ - name: Wait for k8s pod to come to ready state
|
|
|
+ command: "kubectl wait --for=condition=ready -n {{ item.namespace }} pod -l app={{ item.app }} --timeout={{ item.timeout }}"
|
|
|
+ with_items:
|
|
|
+ - { namespace: "default", app: "nfs-client-provisioner", timeout: "10m" }
|
|
|
+ - { namespace: "volcano-system", app: "volcano-scheduler", timeout: "5m" }
|
|
|
+ changed_when: false
|
|
|
+ tags: install
|
|
|
+ rescue:
|
|
|
+ - name: Get K8s pods
|
|
|
+ command: kubectl get pods --all-namespaces
|
|
|
+ changed_when: false
|
|
|
+ register: k8s_pods
|
|
|
+ tags: init
|
|
|
|
|
|
-- name: Get K8s pods
|
|
|
- command: kubectl get pods --all-namespaces
|
|
|
- changed_when: false
|
|
|
- register: k8s_pods
|
|
|
- tags: init
|
|
|
+ - name: Fail message
|
|
|
+ fail:
|
|
|
+ msg: "{{ docker_pull_limit_msg }}"
|
|
|
+ when:
|
|
|
+ - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
|
|
|
+ - not hostvars['127.0.0.1']['docker_username'] and not hostvars['127.0.0.1']['docker_password']
|
|
|
|
|
|
-- name: Pull K8s services docker images
|
|
|
- command: docker pull {{ item }}
|
|
|
- with_items: "{{ k8s_services_docker_images }}"
|
|
|
- when:
|
|
|
- - "'ImagePullBackOff' in k8s_pods.stdout"
|
|
|
- - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
|
|
|
- register: docker_image_pull_result
|
|
|
- until: docker_image_pull_result is not failed
|
|
|
- retries: 5
|
|
|
+ - name: Pull K8s services docker images
|
|
|
+ command: docker pull {{ item }}
|
|
|
+ with_items: "{{ k8s_docker_images }}"
|
|
|
+ when:
|
|
|
+ - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
|
|
|
+ - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
|
|
|
+ register: docker_image_pull_result
|
|
|
+ until: docker_image_pull_result is not failed
|
|
|
+ retries: 5
|