Forráskód Böngészése

Issue #277: Updated .md files for Omnia Core and Appliance

Signed-off-by: avinashvishwanath <avinash_vishwanath@dell.com>
Lucas A. Wilson 4 éve
szülő
commit
7eb741f515

+ 9 - 0
appliance/roles/common/tasks/docker_installation.yml

@@ -65,6 +65,15 @@
     executable: pip3
   tags: install
 
+- name: Versionlock docker
+  command: "yum versionlock '{{ item }}'"
+  args:
+    warn: false
+  with_items:
+    - "{{ container_repo_install }}"
+  changed_when: true
+  tags: install
+
 - name: Configure docker
   copy:
     src: daemon.json

+ 4 - 1
appliance/roles/common/vars/main.yml

@@ -33,6 +33,7 @@ common_packages:
   - python-docker
   - net-tools
   - python-netaddr
+  - yum-plugin-versionlock
 
 # Usage: pre_requisite.yml
 internet_delay: 0
@@ -52,7 +53,9 @@ docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo
 docker_repo_dest: /etc/yum.repos.d/docker-ce.repo
 success: '0'
 container_type: docker
-container_repo_install: docker-ce
+container_repo_install:
+  - docker-ce-cli-20.10.2
+  - docker-ce-20.10.2
 docker_compose: docker-compose
 daemon_dest: /etc/docker/
 

+ 1 - 1
docs/INSTALL_OMNIA.md

@@ -59,7 +59,7 @@ The following __kubernetes__ roles are provided by Omnia when __omnia.yml__ file
 - **k8s_manager** role: 
 	- __helm__ package for Kubernetes is installed.
 - **k8s_firewalld** role: This role is used to enable the required ports to be used by Kubernetes. 
-	- For __head-node-ports__: 6443, 2379-2380,10251,10252
+	- For __head-node-ports__: 6443,2379-2380,10251,10250,10252
 	- For __compute-node-ports__: 10250,30000-32767
 	- For __calico-udp-ports__: 4789
 	- For __calico-tcp-ports__: 5473,179

+ 1 - 1
docs/INSTALL_OMNIA_APPLIANCE.md

@@ -137,7 +137,7 @@ The following __kubernetes__ roles are provided by Omnia when __omnia.yml__ file
 - **k8s_manager** role: 
 	- __helm__ package for Kubernetes is installed.
 - **k8s_firewalld** role: This role is used to enable the required ports to be used by Kubernetes. 
-	- For __head-node-ports__: 6443, 2379-2380,10251,10252
+	- For __head-node-ports__: 6443,2379-2380,10251,10250,10252
 	- For __compute-node-ports__: 10250,30000-32767
 	- For __calico-udp-ports__: 4789
 	- For __calico-tcp-ports__: 5473,179

+ 10 - 6
docs/MONITOR_CLUSTERS.md

@@ -36,14 +36,18 @@ __Note:__ Kuberenetes and Slurm job can be scheduled only for users with __sudo_
 
 ## Access Kubeflow Dashboard
 
-It is recommended that use port numbers between __8000-8999__ and suggested port number is __8085__.
+It is recommended that you use port numbers between __8000-8999__ and the suggested port number is __8085__.
 
-1. To see which are the ports are in use, use the following command:
+1. To view the ports which are in use, run the following command:
    `netstat -an`
-2. Choose a port number between __8000-8999__ which is not in use.
-3. To run the __kubeflow__ dashboard at selected port number, run the following command:
-   `kubectl port-forward -n kubeflow service/centraldashboard __selected_port_number__:80`
-4. On a web browser installed on the __manager node__, go to http://localhost:selected-port-number/ to launch the kubeflow central navigation dashboard.
+2. Select a port number between __8000-8999__ which is not in use.
+3. To run the **Kubeflow Dashboard** at selected port number, run one of the following commands:  
+	`kubectl port-forward -n kubeflow service/centraldashboard __selected_port_number__:80`  
+	(Or)  
+	`kubectl port-forward -n istio-system svc/istio-ingressgateway __selected_port_number__:80`
+4. On a web browser installed on the manager node, go to http://localhost:selected-port-number/ to launch the Kubeflow Central Dashboard.  
+
+For more information about the Kubeflow Central Dashboard, see https://www.kubeflow.org/docs/components/central-dash/overview/.
 
 ## Access JupyterHub Dashboard
 

+ 9 - 0
roles/common/tasks/main.yml

@@ -62,6 +62,15 @@
     state: present
   tags: install
 
+- name: Versionlock docker
+  command: "yum versionlock '{{ item }}'"
+  args:
+    warn: false
+  with_items:
+    - "{{ docker_packages }}"
+  changed_when: true
+  tags: install
+
 - name: Collect host facts (including accelerator information)
   setup: ~
 

+ 7 - 2
roles/common/vars/main.yml

@@ -23,9 +23,14 @@ common_packages:
   - nvidia-detect
   - chrony
   - pciutils
-  - docker-ce
+  - docker-ce-cli-20.10.2
+  - docker-ce-20.10.2
   - openssl
 
+docker_packages:
+  - docker-ce-cli-20.10.2
+  - docker-ce-20.10.2
+
 custom_fact_dir: /etc/ansible/facts.d
 
 custom_fact_dir_mode: 0755
@@ -52,7 +57,7 @@ delay_count_one: "60"
 retry_count: "6"
 delay_count: "10"
 
-ntp_servers: 
+ntp_servers:
   - 0.centos.pool.ntp.org
   - 1.centos.pool.ntp.org
   - 2.centos.pool.ntp.org

+ 7 - 0
roles/slurm_start_services/tasks/main.yml

@@ -39,6 +39,13 @@
     enabled: yes
   tags: install
 
+- name: Check slurmdbd is active
+  systemd:
+    name: slurmdbd
+  register: slurmdbd_status
+  until: 'slurmdbd_status.status.ActiveState=="active"'
+  retries: 20
+
 - name: Show cluster if exists
   command: sacctmgr -n show cluster {{ cluster_name }}
   register: slurm_clusterlist