Browse Source

Issue #431: Offline repo creation for iDRAC firmware updates

Signed-off-by: Blesson James <blesson_james@Dellteam.com>
Lucas A. Wilson 3 years ago
parent
commit
b0553cd45c
38 changed files with 337 additions and 20 deletions
  1. 2 2
      control_plane/control_plane.yml
  2. 4 1
      control_plane/roles/control_plane_common/tasks/main.yml
  3. 0 0
      control_plane/roles/control_plane_common/tasks/nfs_server_setup.yml
  4. 2 0
      control_plane/roles/control_plane_common/tasks/verify_omnia_params.yml
  5. 11 1
      control_plane/roles/control_plane_common/vars/main.yml
  6. 12 0
      control_plane/roles/control_plane_k8s/tasks/k8s_init.yml
  7. 17 0
      control_plane/roles/control_plane_k8s/tasks/k8s_installation.yml
  8. 3 0
      control_plane/roles/control_plane_k8s/vars/main.yml
  9. 1 0
      control_plane/roles/control_plane_repo/files/C4140_inv.xml
  10. 1 0
      control_plane/roles/control_plane_repo/files/C6420_inv.xml
  11. 1 0
      control_plane/roles/control_plane_repo/files/C6520_inv.xml
  12. 1 0
      control_plane/roles/control_plane_repo/files/R240_inv.xml
  13. 1 0
      control_plane/roles/control_plane_repo/files/R340_inv.xml
  14. 1 0
      control_plane/roles/control_plane_repo/files/R430_inv.xml
  15. 1 0
      control_plane/roles/control_plane_repo/files/R440_inv.xml
  16. 1 0
      control_plane/roles/control_plane_repo/files/R540_inv.xml
  17. 1 0
      control_plane/roles/control_plane_repo/files/R640_inv.xml
  18. 1 0
      control_plane/roles/control_plane_repo/files/R650_inv.xml
  19. 1 0
      control_plane/roles/control_plane_repo/files/R740_inv.xml
  20. 1 0
      control_plane/roles/control_plane_repo/files/R740xd2_inv.xml
  21. 1 0
      control_plane/roles/control_plane_repo/files/R740xd_inv.xml
  22. 1 0
      control_plane/roles/control_plane_repo/files/R750_inv.xml
  23. 1 0
      control_plane/roles/control_plane_repo/files/R750xa_inv.xml
  24. 1 0
      control_plane/roles/control_plane_repo/files/R840_inv.xml
  25. 1 0
      control_plane/roles/control_plane_repo/files/R940_inv.xml
  26. 1 0
      control_plane/roles/control_plane_repo/files/R940xa_inv.xml
  27. 18 0
      control_plane/roles/control_plane_repo/files/poweredge_models.txt
  28. 20 0
      control_plane/roles/control_plane_repo/tasks/download_fmw_updates.yml
  29. 40 0
      control_plane/roles/control_plane_repo/tasks/install_dsu.yml
  30. 13 2
      control_plane/roles/control_plane_repo/tasks/main.yml
  31. 41 0
      control_plane/roles/control_plane_repo/tasks/validate_idrac_vars.yml
  32. 11 9
      control_plane/roles/control_plane_repo/vars/main.yml
  33. 11 0
      omnia_config.yml
  34. 2 0
      roles/cluster_validation/tasks/fetch_password.yml
  35. 34 0
      roles/k8s_start_manager/tasks/main.yml
  36. 2 0
      roles/k8s_start_manager/vars/main.yml
  37. 49 5
      roles/k8s_start_services/tasks/main.yml
  38. 27 0
      roles/k8s_start_services/vars/main.yml

+ 2 - 2
control_plane/control_plane.yml

@@ -18,11 +18,11 @@
   connection: local
   roles:
     - control_plane_common
-    - control_plane_repo
     - control_plane_k8s
     - control_plane_device
     - provision_cobbler
+    - webui_awx
     - control_plane_ib
     - control_plane_sm
     - control_plane_customiso
-    - webui_awx
+    - control_plane_repo

+ 4 - 1
control_plane/roles/control_plane_common/tasks/main.yml

@@ -36,4 +36,7 @@
 
 - name: Subnet manager inputs validation
   import_tasks: fetch_sm_inputs.yml
-  when: ib_switch_support
+  when: ib_switch_support
+
+# Imported unconditionally (no `when`), unlike the subnet manager validation
+# above which only runs when ib_switch_support is set.
+- name: NFS Server setup for offline repo and awx
+  import_tasks: nfs_server_setup.yml

control_plane/roles/control_plane_repo/tasks/nfs_server_setup.yml → control_plane/roles/control_plane_common/tasks/nfs_server_setup.yml


+ 2 - 0
control_plane/roles/control_plane_common/tasks/verify_omnia_params.yml

@@ -87,6 +87,8 @@
     db_password: "{{ mariadb_password }}"
     k8s_version: "{{ k8s_version }}"
     k8s_cni: "{{ k8s_cni }}"
+    docker_username: "{{ docker_username }}"
+    docker_password: "{{ docker_password }}"
   no_log: True
 
 - name: Encrypt input config file

+ 11 - 1
control_plane/roles/control_plane_common/vars/main.yml

@@ -141,4 +141,14 @@ fail_msg_config_file: ib_vars.yml file doesn't exist.
 fail_msg_opensm_config_file: opensm.conf file doesn't exist.
 
 fail_msg_ib_input_definition: Infiniband config directories must be defined.
-fail_msg_ib_input: Infiniband config directories can't be left empty.
+fail_msg_ib_input: Infiniband config directories can't be left empty.
+
+# Usage: nfs_server_setup.yml
+# Share locations for the offline repository and for AWX.
+nfs_share_offline_repo: /var/nfs_repo
+nfs_share_awx: /var/nfs_awx
+# Assumed to be applied to the share directories (TODO confirm in
+# nfs_server_setup.yml). A directory needs the execute (search) bit to be
+# entered, which 0644 lacks, so 0755 is used. Quoted so the value is kept
+# as an octal string instead of being re-typed by the YAML parser.
+nfs_share_dir_mode: "0755"
+exports_file_path: /etc/exports
+# NOTE(review): presumably firewalld service names - verify against the tasks
+# that consume this list.
+nfs_services:
+  - mountd
+  - rpc-bind
+  - nfs

+ 12 - 0
control_plane/roles/control_plane_k8s/tasks/k8s_init.yml

@@ -36,6 +36,18 @@
   ignore_errors: True
   register: k8s_pods
 
+# Log in to the Docker registry when any credential was supplied.
+# The password is fed through stdin and task output is suppressed so the
+# secret never shows up on the process command line or in Ansible logs.
+# ignore_errors is kept so the next task can report a readable message.
+- name: Docker login
+  command: docker login -u {{ docker_username | quote }} --password-stdin
+  args:
+    stdin: "{{ docker_password }}"
+  changed_when: true
+  register: docker_login_output
+  ignore_errors: true
+  no_log: true
+  when: docker_username or docker_password
+
+# Converts a failed login into an explicit, user-facing error.
+# A skipped login (no credentials given) does not count as failed.
+- name: Docker login check
+  fail:
+    msg: "{{ docker_login_fail_msg }}"
+  when: docker_login_output is failed
+
+
 - name: Initialize kubeadm
   block:
     - name: Initialize kubeadm

+ 17 - 0
control_plane/roles/control_plane_k8s/tasks/k8s_installation.yml

@@ -105,6 +105,23 @@
     - "{{ k8s_packages }}"
   changed_when: true
 
+# Both tasks run only when a Docker username AND password are supplied.
+# The repo download is retried up to 20 times, 10 seconds apart.
+- name: Add docker community edition repository for docker-ce-cli
+  when: docker_username and docker_password
+  get_url:
+    dest: "{{ docker_repo_dest }}"
+    url: "{{ docker_repo_url }}"
+  register: docker_repo
+  retries: 20
+  delay: 10
+  until: docker_repo is not failed
+  tags: install
+
+- name: Install docker-ce-cli
+  when: docker_username and docker_password
+  package:
+    state: present
+    name: docker-ce-cli
+
 - name: Start and enable crio
   service:
     name: crio

+ 3 - 0
control_plane/roles/control_plane_k8s/vars/main.yml

@@ -33,6 +33,8 @@ crio_repo1_url: https://download.opensuse.org/repositories/devel:/kubic:/libcont
 crio_repo1_dest: /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo
 crio_repo2_url: https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:1.21/CentOS_8/devel:kubic:libcontainers:stable:cri-o:1.21.repo
 crio_repo2_dest: /etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:1.21.repo
+docker_repo_url: https://download.docker.com/linux/centos/docker-ce.repo
+docker_repo_dest: /etc/yum.repos.d/docker-ce.repo
 
 # Usage: k8s_firewalld.yml
 k8s_master_ports:
@@ -56,6 +58,7 @@ helm_installer_file_mode: 0700
 helm_stable_repo_url: https://charts.helm.sh/stable
 
 # Usage: k8s_init.yml
+docker_login_fail_msg: "Docker login failed! Please check the credentials and re-execute playbook."
 k8s_root_directory: /root/.kube
 k8s_root_directory_mode: 0755
 k8s_config_src: /etc/kubernetes/admin.conf

File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/C4140_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/C6420_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/C6520_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R240_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R340_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R430_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R440_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R540_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R640_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R650_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R740_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R740xd2_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R740xd_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R750_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R750xa_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R840_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R940_inv.xml


File diff suppressed because it is too large
+ 1 - 0
control_plane/roles/control_plane_repo/files/R940xa_inv.xml


+ 18 - 0
control_plane/roles/control_plane_repo/files/poweredge_models.txt

@@ -0,0 +1,18 @@
+C4140
+C6420
+C6520
+R240
+R340
+R430
+R440
+R540
+R640
+R650
+R740
+R740xd
+R740xd2
+R750
+R750xa
+R840
+R940
+R940xa

+ 20 - 0
control_plane/roles/control_plane_repo/tasks/download_fmw_updates.yml

@@ -0,0 +1,20 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Runs dsu once per requested PowerEdge model, pointing it at that model's
+# inventory file under this role's files/ directory and using the offline
+# repo share as the destination.
+# The command is a folded scalar so no backslash continuation is needed,
+# and `| list` materialises the map() result before it is looped over.
+- name: Downloading Firmware Updates (This process may take few hours depending upon the poweredge_model list)
+  command: >-
+    dsu --destination-type=REP
+    --input-inventory-file="{{ role_path }}/files/{{ item }}_inv.xml"
+    --destination-location="{{ nfs_share_offline_repo }}"
+    --non-interactive
+  with_items: "{{ poweredge_model.split(',') | map('trim') | list }}"
+  changed_when: true

+ 40 - 0
control_plane/roles/control_plane_repo/tasks/install_dsu.yml

@@ -0,0 +1,40 @@
+#  Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+---
+
+# Directory that receives the downloaded bootstrap script.
+- name: Creating DSU directory
+  file:
+    state: directory
+    path: "{{ dsu_folder_dest }}"
+    mode: "{{ dsu_folder_dest_mode }}"
+
+# Retried up to 20 times until the download stops failing.
+- name: Download bootstrap.cgi
+  get_url:
+    dest: "{{ dsu_folder_dest }}"
+    url: "{{ bootstrap_repo_url }}"
+  register: bootstrap_repo_result
+  retries: 20
+  until: bootstrap_repo_result is not failed
+
+# "y" is piped into the script (presumably to answer a confirmation prompt).
+# Failure is decided only by the word FAILED on stderr, not by exit status.
+- name: Execute bootstrap.cgi
+  shell: >
+    echo "y" | bash {{ dsu_folder_dest }}/bootstrap.cgi
+  register: bootstrap_execution_result
+  failed_when: "'FAILED' in bootstrap_execution_result.stderr"
+  changed_when: true
+
+- name: Install DSU
+  package:
+    state: present
+    name: dell-system-update

+ 13 - 2
control_plane/roles/control_plane_repo/tasks/main.yml

@@ -13,5 +13,16 @@
 #  limitations under the License.
 ---
 
-- name: NFS Server setup
-  import_tasks: nfs_server_setup.yml
+# Pull in the variables defined by the control_plane_common role.
+- name: Include common variables
+  include_vars: ../../control_plane_common/vars/main.yml
+
+# Validation always runs; the two steps after it are gated on
+# firmware_update_required.
+- name: Validate idrac_vars
+  include_tasks: validate_idrac_vars.yml
+
+- name: Install DSU
+  when: firmware_update_required
+  include_tasks: install_dsu.yml
+
+- name: Download iDRAC firmware updates
+  when: firmware_update_required
+  include_tasks: download_fmw_updates.yml

+ 41 - 0
control_plane/roles/control_plane_repo/tasks/validate_idrac_vars.yml

@@ -0,0 +1,41 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Load the user-supplied iDRAC settings.
+- name: Include variable file idrac_vars.yml
+  include_vars: "{{ idrac_input_filename }}"
+  run_once: true
+
+# firmware_update_required must be a real boolean.
+- name: Assert firmware_update_required value
+  assert:
+    that:
+      - firmware_update_required == true or firmware_update_required == false
+    success_msg: "{{ firmware_update_success_msg }}"
+    fail_msg: "{{ firmware_update_fail_msg }}"
+
+# Read the list of supported models (one model per line).
+# Errors are ignored here; a missing file surfaces through the assert below.
+- name: Read poweredge_model file
+  command: cat {{ role_path }}/files/poweredge_models.txt
+  ignore_errors: true
+  register: poweredge_models_file_output
+  changed_when: false
+
+# Each comma-separated entry must match one line of the supported-model
+# file exactly. Comparing against stdout_lines (not the raw stdout text)
+# prevents fragments such as "R74" or "xd" from passing as a substring
+# of a longer model name.
+- name: Assert poweredge_model value
+  assert:
+    that:
+      - item | length > 1
+      - item in poweredge_models_file_output.stdout_lines
+    success_msg: "{{ poweredge_model_success_msg }}"
+    fail_msg: "{{ poweredge_model_fail_msg }}"
+  when: firmware_update_required
+  with_items: "{{ poweredge_model.split(',') | map('trim') | list }}"

+ 11 - 9
control_plane/roles/control_plane_repo/vars/main.yml

@@ -15,12 +15,14 @@
 
 # vars file for offline_repo
 
-# Usage: nfs_server_setup.yml
-nfs_share_offline_repo: /var/nfs_repo
-nfs_share_awx: /var/nfs_awx
-nfs_share_dir_mode: 0777
-exports_file_path: /etc/exports
-nfs_services:
-  - mountd
-  - rpc-bind
-  - nfs
+# Usage: validate_idrac_vars.yml
+idrac_input_filename: input_params/idrac_vars.yml
+firmware_update_success_msg: "firmware_update_required validated"
+firmware_update_fail_msg: "Failed. firmware_update_required accepts only true or false in idrac_vars.yml"
+poweredge_model_success_msg: "poweredge_model validated"
+poweredge_model_fail_msg: "Failed. poweredge_model is incorrect or unsupported. Please update the list with the supported models in the correct format"
+
+# Usage: install_dsu.yml
+dsu_folder_dest: /root/dsu
+# Mode of the DSU directory created by install_dsu.yml. A directory needs
+# the execute (search) bit, which 0644 lacks, so 0755 is used. Quoted so the
+# value stays an octal string when passed through "{{ ... }}".
+dsu_folder_dest_mode: "0755"
+bootstrap_repo_url: https://linux.dell.com/repo/hardware/dsu/bootstrap.cgi

+ 11 - 0
omnia_config.yml

@@ -33,6 +33,17 @@ k8s_cni: "calico"
 # Make sure this value does not overlap with any of the host networks.
 k8s_pod_network_cidr: "10.244.0.0/16"
 
+# Username for Dockerhub account
+# This will be used for Docker login and a kubernetes secret will be created and patched to service account in default namespace.
+# This kubernetes secret can be used to pull images from private repositories
+# This value is optional but suggested to avoid docker pull limit issues
+docker_username: ""
+
+# Password for Dockerhub account
+# This will be used for Docker login
+# This value is mandatory if docker username is provided
+docker_password: ""
+
 # Path to directory hosting ansible config file (ansible.cfg file)
 # Default value is "/etc/ansible"
 # This directory is on the host running ansible, if ansible is installed using dnf

+ 2 - 0
roles/cluster_validation/tasks/fetch_password.yml

@@ -97,6 +97,8 @@
     k8s_version: "{{ k8s_version }}"
     k8s_cni: "{{ k8s_cni }}"
     k8s_pod_network_cidr: "{{ k8s_pod_network_cidr }}"
+    docker_username: "{{ docker_username }}"
+    docker_password: "{{ docker_password }}"
     ansible_conf_file_path: "{{ ansible_config_file_path }}"
   no_log: True
 

+ 34 - 0
roles/k8s_start_manager/tasks/main.yml

@@ -49,6 +49,18 @@
   register: k8s_nodes_not_ready
   tags: init
 
+# Log in to the Docker registry with the credentials held by the
+# 127.0.0.1 host entry, when any credential was supplied.
+# The password is fed through stdin and task output is suppressed so the
+# secret never shows up on the process command line or in Ansible logs.
+# ignore_errors is kept so the next task can report a readable message.
+- name: Docker login
+  command: docker login -u {{ hostvars['127.0.0.1']['docker_username'] | quote }} --password-stdin
+  args:
+    stdin: "{{ hostvars['127.0.0.1']['docker_password'] }}"
+  changed_when: true
+  register: docker_login_output
+  ignore_errors: true
+  no_log: true
+  when: hostvars['127.0.0.1']['docker_username'] or hostvars['127.0.0.1']['docker_password']
+
+# Converts a failed login into an explicit, user-facing error.
+# A skipped login (no credentials given) does not count as failed.
+- name: Docker login check
+  fail:
+    msg: "{{ docker_login_fail_msg }}"
+  when: docker_login_output is failed
+
 - name: Initialize kubeadm
   block:
     - name: Initialize kubeadm
@@ -139,6 +151,28 @@
     verbosity: 2
   tags: init
 
+# List existing secrets so the registry secret is only created when absent.
+- name: Get K8s secrets
+  command: kubectl get secrets
+  changed_when: false
+  register: k8s_secret
+  when: hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+
+# Creates the "dockerregcred" registry secret from the supplied credentials.
+# no_log keeps the password out of Ansible output, and the values are
+# shell-quoted so special characters do not split the argument list.
+- name: Create docker secret
+  command: >-
+    kubectl create secret docker-registry dockerregcred
+    --docker-username={{ hostvars['127.0.0.1']['docker_username'] | quote }}
+    --docker-password={{ hostvars['127.0.0.1']['docker_password'] | quote }}
+  no_log: true
+  changed_when: true
+  when:
+    - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+    - "'dockerregcred' not in k8s_secret.stdout"
+
+# Attaches the secret to the default service account as an image pull
+# secret; retried up to 10 times until the patch succeeds.
+- name: Add docker secret to default service account
+  shell: >
+    kubectl patch serviceaccount default -p '{"imagePullSecrets": [{"name": "dockerregcred"}]}'
+  register: patch_service_account
+  when: hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+  until: patch_service_account is not failed
+  retries: 10
+  tags: install
+
 - name: Setup Calico SDN network
   command: "kubectl apply -f '{{ calico_yml_url }}'"
   when: hostvars['127.0.0.1']['k8s_cni'] == "calico"

+ 2 - 0
roles/k8s_start_manager/vars/main.yml

@@ -13,6 +13,8 @@
 #  limitations under the License.
 ---
 
+docker_login_fail_msg: "Docker login failed! Please check the credentials and re-execute playbook."
+
 k8s_root_directory: /root/.kube
 
 k8s_root_directory_mode: 0755

+ 49 - 5
roles/k8s_start_services/tasks/main.yml

@@ -17,10 +17,38 @@
   include_vars: ../../slurm_exporter/vars/main.yml
 
 - name: Wait for CoreDNS to restart
-  command: kubectl rollout status deployment/coredns -n kube-system
-  changed_when: false
-  ignore_errors: True
-  tags: init
+  block:
+    # Give the rollout four minutes; on timeout the rescue section runs.
+    - name: Wait for CoreDNS to restart
+      command: kubectl rollout status deployment/coredns -n kube-system  --timeout=4m
+      changed_when: false
+      tags: init
+  rescue:
+    # Read-only query, so it never reports a change.
+    - name: Get K8s pods
+      command: kubectl get pods --all-namespaces
+      changed_when: false
+      register: k8s_pods
+      tags: init
+
+    # With credentials available, pre-pull the images when pods show an
+    # image pull error. Both ImagePullBackOff and ErrImagePull are checked,
+    # matching the condition used by the fail task at the end of this rescue.
+    - name: Pull docker images
+      command: docker pull {{ item }}
+      with_items: "{{ kube_system_docker_images }}"
+      when:
+        - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+        - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
+      register: docker_image_pull_result
+      until: docker_image_pull_result is not failed
+      retries: 5
+
+    # Second wait, without a timeout, only when credentials were supplied.
+    - name: Wait for CoreDNS to restart
+      command: kubectl rollout status deployment/coredns -n kube-system
+      changed_when: false
+      when: hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+      tags: init
+
+    # Without credentials an image pull error cannot be recovered here,
+    # so stop and ask the user to supply them.
+    - name: Fail message
+      fail:
+        msg: "{{ docker_pull_limit_msg }}"
+      when:
+        - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
+        - not hostvars['127.0.0.1']['docker_username'] and not hostvars['127.0.0.1']['docker_password']
 
 - name: Get K8s pods
   command: kubectl get pods --all-namespaces
@@ -211,4 +239,20 @@
   command: "kubectl apply -f '{{ volcano_scheduling_yaml_url }}'"
   changed_when: true
   when: "'volcano-system' not in k8s_pods.stdout"
-  tags: init
+  tags: init
+
+# Refresh the pod listing after the services above were applied.
+- name: Get K8s pods
+  command: kubectl get pods --all-namespaces
+  changed_when: false
+  register: k8s_pods
+  tags: init
+
+# With credentials available, pre-pull the service images when any pod
+# shows an image pull error. ErrImagePull is checked alongside
+# ImagePullBackOff, since a pod can be listed in either state.
+# Each pull is retried up to 5 times.
+- name: Pull K8s services docker images
+  command: docker pull {{ item }}
+  with_items: "{{ k8s_services_docker_images }}"
+  when:
+    - "'ImagePullBackOff' in k8s_pods.stdout or 'ErrImagePull' in k8s_pods.stdout"
+    - hostvars['127.0.0.1']['docker_username'] and hostvars['127.0.0.1']['docker_password']
+  register: docker_image_pull_result
+  until: docker_image_pull_result is not failed
+  retries: 5

+ 27 - 0
roles/k8s_start_services/vars/main.yml

@@ -13,6 +13,33 @@
 #  limitations under the License.
 ---
 
+# Images pulled by the "Pull docker images" rescue task when the CoreDNS
+# rollout wait fails.
+kube_system_docker_images:
+  - docker.io/calico/kube-controllers:v3.19.1
+  - docker.io/calico/cni:v3.19.1
+  - docker.io/calico/pod2daemon-flexvol:v3.19.1
+  - docker.io/calico/node:v3.19.1
+  - xilinxatg/xilinx_k8s_fpga_plugin:2020.11.24
+  - nvidia/k8s-device-plugin:v0.7.0
+
+# Images pulled by the "Pull K8s services docker images" task.
+# NOTE(review): metallb/controller is listed at both v0.8.1 and v0.7.3 while
+# metallb/speaker is only v0.7.3 - confirm which controller version is used.
+# NOTE(review): rocm/k8s-device-plugin has no tag and the volcanosh images
+# use :latest, so the pulled versions are not pinned - confirm this is intended.
+k8s_services_docker_images:
+  - docker.io/rocm/k8s-device-plugin
+  - kubernetesui/dashboard:v2.0.5
+  - kubernetesui/metrics-scraper:v1.0.6
+  - prom/alertmanager:v0.21.0
+  - jimmidyson/configmap-reload:v0.4.0
+  - prom/node-exporter:v1.0.1
+  - prom/pushgateway:v1.2.0
+  - prom/prometheus:v2.20.1
+  - metallb/controller:v0.8.1
+  - metallb/controller:v0.7.3
+  - metallb/speaker:v0.7.3
+  - volcanosh/vc-controller-manager:latest
+  - volcanosh/vc-scheduler:latest
+  - volcanosh/vc-webhook-manager:latest
+  - quay.io/external_storage/nfs-client-provisioner:v3.1.0-k8s1.11
+
+# Shown when an image pull error is seen and no Docker credentials were given.
+docker_pull_limit_msg: "You have reached your docker pull rate limit. Please provide docker credentials in omnia_config.yml and try again"
+
 metallb_config_file_dest: /root/k8s/metal-config.yaml
 
 metallb_config_file_mode: 0655