Browse Source

Issue #787: AWX operator pod restarting for many times

Signed-off-by: Lakshmi-Patneedi <Lakshmi_Patneedi@Dellteam.com>
Lakshmi-Patneedi 3 years ago
parent
commit
ee099cae21

+ 6 - 8
control_plane/roles/control_plane_common/tasks/package_installation.yml

@@ -12,20 +12,18 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 ---
-- name: Install packages - leap
-  zypper:
-    name: "{{ common_packages_leap }}"
+- name: Install packages
+  package:
+    name: "{{ common_packages }}"
     state: present
-  when: ( os_supported_leap in mgmt_os ) and ( ansible_distribution_version >= os_supported_leap_version )
 
 - name: Install packages - rocky
   package:
-    name: "{{ common_packages }}"
+    name: "{{ common_packages_rocky }}"
     state: present
   when:
-    - ( mgmt_os == os_supported_centos ) and ( ansible_distribution_version >= os_supported_centos_version ) or
-      ( mgmt_os == os_supported_rocky ) and ( ansible_distribution_version >= os_supported_rocky_version )
+    - os_supported_leap not in mgmt_os
 
 - name: Install netaddr
   command: pip3 install netaddr
-  changed_when: false
+  changed_when: false

+ 16 - 15
control_plane/roles/control_plane_common/tasks/pre_requisite.yml

@@ -32,13 +32,6 @@
   set_fact:
     mount_path: "{{ role_path + '/../../..'  }}"
   tags: init
-    
-- name: Collecting ansible python version
-  shell:
-    cmd: ansible --version | grep "python version" | cut -d ' ' -f 6
-  register: ansible_python_version
-  changed_when: false
-  tags: init
 
 - name: Saving management station os
   set_fact:
@@ -49,9 +42,18 @@
   fail:
     msg: "{{ ansible_python_version_status }}"
   tags: init
-  when:
-    - ansible_python_version.stdout != python_version_leap
-    - ansible_python_version.stdout != python_version_support
+  when:
+    - ansible_python_version is version(python_version_support, '<')  # version test compares numerically; a plain string '<' ranks 3.10 below 3.6
+    - os_supported_leap not in mgmt_os
+
+- name: Verify the ansible and python versions installed
+  fail:
+    msg: "{{ ansible_python_version_status }}"
+  tags: init
+  when:
+    - ansible_python_version is version(python_version_leap, '<')  # version test compares numerically; a plain string '<' ranks 3.10 below 3.6
+    - os_supported_leap in mgmt_os
+    - ansible_distribution_version is version(os_supported_leap_version, '>=')  # same reason: avoid lexicographic comparison of release numbers
 
 - name: Verify whether ansible configuration file exists
   stat:
@@ -70,13 +72,13 @@
 - name: Check OS support
   assert:
     that:
-      - ( mgmt_os == os_supported_centos ) and ( ansible_distribution_version >= os_supported_centos_version ) or
-        ( mgmt_os == os_supported_rocky ) and ( ansible_distribution_version >= os_supported_rocky_version ) or
-        ( os_supported_leap in mgmt_os ) and ( ansible_distribution_version >= os_supported_leap_version )
+      - ( os_supported_centos in mgmt_os ) and ( ansible_distribution_version is version(os_supported_centos_version, '>=') ) or
+        ( os_supported_rocky in mgmt_os ) and ( ansible_distribution_version is version(os_supported_rocky_version, '>=') ) or
+        ( os_supported_leap in mgmt_os ) and ( ansible_distribution_version is version(os_supported_leap_version, '>=') )  # version test: numeric compare, so 8.10 satisfies >= 8.4
     fail_msg: "{{ fail_os_status }}"
     success_msg: "{{ success_os_status }}"
   register: os_value
-  tags: init
+  tags: [ init, validate ]
 
 - block:
     - name: Fetch SElinux mode
@@ -98,8 +100,7 @@
       register: selinux_value
   tags: init
   when:
-    - ( mgmt_os == os_supported_centos ) and ( ansible_distribution_version >= os_supported_centos_version ) or
-      ( mgmt_os == os_supported_rocky ) and ( ansible_distribution_version >= os_supported_rocky_version )
+    - os_supported_leap not in mgmt_os
 
 - name: State of firewall
   service:

+ 6 - 21
control_plane/roles/control_plane_common/vars/main.yml

@@ -16,7 +16,7 @@
 # vars file for control_plane_common
 
 # Usage: package_installation.yml
-common_packages_leap:
+common_packages:
   - git
   - gcc
   - gcc-c++
@@ -30,30 +30,15 @@ common_packages_leap:
   - net-tools
   - python3-netaddr
   - dos2unix
-  - cri-o
   - make
 os_supported_leap: "leap"
 os_supported_leap_version: "15.3"
-python_version_leap: '3.6.15'
-common_packages:
+python_version_leap: '3.6.13'
+common_packages_rocky:
   - epel-release
   - yum-utils
-  - git
-  - gcc
-  - gcc-c++
-  - nodejs
   - device-mapper-persistent-data
-  - bzip2
-  - python2-pip
-  - python3-pip
-  - nano
-  - lvm2
-  - gettext
-  - net-tools
-  - python3-netaddr
   - yum-plugin-versionlock
-  - dos2unix
-  - make
 
 # Usage: pre_requisite.yml
 internet_delay: 0
@@ -64,11 +49,11 @@ os_supported_centos: "centos"
 os_supported_rocky: "rocky"
 os_supported_centos_version: "8.3"
 os_supported_rocky_version: "8.4"
-fail_os_status: "Unsupported OS or OS version. OS should be {{ os_supported_centos }} {{ os_supported_centos_version }} or {{ os_supported_rocky }} {{ os_supported_rocky_version }}"
+fail_os_status: "Unsupported OS or OS version. OS should be {{ os_supported_centos }} {{ os_supported_centos_version }} or {{ os_supported_rocky }} {{ os_supported_rocky_version }} or {{ os_supported_leap }} {{ os_supported_leap_version }}"
 success_os_status: "Management Station OS validated"
 internet_status: "Failed. No Internet connection. Make sure network is up."
 selinux_status: "SElinux is not disabled. Disable it in /etc/sysconfig/selinux and reboot the system"
-ansible_python_version_status: "For CentOS 8.3, python bindings of firewalld, dnf, selinux are not available if python is installed from source and not from dnf. So please make sure python3.6 is installed using dnf. And ansible uses the python version 3.6 installed using dnf"
+ansible_python_version_status: "For {{ mgmt_os }} {{ ansible_distribution_version }}, python bindings of firewalld, dnf, selinux are not available if python is installed from source and not from dnf or zypper. So please make sure python3.6 is installed using dnf or zypper. And ansible uses the python version 3.6 installed using dnf or zypper"
 python_version_support: '3.6.8'
 default_ansible_config_file_path: /etc/ansible/ansible.cfg
 invalid_run_tag_msg: "Failed. init tag should be used with run tags"
@@ -255,4 +240,4 @@ idrac_input_filename: input_params/idrac_vars.yml
 firmware_update_success_msg: "firmware_update_required validated"
 firmware_update_fail_msg: "Failed. firmware_update_required accepts only true or false in idrac_vars.yml"
 poweredge_model_success_msg: "poweredge_model validated"
-poweredge_model_fail_msg: "Failed. poweredge_model is incorrect or unsupported. Please update the list with the supported models in the correct format"
+poweredge_model_fail_msg: "Failed. poweredge_model is incorrect or unsupported. Please update the list with the supported models in the correct format"

+ 24 - 12
control_plane/roles/webui_awx/tasks/install_awx.yml

@@ -1,4 +1,4 @@
-# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+# Copyright 2022 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -59,11 +59,28 @@
   command: "kubectl config set-context --current --namespace={{ awx_namespace }}"
   changed_when: false
 
+- name: Installing jq package
+  package:
+    name: jq
+    state: present
+
 - name: Deploying awx-operator
   command: make deploy
   changed_when: false
   args:
     chdir: "{{ awx_operator_folder }}"
+  environment:
+    NAMESPACE: "{{ awx_namespace }}"
+
+- name: Waiting for awx operator deployment {This might take 10-15 minutes}
+  block:
+    - name: Waiting for awx-operator deployment to be up and running
+      command: kubectl wait --for=condition=available deployment awx-operator-controller-manager -n {{ awx_namespace }} --timeout={{ awx_operator_time }}
+      changed_when: false 
+  rescue:
+    - name: Display failure message
+      debug:
+        msg: "{{ operator_deployment_failure }}"
 
 - name: Get K8s pods
   command: "kubectl get pods -n {{ awx_namespace }}"
@@ -103,15 +120,16 @@
   when: "'custom-awx-ee' not in docker_images.stdout"
   retries: "{{ min_retries }}"
 
-- name: Waiting for awx-operator deployment to be up and running
-  command: kubectl wait --for=condition=available deployment awx-operator-controller-manager -n {{ awx_namespace }} --timeout={{ awx_operator_time }}
-  changed_when: false
-
 - name: Deploy awx
   command: "kubectl apply -f {{ awx_yml_file_path }}"
   changed_when: true
   when: not k8s_pods.stdout | regex_search('awx-([A-Za-z0-9]{10})-([A-Za-z0-9]{5})')
 
+- name: Wait for awx pods to get created
+  wait_for:
+    timeout: "{{ awx_wait_time }}"
+  when: not k8s_pods.stdout | regex_search('awx-([A-Za-z0-9]{10})-([A-Za-z0-9]{5})')
+
 - name: Install awxkit using pip3
   pip:
     name: awxkit
@@ -122,18 +140,12 @@
   changed_when: true
   register: installation_status
 
-- name: Wait for awx pods to get created
-  wait_for:
-    timeout: "{{ awx_wait_time }}"
-  when: not k8s_pods.stdout | regex_search('awx-([A-Za-z0-9]{10})-([A-Za-z0-9]{5})')
-
 - name: Fails if the pods go into ImagePullBackOff state
   block:
     - name: Waiting for awx deployment to be up and running
       command: kubectl wait --for=condition=available deployment awx -n {{ awx_namespace }} --timeout={{ awx_deployment_time }}
       changed_when: false
-
   rescue:
     - name: Display failure message
       debug:
-        msg: "{{ deployment_failure_msg }}"
+        msg: "{{ deployment_failure_msg }}"

+ 2 - 1
control_plane/roles/webui_awx/vars/main.yml

@@ -36,6 +36,7 @@ readiness_probe_initial: "initialDelaySeconds: 5"
 readiness_probe_final: "initialDelaySeconds: 300"
 awx_operator_time: 900s
 awx_deployment_time: 1500s
+operator_deployment_failure: "AWX operator deployment did not become available within {{ awx_operator_time }}. Check the awx-operator-controller-manager pod for errors such as CrashLoopBackOff"  # shown when kubectl wait fails for any reason, so no single cause is asserted
 
 # Usage: configure_settings.yml
 awx_port: 8052
@@ -54,7 +55,7 @@ awx_ui_msg: "AWX Upgrading"
 return_status: 200
 max_retries: 20
 max_delay: 15
-awx_ui_wait_time: 30
+awx_ui_wait_time: 200
 
 # Usage: check_prerequisites.yml
 min_retries: 2