Parcourir la source

Retry restarting master controllers

Currently, master controller services may fail to restart
if master api services are not fully initialized.

This commit retries the master controllers restart until it succeeds (3 retries, 5 seconds apart).

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1509837
Michael Gugino il y a 7 ans
Parent
commit
eb9914420e

+ 6 - 1
playbooks/common/openshift-master/scaleup.yml

@@ -22,8 +22,13 @@
   - name: restart master api
     service: name={{ openshift.common.service_type }}-master-controllers state=restarted
     notify: verify api server
+  # We retry the controllers because the API may not be 100% initialized yet.
   - name: restart master controllers
-    service: name={{ openshift.common.service_type }}-master-controllers state=restarted
+    command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+    retries: 3
+    delay: 5
+    register: result
+    until: result.rc == 0
   - name: verify api server
     command: >
       curl --silent --tlsv1.2

+ 6 - 1
playbooks/common/openshift-master/tasks/wire_aggregator.yml

@@ -179,8 +179,13 @@
   - yedit_output.changed
   - openshift.master.cluster_method == 'native'
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when:
   - yedit_output.changed
   - openshift.master.cluster_method == 'native'

+ 6 - 1
roles/nuage_master/handlers/main.yaml

@@ -7,8 +7,13 @@
     openshift.master.cluster_method == 'native'
 
 # TODO: need to fix up ignore_errors here
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: >
     (openshift_master_ha | bool) and
     (not master_controllers_service_status_changed | default(false)) and

+ 6 - 1
roles/openshift_hosted_metrics/handlers/main.yml

@@ -4,8 +4,13 @@
   when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
   notify: Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
 
 - name: Verify API Server

+ 6 - 1
roles/openshift_logging/handlers/main.yml

@@ -4,8 +4,13 @@
   when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
   notify: Verify API Server
 
+# Retry the restart: the controllers may fail to start until the master API is fully initialized.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
 
 - name: Verify API Server

+ 6 - 3
roles/openshift_master/handlers/main.yml

@@ -9,10 +9,13 @@
   notify:
   - Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd:
-    name: "{{ openshift.common.service_type }}-master-controllers"
-    state: restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when:
   - not (master_controllers_service_status_changed | default(false) | bool)
   - openshift.master.cluster_method == 'native'

+ 6 - 1
roles/openshift_metrics/handlers/main.yml

@@ -4,8 +4,13 @@
   when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
   notify: Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
 
 - name: Verify API Server