Przeglądaj źródła

Retry restarting master controllers

Currently, master controller services may fail to restart
if master api services are not fully initialized.

This commit retries the restart of the master controllers service.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1509837
Michael Gugino 7 lat temu
rodzic
commit
eb9914420e

+ 6 - 1
playbooks/common/openshift-master/scaleup.yml

@@ -22,8 +22,13 @@
   - name: restart master api
     service: name={{ openshift.common.service_type }}-master-controllers state=restarted
     notify: verify api server
+  # We retry the controllers because the API may not be 100% initialized yet.
   - name: restart master controllers
-    service: name={{ openshift.common.service_type }}-master-controllers state=restarted
+    command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+    retries: 3
+    delay: 5
+    register: result
+    until: result.rc == 0
   - name: verify api server
     command: >
       curl --silent --tlsv1.2

+ 6 - 1
playbooks/common/openshift-master/tasks/wire_aggregator.yml

@@ -179,8 +179,13 @@
   - yedit_output.changed
   - openshift.master.cluster_method == 'native'
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when:
   - yedit_output.changed
   - openshift.master.cluster_method == 'native'

+ 6 - 1
roles/nuage_master/handlers/main.yaml

@@ -7,8 +7,13 @@
     openshift.master.cluster_method == 'native'
 
 # TODO: need to fix up ignore_errors here
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: >
     (openshift_master_ha | bool) and
     (not master_controllers_service_status_changed | default(false)) and

+ 6 - 1
roles/openshift_hosted_metrics/handlers/main.yml

@@ -4,8 +4,13 @@
   when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
   notify: Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
 
 - name: Verify API Server

+ 6 - 1
roles/openshift_logging/handlers/main.yml

@@ -4,8 +4,13 @@
   when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
   notify: Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
 
 - name: Verify API Server

+ 6 - 3
roles/openshift_master/handlers/main.yml

@@ -9,10 +9,13 @@
   notify:
   - Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd:
-    name: "{{ openshift.common.service_type }}-master-controllers"
-    state: restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when:
   - not (master_controllers_service_status_changed | default(false) | bool)
   - openshift.master.cluster_method == 'native'

+ 6 - 1
roles/openshift_metrics/handlers/main.yml

@@ -4,8 +4,13 @@
   when: (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
   notify: Verify API Server
 
+# We retry the controllers because the API may not be 100% initialized yet.
 - name: restart master controllers
-  systemd: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  command: "systemctl restart {{ openshift.common.service_type }}-master-controllers"
+  retries: 3
+  delay: 5
+  register: result
+  until: result.rc == 0
   when: (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
 
 - name: Verify API Server