Browse Source

Merge pull request #2571 from sdodson/node-retry-improvement

Retry starting master and node services once
Scott Dodson 8 years ago
parent
commit
5e07a17285
2 changed files with 15 additions and 11 deletions
  1. roles/openshift_master/tasks/main.yml (+12 -0)
  2. roles/openshift_node/tasks/main.yml (+3 -11)

+ 12 - 0
roles/openshift_master/tasks/main.yml

@@ -213,6 +213,9 @@
     state: started
   when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
   register: start_result
+  until: not start_result | failed
+  retries: 1
+  delay: 60
 
 - set_fact:
     master_api_service_status_changed: "{{ start_result | changed }}"
@@ -229,6 +232,9 @@
     state: started
   when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
   register: start_result
+  until: not start_result | failed
+  retries: 1
+  delay: 60
 
 - set_fact:
     master_api_service_status_changed: "{{ start_result | changed }}"
@@ -262,6 +268,9 @@
     state: started
   when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname == openshift_master_hosts[0]
   register: start_result
+  until: not start_result | failed
+  retries: 1
+  delay: 60
 
 - pause:
     seconds: 15
@@ -274,6 +283,9 @@
     state: started
   when: openshift_master_ha | bool and openshift.master.cluster_method == 'native' and inventory_hostname != openshift_master_hosts[0]
   register: start_result
+  until: not start_result | failed
+  retries: 1
+  delay: 60
 
 - set_fact:
     master_controllers_service_status_changed: "{{ start_result | changed }}"

+ 3 - 11
roles/openshift_node/tasks/main.yml

@@ -152,17 +152,9 @@
 - name: Start and enable node
   service: name={{ openshift.common.service_type }}-node enabled=yes state=started
   register: node_start_result
-  ignore_errors: yes
-
-- name: Wait 30 seconds for docker initialization whenever node has failed
-  pause:
-    seconds: 30
-  when: node_start_result | failed
-
-- name: Start and enable node again
-  service: name={{ openshift.common.service_type }}-node enabled=yes state=started
-  register: node_start_result
-  when: node_start_result | failed
+  until: not node_start_result | failed
+  retries: 1
+  delay: 30
 
 - set_fact:
     node_service_status_changed: "{{ node_start_result | changed }}"