Browse Source

Retry our etcd health check

This is a follow-up attempt to fix a bug. Originally, DNS was failing due
to problems with the SDN pod. Now it appears that etcd is simply prone to
taking longer than expected to become healthy. Wait up to 3 minutes
(30 retries, 6 seconds apart) for etcd to come back online.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1623145
Scott Dodson 6 years ago
parent
commit
430894ea8f

+ 4 - 0
roles/etcd/tasks/remove-etcd-v2-data.yml

@@ -1,6 +1,10 @@
 ---
 - name: Verify cluster is healthy pre-upgrade
   command: "{{ etcdctlv2 }} cluster-health"
+  register: cluster_health
+  retries: 30
+  delay: 6
+  until: cluster_health.rc == 0
 
 - name: Check migrated status
   command: "{{ etcdctlv2 }} get /kubernetes.io"

+ 4 - 0
roles/etcd/tasks/upgrade_rpm.yml

@@ -10,6 +10,10 @@
 
 - name: Verify cluster is healthy pre-upgrade
   command: "{{ etcdctlv2 }} cluster-health"
+  register: cluster_health
+  retries: 30
+  delay: 6
+  until: cluster_health.rc == 0
 
 - set_fact:
     l_etcd_target_package: "{{ 'etcd' if r_etcd_upgrade_version is not defined else 'etcd-'+r_etcd_upgrade_version+'*' }}"

+ 4 - 0
roles/etcd/tasks/upgrade_static.yml

@@ -5,6 +5,10 @@
 
 - name: Verify cluster is healthy pre-upgrade
   command: "{{ etcdctlv2 }} cluster-health"
+  register: cluster_health
+  retries: 30
+  delay: 6
+  until: cluster_health.rc == 0
 
 - name: Check for old etcd service files
   stat: