Browse Source

Allow a couple of retries when unscheduling/rescheduling nodes during upgrade.

Marking a node unschedulable or schedulable can fail with a transient
"object has been modified" error, which asks you to retry your changes
against the latest version of the object.

Allow up to three retries to see if we can get the change to take
effect.
Devan Goodwin 8 years ago
parent
commit
ad47ff1df5
1 changed files with 12 additions and 0 deletions
  1. 12 0
      playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml

+ 12 - 0
playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml

@@ -32,6 +32,12 @@
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_upgrade
+    # NOTE: This command can fail with a transient "object has been modified"
+    # error; allow a few retries for a more reliable upgrade.
+    register: node_unsched
+    until: node_unsched.rc == 0
+    retries: 3
+    delay: 1
 
   - name: Evacuate Node for Kubelet upgrade
     command: >
@@ -61,3 +67,9 @@
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool
+    register: node_sched
+    until: node_sched.rc == 0
+    retries: 3
+    delay: 1
+
+