Browse Source

Merge pull request #9090 from sdodson/wait-for-storage-migration

Wait for API availability before migrating storage, add retries
Scott Dodson 6 years ago
parent
commit
c03b777de7

+ 10 - 2
playbooks/openshift-master/private/upgrade.yml

@@ -60,6 +60,10 @@
   roles:
   - openshift_facts
   tasks:
+  - name: Wait for API health
+    import_role:
+      name: openshift_control_plane
+      tasks_from: check_master_api_is_ready.yml
   - name: Upgrade all storage
     command: >
       {{ openshift_client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig
@@ -70,7 +74,7 @@
     failed_when:
     - l_pb_upgrade_control_plane_pre_upgrade_storage.rc != 0
     - openshift_upgrade_pre_storage_migration_fatal | default(true) | bool
-    retries: 2
+    retries: 6
     delay: 30
 
   - name: Migrate legacy HPA scale target refs
@@ -186,6 +190,10 @@
   vars:
     __master_shared_resource_viewer_file: "shared_resource_viewer_role.yaml"
   tasks:
+  - name: Wait for API health
+    import_role:
+      name: openshift_control_plane
+      tasks_from: check_master_api_is_ready.yml
   - name: Reconcile Security Context Constraints
     command: >
       {{ openshift_client_binary }} adm policy --config={{ openshift.common.config_base }}/master/admin.kubeconfig reconcile-sccs --confirm --additive-only=true -o name
@@ -206,7 +214,7 @@
     failed_when:
     - l_pb_upgrade_control_plane_post_upgrade_storage.rc != 0
     - openshift_upgrade_post_storage_migration_fatal | default(false) | bool
-    retries: 2
+    retries: 6
     delay: 30
 
   - set_fact:

+ 10 - 32
roles/openshift_control_plane/tasks/check_master_api_is_ready.yml

@@ -1,47 +1,25 @@
 ---
-- name: Wait for API to become available
-  # Using curl here since the uri module requires python-httplib2 and
-  # wait_for port doesn't provide health information.
+- name: Wait for APIs to become available
   command: >
-    curl --silent --tlsv1.2 --max-time 2
-    --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt
-    {{ openshift.master.api_url }}/healthz/ready
-  register: l_api_available_output
-  until: l_api_available_output.stdout == 'ok'
-  retries: 120
-  delay: 1
-  run_once: true
-  changed_when: false
-  failed_when: false
-
-- name: "Collect verbose curl output when API didn't become available"
-  command: >-
-    curl --verbose --tlsv1.2
-    --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt
-    {{ openshift.master.api_url }}/healthz/ready
-  register: l_api_available_verbose_output
-  failed_when: false
+    {{ openshift_client_binary }} get --raw /apis/{{ item }}/v1
+  register: openshift_apis
+  until: openshift_apis.rc == 0
+  with_items: "{{ l_core_api_list }}"
+  retries: 60
+  delay: 5
 
 - name: "Collect API logs when API didn't become available"
   command: journalctl --no-pager -n 100 -u {{ openshift_service_type }}-master-api
   register: l_api_log_output
-  when:
-  - l_api_available_output.stdout != 'ok'
-
-- name: "Dump verbose curl output when the API didn't become available"
-  debug:
-    msg: "{{ l_api_available_verbose_output.stderr_lines }}"
-  when: l_api_available_output.stdout != 'ok'
+  when: openshift_apis is failed
 
 - name: "Dump API logs when the API didn't become availabale"
   debug:
     msg: "{{ l_api_log_output.stdout_lines }}"
-  when:
-  - l_api_available_output.stdout != 'ok'
+  when: openshift_apis is failed
 
 - fail:
     msg: >
       API did not become available. Verbose curl output and API logs
       have been collected above to assist with debugging.
-  when:
-  - l_api_available_output.stdout != 'ok'
+  when: openshift_apis is failed

+ 1 - 8
roles/openshift_control_plane/tasks/main.yml

@@ -320,14 +320,7 @@
     fail:
       msg: Control plane pods didn't pass health check
 
-- name: Wait for Openshift APIs to register themselves
-  command: >
-    {{ openshift_client_binary }} get --raw /apis/{{ item }}/v1
-  register: openshift_apis
-  until: openshift_apis.rc == 0
-  with_items: "{{ l_core_api_list }}"
-  retries: 60
-  delay: 5
+- import_tasks: check_master_api_is_ready.yml
 
 - name: Remove oc cache to refresh a list of APIs
   file: