Ver Fonte

Merge pull request #9090 from sdodson/wait-for-storage-migration

Wait for API availability before migrating storage, add retries
Scott Dodson há 6 anos atrás
pai
commit
c03b777de7

+ 10 - 2
playbooks/openshift-master/private/upgrade.yml

@@ -60,6 +60,10 @@
   roles:
   roles:
   - openshift_facts
   - openshift_facts
   tasks:
   tasks:
+  - name: Wait for API health
+    import_role:
+      name: openshift_control_plane
+      tasks_from: check_master_api_is_ready.yml
   - name: Upgrade all storage
   - name: Upgrade all storage
     command: >
     command: >
       {{ openshift_client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig
       {{ openshift_client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig
@@ -70,7 +74,7 @@
     failed_when:
     failed_when:
     - l_pb_upgrade_control_plane_pre_upgrade_storage.rc != 0
     - l_pb_upgrade_control_plane_pre_upgrade_storage.rc != 0
     - openshift_upgrade_pre_storage_migration_fatal | default(true) | bool
     - openshift_upgrade_pre_storage_migration_fatal | default(true) | bool
-    retries: 2
+    retries: 6
     delay: 30
     delay: 30
 
 
   - name: Migrate legacy HPA scale target refs
   - name: Migrate legacy HPA scale target refs
@@ -186,6 +190,10 @@
   vars:
   vars:
     __master_shared_resource_viewer_file: "shared_resource_viewer_role.yaml"
     __master_shared_resource_viewer_file: "shared_resource_viewer_role.yaml"
   tasks:
   tasks:
+  - name: Wait for API health
+    import_role:
+      name: openshift_control_plane
+      tasks_from: check_master_api_is_ready.yml
   - name: Reconcile Security Context Constraints
   - name: Reconcile Security Context Constraints
     command: >
     command: >
       {{ openshift_client_binary }} adm policy --config={{ openshift.common.config_base }}/master/admin.kubeconfig reconcile-sccs --confirm --additive-only=true -o name
       {{ openshift_client_binary }} adm policy --config={{ openshift.common.config_base }}/master/admin.kubeconfig reconcile-sccs --confirm --additive-only=true -o name
@@ -206,7 +214,7 @@
     failed_when:
     failed_when:
     - l_pb_upgrade_control_plane_post_upgrade_storage.rc != 0
     - l_pb_upgrade_control_plane_post_upgrade_storage.rc != 0
     - openshift_upgrade_post_storage_migration_fatal | default(false) | bool
     - openshift_upgrade_post_storage_migration_fatal | default(false) | bool
-    retries: 2
+    retries: 6
     delay: 30
     delay: 30
 
 
   - set_fact:
   - set_fact:

+ 10 - 32
roles/openshift_control_plane/tasks/check_master_api_is_ready.yml

@@ -1,47 +1,25 @@
 ---
 ---
-- name: Wait for API to become available
-  # Using curl here since the uri module requires python-httplib2 and
-  # wait_for port doesn't provide health information.
+- name: Wait for APIs to become available
   command: >
   command: >
-    curl --silent --tlsv1.2 --max-time 2
-    --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt
-    {{ openshift.master.api_url }}/healthz/ready
-  register: l_api_available_output
-  until: l_api_available_output.stdout == 'ok'
-  retries: 120
-  delay: 1
-  run_once: true
-  changed_when: false
-  failed_when: false
-
-- name: "Collect verbose curl output when API didn't become available"
-  command: >-
-    curl --verbose --tlsv1.2
-    --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt
-    {{ openshift.master.api_url }}/healthz/ready
-  register: l_api_available_verbose_output
-  failed_when: false
+    {{ openshift_client_binary }} get --raw /apis/{{ item }}/v1
+  register: openshift_apis
+  until: openshift_apis.rc == 0
+  with_items: "{{ l_core_api_list }}"
+  retries: 60
+  delay: 5
 
 
 - name: "Collect API logs when API didn't become available"
 - name: "Collect API logs when API didn't become available"
   command: journalctl --no-pager -n 100 -u {{ openshift_service_type }}-master-api
   command: journalctl --no-pager -n 100 -u {{ openshift_service_type }}-master-api
   register: l_api_log_output
   register: l_api_log_output
-  when:
-  - l_api_available_output.stdout != 'ok'
-
-- name: "Dump verbose curl output when the API didn't become available"
-  debug:
-    msg: "{{ l_api_available_verbose_output.stderr_lines }}"
-  when: l_api_available_output.stdout != 'ok'
+  when: openshift_apis is failed
 
 
 - name: "Dump API logs when the API didn't become availabale"
 - name: "Dump API logs when the API didn't become availabale"
   debug:
   debug:
     msg: "{{ l_api_log_output.stdout_lines }}"
     msg: "{{ l_api_log_output.stdout_lines }}"
-  when:
-  - l_api_available_output.stdout != 'ok'
+  when: openshift_apis is failed
 
 
 - fail:
 - fail:
     msg: >
     msg: >
       API did not become available. Verbose curl output and API logs
       API did not become available. Verbose curl output and API logs
       have been collected above to assist with debugging.
       have been collected above to assist with debugging.
-  when:
-  - l_api_available_output.stdout != 'ok'
+  when: openshift_apis is failed

+ 1 - 8
roles/openshift_control_plane/tasks/main.yml

@@ -320,14 +320,7 @@
     fail:
     fail:
       msg: Control plane pods didn't pass health check
       msg: Control plane pods didn't pass health check
 
 
-- name: Wait for Openshift APIs to register themselves
-  command: >
-    {{ openshift_client_binary }} get --raw /apis/{{ item }}/v1
-  register: openshift_apis
-  until: openshift_apis.rc == 0
-  with_items: "{{ l_core_api_list }}"
-  retries: 60
-  delay: 5
+- import_tasks: check_master_api_is_ready.yml
 
 
 - name: Remove oc cache to refresh a list of APIs
 - name: Remove oc cache to refresh a list of APIs
   file:
   file: