|
@@ -183,23 +183,26 @@
|
|
|
- fail:
|
|
|
msg: Node start failed.
|
|
|
|
|
|
-- name: Verify that the control plane is running
|
|
|
- command: >
|
|
|
- curl -k {{ openshift.master.api_url }}/healthz/ready
|
|
|
- args:
|
|
|
- # Disables the following warning:
|
|
|
- # Consider using get_url or uri module rather than running curl
|
|
|
- warn: no
|
|
|
- register: control_plane_health
|
|
|
- until: control_plane_health.stdout == 'ok'
|
|
|
+- name: Wait for control plane pods to appear
|
|
|
+ oc_obj:
|
|
|
+ state: list
|
|
|
+ kind: pod
|
|
|
+ name: "master-{{ item }}-{{ openshift.node.nodename | lower }}"
|
|
|
+ namespace: kube-system
|
|
|
+ register: control_plane_pods
|
|
|
+ until:
|
|
|
+ - "'results' in control_plane_pods"
|
|
|
+ - "'results' in control_plane_pods.results"
|
|
|
+ - control_plane_pods.results.results | length > 0
|
|
|
retries: 60
|
|
|
delay: 5
|
|
|
- changed_when: false
|
|
|
- # Ignore errors so we can log troubleshooting info on failures.
|
|
|
- ignore_errors: yes
|
|
|
+ with_items:
|
|
|
+ - "{{ 'etcd' if inventory_hostname in groups['oo_etcd_to_config'] else omit }}"
|
|
|
+ - api
|
|
|
+ - controllers
|
|
|
+ ignore_errors: true
|
|
|
|
|
|
-# Capture debug output here to simplify triage
|
|
|
-- when: control_plane_health.stdout != 'ok'
|
|
|
+- when: control_plane_pods is failed
|
|
|
block:
|
|
|
- name: Check status in the kube-system namespace
|
|
|
command: >
|
|
@@ -211,10 +214,10 @@
|
|
|
- name: Get pods in the kube-system namespace
|
|
|
command: >
|
|
|
{{ openshift_client_binary }} get pods --config={{ openshift.common.config_base }}/master/admin.kubeconfig -n kube-system -o wide
|
|
|
- register: control_plane_pods
|
|
|
+ register: control_plane_pods_list
|
|
|
ignore_errors: true
|
|
|
- debug:
|
|
|
- msg: "{{ control_plane_pods.stdout_lines }}"
|
|
|
+ msg: "{{ control_plane_pods_list.stdout_lines }}"
|
|
|
- name: Get events in the kube-system namespace
|
|
|
command: >
|
|
|
{{ openshift_client_binary }} get events --config={{ openshift.common.config_base }}/master/admin.kubeconfig -n kube-system
|
|
@@ -222,6 +225,54 @@
|
|
|
ignore_errors: true
|
|
|
- debug:
|
|
|
msg: "{{ control_plane_events.stdout_lines }}"
|
|
|
+ - name: Get node logs
|
|
|
+ command: journalctl --no-pager -n 300 -u {{ openshift_service_type }}-node
|
|
|
+ register: logs_node
|
|
|
+ ignore_errors: true
|
|
|
+ - debug:
|
|
|
+ msg: "{{ logs_node.stdout_lines }}"
|
|
|
+ - name: Report control plane errors
|
|
|
+ fail:
|
|
|
+ msg: Control plane pods didn't come up
|
|
|
+
|
|
|
+- name: Wait for all control plane pods to become ready
|
|
|
+ oc_obj:
|
|
|
+ state: list
|
|
|
+ kind: pod
|
|
|
+ name: "master-{{ item }}-{{ openshift.node.nodename | lower }}"
|
|
|
+ namespace: kube-system
|
|
|
+ register: control_plane_health
|
|
|
+ until:
|
|
|
+ - "'results' in control_plane_health"
|
|
|
+ - "'results' in control_plane_health.results"
|
|
|
+ - control_plane_health.results.results | length > 0
|
|
|
+ - "'status' in control_plane_health.results.results[0]"
|
|
|
+ - "'conditions' in control_plane_health.results.results[0].status"
|
|
|
+ - control_plane_health.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True
|
|
|
+ retries: 60
|
|
|
+ delay: 5
|
|
|
+ with_items:
|
|
|
+ - "{{ 'etcd' if inventory_hostname in groups['oo_etcd_to_config'] else omit }}"
|
|
|
+ - api
|
|
|
+ - controllers
|
|
|
+ # Ignore errors so we can log troubleshooting info on failures.
|
|
|
+ ignore_errors: true
|
|
|
+
|
|
|
+- when: control_plane_health is failed
|
|
|
+ block:
|
|
|
+ # The earlier pod listing is only registered when the pods never appeared,
|
|
|
+ # so collect it again here before printing it.
|
|
|
+ - name: Get pods in the kube-system namespace
|
|
|
+ command: >
|
|
|
+ {{ openshift_client_binary }} get pods --config={{ openshift.common.config_base }}/master/admin.kubeconfig -n kube-system -o wide
|
|
|
+ register: control_plane_pods_list
|
|
|
+ ignore_errors: true
|
|
|
+ - debug:
|
|
|
+ msg: "{{ control_plane_pods_list.stdout_lines }}"
|
|
|
+ - name: Get events in the kube-system namespace
|
|
|
+ command: >
|
|
|
+ {{ openshift_client_binary }} get events --config={{ openshift.common.config_base }}/master/admin.kubeconfig -n kube-system
|
|
|
+ register: control_plane_events
|
|
|
+ ignore_errors: true
|
|
|
+ - debug:
|
|
|
+ msg: "{{ control_plane_events.stdout_lines }}"
|
|
|
+ - name: Get node logs
|
|
|
+ command: journalctl --no-pager -n 300 -u {{ openshift_service_type }}-node
|
|
|
+ register: logs_node
|
|
|
+ ignore_errors: true
|
|
|
+ - debug:
|
|
|
+ msg: "{{ logs_node.stdout_lines }}"
|
|
|
- name: Get API logs
|
|
|
command: >
|
|
|
/usr/local/bin/master-logs api api
|
|
@@ -229,14 +280,36 @@
|
|
|
ignore_errors: true
|
|
|
- debug:
|
|
|
msg: "{{ control_plane_logs_api.stdout_lines }}"
|
|
|
- - name: Get node logs
|
|
|
- command: journalctl --no-pager -n 300 -u {{ openshift_service_type }}-node
|
|
|
- register: control_plane_logs_node
|
|
|
+ - name: Get controllers logs
|
|
|
+ command: >
|
|
|
+ /usr/local/bin/master-logs controllers controllers
|
|
|
+ register: control_plane_logs_controllers
|
|
|
+ ignore_errors: true
|
|
|
+ - debug:
|
|
|
+ msg: "{{ control_plane_logs_controllers.stdout_lines }}"
|
|
|
+ - name: Get etcd logs
|
|
|
+ command: >
|
|
|
+ /usr/local/bin/master-logs etcd etcd
|
|
|
+ register: control_plane_logs_etcd
|
|
|
+ when: inventory_hostname in groups['oo_etcd_to_config']
|
|
|
ignore_errors: true
|
|
|
- debug:
|
|
|
- msg: "{{ control_plane_logs_node.stdout_lines }}"
|
|
|
+ msg: "{{ control_plane_logs_etcd.stdout_lines }}"
|
|
|
+ when: inventory_hostname in groups['oo_etcd_to_config']
|
|
|
+ - name: Report control plane errors
|
|
|
+ fail:
|
|
|
+ msg: Control plane pods didn't pass health check
|
|
|
|
|
|
-- name: Report control plane errors
|
|
|
- fail:
|
|
|
- msg: Control plane install failed.
|
|
|
- when: control_plane_health.stdout != 'ok'
|
|
|
+- name: Wait for Openshift APIs to register themselves
|
|
|
+ command: >
|
|
|
+ {{ openshift_client_binary }} get --raw /apis/{{ item }}/v1
|
|
|
+ register: openshift_apis
|
|
|
+ until: openshift_apis.rc == 0
|
|
|
+ with_items: "{{ l_core_api_list }}"
|
|
|
+ retries: 60
|
|
|
+ delay: 5
|
|
|
+
|
|
|
+- name: Remove oc cache to refresh a list of APIs
|
|
|
+ file:
|
|
|
+ path: "~/.kube/cache"
|
|
|
+ state: absent
|