|
@@ -1,120 +1,83 @@
|
|
---
|
|
---
|
|
## get all pods for the cluster
|
|
## get all pods for the cluster
|
|
- command: >
|
|
- command: >
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
|
|
|
|
|
|
+ {{ openshift_client_binary }}
|
|
|
|
+ --config={{ openshift.common.config_base }}/master/admin.kubeconfig
|
|
|
|
+ get pod
|
|
|
|
+ -l component={{ _cluster_component }},provider=openshift
|
|
|
|
+ -n {{ openshift_logging_elasticsearch_namespace }}
|
|
|
|
+ -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
|
|
register: _cluster_pods
|
|
register: _cluster_pods
|
|
|
|
|
|
-### Check for cluster state before making changes -- if its red then we don't want to continue
|
|
|
|
|
|
+# create a scratch directory that will receive the exported ES admin cert and key
|
|
|
|
+- command: mktemp -d /tmp/openshift-logging-ansible-XXXXXX
|
|
|
|
+ register: _logging_handler_tempdir
|
|
|
|
+ check_mode: false
|
|
|
|
+ changed_when: false
|
|
|
|
+
|
|
|
|
+- name: Exporting secrets to use communicating with the ES cluster
|
|
|
|
+ command: >
|
|
|
|
+ {{ openshift_client_binary }}
|
|
|
|
+ --config={{ openshift.common.config_base }}/master/admin.kubeconfig
|
|
|
|
+ extract secret/logging-elasticsearch
|
|
|
|
+ -n {{ openshift_logging_elasticsearch_namespace }}
|
|
|
|
+ --keys=admin-cert --keys=admin-key
|
|
|
|
+ --to={{ _logging_handler_tempdir.stdout }}
|
|
|
|
+
|
|
|
|
+### Check for cluster state before making changes -- if it's red, yellow or missing nodes then we don't want to continue
|
|
- name: "Checking current health for {{ _es_node }} cluster"
|
|
- name: "Checking current health for {{ _es_node }} cluster"
|
|
- shell: >
|
|
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec "{{ _cluster_pods.stdout.split(' ')[0] }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
|
|
|
|
|
|
+ command: >
|
|
|
|
+ curl -s -k
|
|
|
|
+ --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
|
|
|
|
+ --key {{ _logging_handler_tempdir.stdout }}/admin-key
|
|
|
|
+ https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/health?pretty
|
|
register: _pod_status
|
|
register: _pod_status
|
|
when: _cluster_pods.stdout_lines | count > 0
|
|
when: _cluster_pods.stdout_lines | count > 0
|
|
|
|
|
|
- when:
|
|
- when:
|
|
- _pod_status.stdout is defined
|
|
- _pod_status.stdout is defined
|
|
- - (_pod_status.stdout | from_json)['status'] in ['red']
|
|
|
|
|
|
+ - (_pod_status.stdout | from_json)['status'] in ['yellow', 'red'] or (_pod_status.stdout | from_json)['number_of_nodes'] != (_cluster_pods.stdout.split(' ') | count)  # pod names are space-separated on one line, so split to count them
|
|
block:
|
|
block:
|
|
- name: Set Logging message to manually restart
|
|
- name: Set Logging message to manually restart
|
|
run_once: true
|
|
run_once: true
|
|
set_stats:
|
|
set_stats:
|
|
data:
|
|
data:
|
|
installer_phase_logging:
|
|
installer_phase_logging:
|
|
- message: "Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
|
|
|
|
|
|
+ message: "Cluster logging-{{ _cluster_component }} was not in an optimal state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
|
|
|
|
|
|
- - debug: msg="Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
|
|
|
|
|
|
+ - debug: msg="Cluster logging-{{ _cluster_component }} was not in an optimal state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
|
|
|
|
|
|
-- when: _pod_status.stdout is undefined or (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']
|
|
|
|
|
|
+- when: _pod_status.stdout is undefined or ( (_pod_status.stdout | from_json)['status'] in ['green'] and (_pod_status.stdout | from_json)['number_of_nodes'] == (_cluster_pods.stdout.split(' ') | count) )  # restart only when green and every running pod has joined the cluster
|
|
block:
|
|
block:
|
|
- # Disable external communication for {{ _cluster_component }}
|
|
|
|
- - name: Disable external communication for logging-{{ _cluster_component }}
|
|
|
|
- oc_service:
|
|
|
|
- state: present
|
|
|
|
- name: "logging-{{ _cluster_component }}"
|
|
|
|
- namespace: "{{ openshift_logging_elasticsearch_namespace }}"
|
|
|
|
- selector:
|
|
|
|
- component: "{{ _cluster_component }}"
|
|
|
|
- provider: openshift
|
|
|
|
- connection: blocked
|
|
|
|
- labels:
|
|
|
|
- logging-infra: 'support'
|
|
|
|
- ports:
|
|
|
|
- - port: 9200
|
|
|
|
- targetPort: "restapi"
|
|
|
|
- when:
|
|
|
|
- - full_restart_cluster | bool
|
|
|
|
-
|
|
|
|
- - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
|
|
|
|
- command: >
|
|
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
|
|
|
|
- register: _disable_output
|
|
|
|
- changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
|
|
|
|
- when: _cluster_pods.stdout_lines | count > 0
|
|
|
|
-
|
|
|
|
- # Flush ES
|
|
|
|
- - name: "Flushing for logging-{{ _cluster_component }} cluster"
|
|
|
|
- command: >
|
|
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
|
|
|
|
- register: _flush_output
|
|
|
|
- changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
|
|
|
|
- when:
|
|
|
|
- - _cluster_pods.stdout_lines | count > 0
|
|
|
|
- - full_restart_cluster | bool
|
|
|
|
-
|
|
|
|
- command: >
|
|
- command: >
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
|
|
|
|
|
|
+ {{ openshift_client_binary }}
|
|
|
|
+ --config={{ openshift.common.config_base }}/master/admin.kubeconfig
|
|
|
|
+ get dc
|
|
|
|
+ -l component={{ _cluster_component }},provider=openshift
|
|
|
|
+ -n {{ openshift_logging_elasticsearch_namespace }}
|
|
|
|
+ -o jsonpath={.items[*].metadata.name}
|
|
register: _cluster_dcs
|
|
register: _cluster_dcs
|
|
|
|
|
|
- # If we are currently restarting the "es" cluster we want to check if we are scaling up the number of es nodes
|
|
|
|
- # If we are currently restarting the "es-ops" cluster we want to check if we are scaling up the number of ops nodes
|
|
|
|
- # If we've created a new node for that cluster then the appropriate variable will be true, otherwise we default to false
|
|
|
|
- - set_fact:
|
|
|
|
- _skip_healthcheck: "{{ ( __logging_scale_up | default(false) ) if _cluster_component == 'es' else ( __logging_ops_scale_up | default(false) ) }}"
|
|
|
|
-
|
|
|
|
## restart all dcs for full restart
|
|
## restart all dcs for full restart
|
|
- - name: "Restart ES node {{ _es_node }}"
|
|
|
|
- include_tasks: restart_es_node.yml
|
|
|
|
- with_items: "{{ _cluster_dcs.stdout_lines }}"
|
|
|
|
- loop_control:
|
|
|
|
- loop_var: _es_node
|
|
|
|
|
|
+ - name: "Performing full cluster restart for {{ _cluster_component }} cluster"
|
|
|
|
+ include_tasks: full_cluster_restart.yml
|
|
|
|
+ vars:
|
|
|
|
+ logging_restart_cluster_dcs: "{{ _cluster_dcs.stdout_lines }}"
|
|
when:
|
|
when:
|
|
- full_restart_cluster | bool
|
|
- full_restart_cluster | bool
|
|
|
|
|
|
- ## restart the node if it's dc is in the list of nodes to restart?
|
|
|
|
- - name: "Restart ES node {{ _es_node }}"
|
|
|
|
- include_tasks: restart_es_node.yml
|
|
|
|
- with_items: "{{ _restart_logging_nodes }}"
|
|
|
|
- loop_control:
|
|
|
|
- loop_var: _es_node
|
|
|
|
|
|
+ ## restart the node if its DC is in the list of nodes to restart
|
|
|
|
+ - name: "Performing rolling cluster restart for {{ _cluster_component }} cluster"
|
|
|
|
+ include_tasks: rolling_cluster_restart.yml
|
|
|
|
+ vars:
|
|
|
|
+ logging_restart_cluster_dcs: "{{ _restart_logging_nodes | intersect(_cluster_dcs.stdout) }}"
|
|
when:
|
|
when:
|
|
- not full_restart_cluster | bool
|
|
- not full_restart_cluster | bool
|
|
- - _es_node in _cluster_dcs.stdout
|
|
|
|
-
|
|
|
|
- ## we may need a new first pod to run against -- fetch them all again
|
|
|
|
- - command: >
|
|
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
|
|
|
|
- register: _cluster_pods
|
|
|
|
|
|
|
|
- - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
|
|
|
|
- command: >
|
|
|
|
- {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
|
|
|
|
- register: _enable_output
|
|
|
|
- changed_when: "'\"acknowledged\":true' in _enable_output.stdout"
|
|
|
|
- when: _cluster_pods.stdout != ""
|
|
|
|
-
|
|
|
|
- # Reenable external communication for {{ _cluster_component }}
|
|
|
|
- - name: Reenable external communication for logging-{{ _cluster_component }}
|
|
|
|
- oc_service:
|
|
|
|
- state: present
|
|
|
|
- name: "logging-{{ _cluster_component }}"
|
|
|
|
- namespace: "{{ openshift_logging_elasticsearch_namespace }}"
|
|
|
|
- selector:
|
|
|
|
- component: "{{ _cluster_component }}"
|
|
|
|
- provider: openshift
|
|
|
|
- labels:
|
|
|
|
- logging-infra: 'support'
|
|
|
|
- ports:
|
|
|
|
- - port: 9200
|
|
|
|
- targetPort: "restapi"
|
|
|
|
- when:
|
|
|
|
- - full_restart_cluster | bool
|
|
|
|
|
|
+# delete the scratch directory created for the exported admin cert and key
|
|
|
|
+- name: Cleaning up local temp dir
|
|
|
|
+ file:
|
|
|
|
+ path: "{{ _logging_handler_tempdir.stdout }}"
|
|
|
|
+ state: absent
|
|
|
|
+ become: false
|
|
|
|
+ changed_when: false
|