---
# we want to ignore if this fails because it is possible to fail on the first node when installing
# -- should we not do this if we're skipping health checks?
# -- is this required for cluster sanity?
- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
  command: >
    curl -s -k
    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
    --key {{ _logging_handler_tempdir.stdout }}/admin-key
    -XPUT 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/settings'
    -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
  register: _disable_output
  changed_when:
  - "_disable_output.stdout != ''"
  - (_disable_output.stdout | from_json)['acknowledged'] | bool
  failed_when: false

- name: "Rolling out new pod(s) for {{ _es_node }}"
  command: >
    {{ openshift_client_binary }}
    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
    rollout latest {{ _es_node }}
    -n {{ openshift_logging_elasticsearch_namespace }}

# always wait for this to scale up
- name: "Waiting for {{ _es_node }} to finish scaling up"
  oc_obj:
    state: list
    name: "{{ _es_node }}"
    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
    kind: dc
  register: _dc_output
  until:
  - _dc_output.results.results[0].status is defined
  - _dc_output.results.results[0].status.readyReplicas is defined
  - _dc_output.results.results[0].status.readyReplicas > 0
  - _dc_output.results.results[0].status.updatedReplicas is defined
  - _dc_output.results.results[0].status.updatedReplicas > 0
  retries: 60
  delay: 30
  failed_when: false

- when:
  - _dc_output.failed is defined
  - _dc_output.failed
  run_once: true
  set_stats:
    data:
      installer_phase_logging:
        message: "Node {{ _es_node }} in cluster logging-{{ _cluster_component }} was unable to rollout. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling' }} cluster restart."

- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
  command: >
    curl -s -k
    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
    --key {{ _logging_handler_tempdir.stdout }}/admin-key
    -XPUT 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/settings'
    -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
  register: _enable_output
  changed_when:
  - "_enable_output.stdout != ''"
  - (_enable_output.stdout | from_json)['acknowledged'] | bool

# evaluate the RC for _dc_output
- name: Evaluating status of rolled out pod
  assert:
    that: _dc_output.failed is undefined or not _dc_output.failed
    msg: "Node {{ _es_node }} in cluster logging-{{ _cluster_component }} was unable to rollout. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling' }} cluster restart."
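# For reference (illustrative payload, assumed rather than captured from a live
# cluster): the _cluster/settings PUT calls above return an acknowledgement
# document of roughly the form
#   { "acknowledged": true, "persistent": {}, "transient": { "cluster": { "routing": { "allocation": { "enable": "none" } } } } }
# which is why the changed_when conditions parse the 'acknowledged' key out of
# the curl stdout.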
- when: not _skip_healthcheck | bool
  name: "Waiting for ES node {{ _es_node }} health to be in ['green']"
  command: >
    curl -s -k
    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
    --key {{ _logging_handler_tempdir.stdout }}/admin-key
    https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/health?pretty
  register: _pod_status
  until:
  - "_pod_status.stdout != ''"
  - (_pod_status.stdout | from_json)['status'] in ['green']
  retries: "{{ __elasticsearch_ready_retries }}"
  delay: 30
  changed_when: false
  failed_when: false

# evaluate RC for _pod_status
- when:
  - _pod_status.failed is defined
  - _pod_status.failed
  run_once: true
  set_stats:
    data:
      installer_phase_logging:
        message: "Cluster logging-{{ _cluster_component }} was unable to recover to a green state. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling' }} cluster restart."

- name: Evaluating cluster health
  assert:
    that: _pod_status.failed is undefined or not _pod_status.failed
    msg: "Cluster logging-{{ _cluster_component }} was unable to recover to a green state. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling' }} cluster restart."
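# For reference (illustrative output, assumed rather than captured from a live
# cluster): the _cluster/health endpoint polled above returns JSON such as
#   { "cluster_name": "logging-es", "status": "green", "number_of_nodes": 3, ... }
# and the until conditions only pass once 'status' reports green.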