7 years ago · 770bc1d3d9
--- a/roles/openshift_logging/tasks/main.yaml
+++ b/roles/openshift_logging/tasks/main.yaml
@@ -97,7 +97,15 @@
 
				   - not openshift_logging_install_logging | default(false) | bool
			
 
				 
			
 
				 - name: Cleaning up local temp dir
			
 
				-  local_action: file path="{{local_tmp.stdout}}" state=absent
			
 
				+  local_action: file path="{{ local_tmp.stdout }}" state=absent
			
 
				+  tags: logging_cleanup
			
 
				+  changed_when: False
			
 
				+  become: false
			
 
				+
			
 
				+- name: Cleaning up temp dir
			
 
				+  file:
			
 
				+    path: "{{ mktemp.stdout }}"
			
 
				+    state: absent
			
 
				   tags: logging_cleanup
			
 
				   changed_when: False
			
 
				   become: false
			
--- a/roles/openshift_logging_elasticsearch/tasks/full_cluster_restart.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/full_cluster_restart.yml
@@ -0,0 +1,219 @@
 
				+---
			
 
				+# Disable external communication for {{ _cluster_component }}
			
 
				+- name: Disable external communication for logging-{{ _cluster_component }}
			
 
				+  oc_service:
			
 
				+    state: present
			
 
				+    name: "logging-{{ _cluster_component }}"
			
 
				+    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
			
 
				+    selector:
			
 
				+      component: "{{ _cluster_component }}"
			
 
				+      provider: openshift
			
 
				+      connection: blocked
			
 
				+    labels:
			
 
				+      logging-infra: 'support'
			
 
				+    ports:
			
 
				+    - port: 9200
			
 
				+      targetPort: "restapi"
			
 
				+
			
 
				+- command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    get pod
			
 
				+    -l component={{ _cluster_component }},provider=openshift
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
			
 
				+  register: _cluster_pods
			
 
				+
			
 
				+# Sets cluster.routing.allocation.enable to "none" by running es_util in the first pod named in _cluster_pods.
			
 
				+# Skipped when no running pod was found: `oc exec` would have no pod name to target and the task would fail.
			
 
				+- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    exec {{ _cluster_pods.stdout.split(' ')[0] }}
			
 
				+    -c elasticsearch
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -- es_util --query=_cluster/settings -XPUT -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
			
 
				+  register: _disable_output
			
 
				+  # report a change only when stdout is non-empty and the JSON reply has acknowledged set to true
			
 
				+  changed_when:
			
 
				+  - "_disable_output.stdout != ''"
			
 
				+  - (_disable_output.stdout | from_json)['acknowledged'] | bool
			
 
				+  when: _cluster_pods.stdout != ''
			
 
				+
			
 
				+# Flush ES
			
 
				+# This is documented as a best effort, if it fails, we are okay with that
			
 
				+- name: "Flushing for logging-{{ _cluster_component }} cluster"
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    exec {{ _cluster_pods.stdout.split(' ')[0] }}
			
 
				+    -c elasticsearch
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -- es_util --query=_flush/synced -XPOST
			
 
				+  register: _flush_output
			
 
				+  changed_when:
			
 
				+  - "_flush_output.stdout != ''"
			
 
				+  - (_flush_output.stdout | from_json)['_shards']['successful'] > 0
			
 
				+  failed_when: false
			
 
				+
			
 
				+# Stop all nodes, then rollout all nodes
			
 
				+- name: Ready all nodes for scale down
			
 
				+  shell: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    annotate "dc/{{ _es_node }}"
			
 
				+    prior-replica-count=$({{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig -n {{ openshift_logging_elasticsearch_namespace }} get "dc/{{ _es_node }}" -o jsonpath='{.spec.replicas}')
			
 
				+    --overwrite
			
 
				+  with_items: "{{ logging_restart_cluster_dcs }}"
			
 
				+  loop_control:
			
 
				+    loop_var: _es_node
			
 
				+
			
 
				+- name: Scale down all nodes
			
 
				+  oc_scale:
			
 
				+    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
			
 
				+    kind: dc
			
 
				+    name: "{{ _es_node }}"
			
 
				+    replicas: 0
			
 
				+  with_items: "{{ logging_restart_cluster_dcs }}"
			
 
				+  loop_control:
			
 
				+    loop_var: _es_node
			
 
				+
			
 
				+- name: Rollout all updated DCs
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    rollout latest {{ _es_node }}
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+  with_items: "{{ logging_restart_cluster_dcs }}"
			
 
				+  loop_control:
			
 
				+    loop_var: _es_node
			
 
				+
			
 
				+- name: Scale up all nodes to previous replicas
			
 
				+  shell: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    scale "dc/{{ _es_node }}"
			
 
				+    --replicas=$({{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig -n {{ openshift_logging_elasticsearch_namespace }} get "dc/{{ _es_node }}" -o jsonpath='{.metadata.annotations.prior-replica-count}')
			
 
				+  with_items: "{{ logging_restart_cluster_dcs }}"
			
 
				+  loop_control:
			
 
				+    loop_var: _es_node
			
 
				+
			
 
				+# Wait for all nodes to be deployed/ready again
			
 
				+- name: "Waiting for {{ _es_node }} to finish scaling up"
			
 
				+  oc_obj:
			
 
				+    state: list
			
 
				+    name: "{{ _es_node }}"
			
 
				+    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
			
 
				+    kind: dc
			
 
				+  register: _dc_output
			
 
				+  until:
			
 
				+  - _dc_output.results.results[0].status is defined
			
 
				+  - _dc_output.results.results[0].status.readyReplicas is defined
			
 
				+  - _dc_output.results.results[0].status.readyReplicas > 0
			
 
				+  - _dc_output.results.results[0].status.updatedReplicas is defined
			
 
				+  - _dc_output.results.results[0].status.updatedReplicas > 0
			
 
				+  retries: 60
			
 
				+  delay: 30
			
 
				+  with_items: "{{ logging_restart_cluster_dcs }}"
			
 
				+  loop_control:
			
 
				+    loop_var: _es_node
			
 
				+  failed_when: false
			
 
				+
			
 
				+- when:
			
 
				+  - _dc_output.failed is defined
			
 
				+  - _dc_output.failed
			
 
				+  name: Manual intervention required
			
 
				+  run_once: true
			
 
				+  set_stats:
			
 
				+    data:
			
 
				+      installer_phase_logging:
			
 
				+        message: "Node in cluster logging-{{ _cluster_component }} was unable to rollout. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+
			
 
				+- command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    get pod
			
 
				+    -l component={{ _cluster_component }},provider=openshift
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
			
 
				+  register: _cluster_pods
			
 
				+
			
 
				+- name: Wait for cluster to be in at least yellow state
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    exec {{ _cluster_pods.stdout.split(' ')[0] }}
			
 
				+    -c elasticsearch
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -- es_cluster_health
			
 
				+  register: _pod_status
			
 
				+  until:
			
 
				+  - "_pod_status.stdout != ''"
			
 
				+  - (_pod_status.stdout | from_json)['status'] in ['yellow', 'green']
			
 
				+  retries: "{{ __elasticsearch_ready_retries }}"
			
 
				+  delay: 30
			
 
				+  changed_when: false
			
 
				+  failed_when: false
			
 
				+
			
 
				+- when:
			
 
				+  - _pod_status.failed is defined
			
 
				+  - _pod_status.failed
			
 
				+  run_once: true
			
 
				+  set_stats:
			
 
				+    data:
			
 
				+      installer_phase_logging:
			
 
				+        message: "Cluster logging-{{ _cluster_component }} was unable to recover to at least a yellow state. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+
			
 
				+- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    exec {{ _cluster_pods.stdout.split(' ')[0] }}
			
 
				+    -c elasticsearch
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -- es_util --query=_cluster/settings -XPUT -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
			
 
				+  register: _enable_output
			
 
				+  changed_when:
			
 
				+  - "_enable_output.stdout != ''"
			
 
				+  - (_enable_output.stdout | from_json)['acknowledged'] | bool
			
 
				+
			
 
				+# Skip healthcheck for a full cluster restart always since it could take a long time to recover?
			
 
				+- name: "Waiting for ES node {{ _es_node }} health to be in ['green']"
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    exec {{ _cluster_pods.stdout.split(' ')[0] }}
			
 
				+    -c elasticsearch
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -- es_cluster_health
			
 
				+  register: _pod_status
			
 
				+  until:
			
 
				+  - "_pod_status.stdout != ''"
			
 
				+  - (_pod_status.stdout | from_json)['status'] in ['green']
			
 
				+  retries: "{{ __elasticsearch_ready_retries }}"
			
 
				+  delay: 30
			
 
				+  changed_when: false
			
 
				+  failed_when: false
			
 
				+
			
 
				+- when:
			
 
				+  - _pod_status.failed is defined
			
 
				+  - _pod_status.failed
			
 
				+  run_once: true
			
 
				+  set_stats:
			
 
				+    data:
			
 
				+      installer_phase_logging:
			
 
				+        message: "Cluster logging-{{ _cluster_component }} was unable to recover to a green state. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+
			
 
				+# Reenable external communication for {{ _cluster_component }}
			
 
				+- name: Reenable external communication for logging-{{ _cluster_component }}
			
 
				+  oc_service:
			
 
				+    state: present
			
 
				+    name: "logging-{{ _cluster_component }}"
			
 
				+    namespace: "{{ openshift_logging_elasticsearch_namespace }}"
			
 
				+    selector:
			
 
				+      component: "{{ _cluster_component }}"
			
 
				+      provider: openshift
			
 
				+    labels:
			
 
				+      logging-infra: 'support'
			
 
				+    ports:
			
 
				+    - port: 9200
			
 
				+      targetPort: "restapi"
			
--- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml
@@ -1,120 +1,83 @@
 
				 ---
			
 
				 ## get all pods for the cluster
			
 
				 - command: >
			
 
				-    {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    get pod
			
 
				+    -l component={{ _cluster_component }},provider=openshift
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
			
 
				   register: _cluster_pods
			
 
				 
			
 
				-### Check for cluster state before making changes -- if its red then we don't want to continue
			
 
				+# make a temp dir for admin certs
			
 
				+- command: mktemp -d /tmp/openshift-logging-ansible-XXXXXX
			
 
				+  register: _logging_handler_tempdir
			
 
				+  changed_when: False
			
 
				+  check_mode: no
			
 
				+
			
 
				+- name: Exporting secrets to use communicating with the ES cluster
			
 
				+  command: >
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    extract secret/logging-elasticsearch
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    --keys=admin-cert --keys=admin-key
			
 
				+    --to={{ _logging_handler_tempdir.stdout }}
			
 
				+
			
 
				+### Check the cluster state before making changes -- if it is red or yellow, or nodes are missing, we don't want to continue
			
 
				 - name: "Checking current health for {{ _es_node }} cluster"
			
 
				-  shell: >
			
 
				-    {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec "{{ _cluster_pods.stdout.split(' ')[0] }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
			
 
				+  command: >
			
 
				+    curl -s -k
			
 
				+    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
			
 
				+    --key {{ _logging_handler_tempdir.stdout }}/admin-key
			
 
				+    https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/health?pretty
			
 
				   register: _pod_status
			
 
				   when: _cluster_pods.stdout_lines | count > 0
			
 
				 
			
 
				 - when:
			
 
				   - _pod_status.stdout is defined
			
 
				-  - (_pod_status.stdout | from_json)['status'] in ['red']
			
 
				+  # the jsonpath query prints all pod names space-separated on one line, so count the split names (stdout_lines is at most one line)
			
 
				+  - (_pod_status.stdout | from_json)['status'] in ['yellow', 'red'] or (_pod_status.stdout | from_json)['number_of_nodes'] != (_cluster_pods.stdout.split() | count)
			
 
				   block:
			
 
				   - name: Set Logging message to manually restart
			
 
				     run_once: true
			
 
				     set_stats:
			
 
				       data:
			
 
				         installer_phase_logging:
			
 
				-          message: "Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+          message: "Cluster logging-{{ _cluster_component }} was not in an optimal state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				 
			
 
				-  - debug: msg="Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+  - debug: msg="Cluster logging-{{ _cluster_component }} was not in an optimal state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				 
			
 
				-- when: _pod_status.stdout is undefined or (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']
			
 
				+# restart only when no health was collected (no running pods) or the cluster is green and every running pod has joined it;
			
 
				+# the pod count is taken from the split stdout because the names come back space-separated on a single line
			
 
				+- when: _pod_status.stdout is undefined or ( (_pod_status.stdout | from_json)['status'] in ['green'] and (_pod_status.stdout | from_json)['number_of_nodes'] == (_cluster_pods.stdout.split() | count) )
			
 
				   block:
			
 
				-  # Disable external communication for {{ _cluster_component }}
			
 
				-  - name: Disable external communication for logging-{{ _cluster_component }}
			
 
				-    oc_service:
			
 
				-      state: present
			
 
				-      name: "logging-{{ _cluster_component }}"
			
 
				-      namespace: "{{ openshift_logging_elasticsearch_namespace }}"
			
 
				-      selector:
			
 
				-        component: "{{ _cluster_component }}"
			
 
				-        provider: openshift
			
 
				-        connection: blocked
			
 
				-      labels:
			
 
				-        logging-infra: 'support'
			
 
				-      ports:
			
 
				-      - port: 9200
			
 
				-        targetPort: "restapi"
			
 
				-    when:
			
 
				-    - full_restart_cluster | bool
			
 
				-
			
 
				-  - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
			
 
				-    command: >
			
 
				-      {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
			
 
				-    register: _disable_output
			
 
				-    changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
			
 
				-    when: _cluster_pods.stdout_lines | count > 0
			
 
				-
			
 
				-  # Flush ES
			
 
				-  - name: "Flushing for logging-{{ _cluster_component }} cluster"
			
 
				-    command: >
			
 
				-      {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
			
 
				-    register: _flush_output
			
 
				-    changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
			
 
				-    when:
			
 
				-    - _cluster_pods.stdout_lines | count > 0
			
 
				-    - full_restart_cluster | bool
			
 
				-
			
 
				   - command: >
			
 
				-      {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
			
 
				+      {{ openshift_client_binary }}
			
 
				+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+      get dc
			
 
				+      -l component={{ _cluster_component }},provider=openshift
			
 
				+      -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+      -o jsonpath={.items[*].metadata.name}
			
 
				     register: _cluster_dcs
			
 
				 
			
 
				-  # If we are currently restarting the "es" cluster we want to check if we are scaling up the number of es nodes
			
 
				-  # If we are currently restarting the "es-ops" cluster we want to check if we are scaling up the number of ops nodes
			
 
				-  # If we've created a new node for that cluster then the appropriate variable will be true, otherwise we default to false
			
 
				-  - set_fact:
			
 
				-      _skip_healthcheck: "{{ ( __logging_scale_up | default(false) ) if _cluster_component == 'es' else ( __logging_ops_scale_up | default(false) ) }}"
			
 
				-
			
 
				   ## restart all dcs for full restart
			
 
				-  - name: "Restart ES node {{ _es_node }}"
			
 
				-    include_tasks: restart_es_node.yml
			
 
				-    with_items: "{{ _cluster_dcs.stdout_lines }}"
			
 
				-    loop_control:
			
 
				-      loop_var: _es_node
			
 
				+  # Hands the DC names of this cluster to full_cluster_restart.yml as a list.
			
 
				+  # The jsonpath query above prints the names space-separated on one line, so stdout is split;
			
 
				+  # stdout_lines would produce a single item containing every name.
			
 
				+  - name: "Performing full cluster restart for {{ _cluster_component }} cluster"
			
 
				+    include_tasks: full_cluster_restart.yml
			
 
				+    vars:
			
 
				+      logging_restart_cluster_dcs: "{{ _cluster_dcs.stdout.split() }}"
			
 
				     when:
			
 
				     - full_restart_cluster | bool
			
 
				 
			
 
				-  ## restart the node if it's dc is in the list of nodes to restart?
			
 
				-  - name: "Restart ES node {{ _es_node }}"
			
 
				-    include_tasks: restart_es_node.yml
			
 
				-    with_items: "{{ _restart_logging_nodes }}"
			
 
				-    loop_control:
			
 
				-      loop_var: _es_node
			
 
				+  ## restart the node if its dc is in the list of nodes to restart
			
 
				+  # Restarts only the requested nodes whose DC exists in this cluster.
			
 
				+  # stdout is split into a list of names so the intersection matches whole DC names
			
 
				+  # instead of substrings of the raw output string.
			
 
				+  - name: "Performing rolling cluster restart for {{ _cluster_component }} cluster"
			
 
				+    include_tasks: rolling_cluster_restart.yml
			
 
				+    vars:
			
 
				+      logging_restart_cluster_dcs: "{{ _restart_logging_nodes | intersect(_cluster_dcs.stdout.split()) }}"
			
 
				     when:
			
 
				     - not full_restart_cluster | bool
			
 
				-    - _es_node in _cluster_dcs.stdout
			
 
				-
			
 
				-  ## we may need a new first pod to run against -- fetch them all again
			
 
				-  - command: >
			
 
				-      {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
			
 
				-    register: _cluster_pods
			
 
				 
			
 
				-  - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
			
 
				-    command: >
			
 
				-      {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
			
 
				-    register: _enable_output
			
 
				-    changed_when: "'\"acknowledged\":true' in _enable_output.stdout"
			
 
				-    when: _cluster_pods.stdout != ""
			
 
				-
			
 
				-  # Reenable external communication for {{ _cluster_component }}
			
 
				-  - name: Reenable external communication for logging-{{ _cluster_component }}
			
 
				-    oc_service:
			
 
				-      state: present
			
 
				-      name: "logging-{{ _cluster_component }}"
			
 
				-      namespace: "{{ openshift_logging_elasticsearch_namespace }}"
			
 
				-      selector:
			
 
				-        component: "{{ _cluster_component }}"
			
 
				-        provider: openshift
			
 
				-      labels:
			
 
				-        logging-infra: 'support'
			
 
				-      ports:
			
 
				-      - port: 9200
			
 
				-        targetPort: "restapi"
			
 
				-    when:
			
 
				-    - full_restart_cluster | bool
			
 
				+# remove temp dir
			
 
				+# Deletes the directory created by the mktemp task above, which holds the extracted admin-cert and admin-key.
			
 
				+# No become override here: the removal runs with the same privileges as the tasks that created and filled the directory.
			
 
				+- name: Cleaning up temp dir
			
 
				+  file:
			
 
				+    path: "{{ _logging_handler_tempdir.stdout }}"
			
 
				+    state: absent
			
 
				+  changed_when: False
			
--- a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml
@@ -1,10 +1,27 @@
 
				 ---
			
 
				+# Ignore a failure here because this can fail on the first node during an install. Open questions: should this be skipped when health checks are skipped, and is it required for cluster sanity?
			
 
				+- name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
			
 
				+  command: >
			
 
				+    curl -s -k
			
 
				+    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
			
 
				+    --key {{ _logging_handler_tempdir.stdout }}/admin-key
			
 
				+    -XPUT 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/settings'
			
 
				+    -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
			
 
				+  register: _disable_output
			
 
				+  changed_when:
			
 
				+    - "_disable_output.stdout != ''"
			
 
				+    - (_disable_output.stdout | from_json)['acknowledged'] | bool
			
 
				+  failed_when: false
			
 
				+
			
 
				 - name: "Rolling out new pod(s) for {{ _es_node }}"
			
 
				   command: >
			
 
				-    {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig rollout latest {{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				+    {{ openshift_client_binary }}
			
 
				+    --config={{ openshift.common.config_base }}/master/admin.kubeconfig
			
 
				+    rollout latest {{ _es_node }}
			
 
				+    -n {{ openshift_logging_elasticsearch_namespace }}
			
 
				 
			
 
				-- when: not _skip_healthcheck | bool
			
 
				-  name: "Waiting for {{ _es_node }} to finish scaling up"
			
 
				+# always wait for this to scale up
			
 
				+- name: "Waiting for {{ _es_node }} to finish scaling up"
			
 
				   oc_obj:
			
 
				     state: list
			
 
				     name: "{{ _es_node }}"
			
@@ -19,23 +36,62 @@
 
				     - _dc_output.results.results[0].status.updatedReplicas > 0
			
 
				   retries: 60
			
 
				   delay: 30
			
 
				+  failed_when: false
			
 
				 
			
 
				-- when: not _skip_healthcheck | bool
			
 
				-  name: Gettings name(s) of replica pod(s)
			
 
				+- when:
			
 
				+    - _dc_output.failed is defined
			
 
				+    - _dc_output.failed
			
 
				+  run_once: true
			
 
				+  set_stats:
			
 
				+    data:
			
 
				+      installer_phase_logging:
			
 
				+        message: "Node {{ _es_node}} in cluster logging-{{ _cluster_component }} was unable to rollout. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+
			
 
				+- name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
			
 
				   command: >
			
 
				-    {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pods -l deploymentconfig={{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
			
 
				-  register: _pods
			
 
				-  changed_when: false
			
 
				+    curl -s -k
			
 
				+    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
			
 
				+    --key {{ _logging_handler_tempdir.stdout }}/admin-key
			
 
				+    -XPUT 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/settings'
			
 
				+    -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
			
 
				+  register: _enable_output
			
 
				+  changed_when:
			
 
				+    - "_enable_output.stdout != ''"
			
 
				+    - (_enable_output.stdout | from_json)['acknowledged'] | bool
			
 
				+
			
 
				+# evaluate the RC for _dc_output
			
 
				+# Fails the play when the rollout wait above was recorded as failed.
			
 
				+# `failed` is defaulted to false so a result without that key does not raise an undefined-variable error.
			
 
				+- name: Evaluating status of rolled out pod
			
 
				+  assert:
			
 
				+    that: not (_dc_output.failed | default(false))
			
 
				+    msg: "Node {{ _es_node}} in cluster logging-{{ _cluster_component }} was unable to rollout. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				 
			
 
				 - when: not _skip_healthcheck | bool
			
 
				-  name: "Waiting for ES node {{ _es_node }} health to be in ['green', 'yellow']"
			
 
				-  shell: >
			
 
				-    {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
			
 
				-  with_items: "{{ _pods.stdout.split(' ') }}"
			
 
				-  loop_control:
			
 
				-    loop_var: _pod
			
 
				+  name: "Waiting for ES node {{ _es_node }} health to be in ['green']"
			
 
				+  command: >
			
 
				+    curl -s -k
			
 
				+    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
			
 
				+    --key {{ _logging_handler_tempdir.stdout }}/admin-key
			
 
				+    https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/health?pretty
			
 
				   register: _pod_status
			
 
				-  until: (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']
			
 
				+  until:
			
 
				+    - "_pod_status.stdout != ''"
			
 
				+    - (_pod_status.stdout | from_json)['status'] in ['green']
			
 
				   retries: "{{ __elasticsearch_ready_retries }}"
			
 
				   delay: 30
			
 
				   changed_when: false
			
 
				+  failed_when: false
			
 
				+
			
 
				+# evaluate RC for _pod_status
			
 
				+- when:
			
 
				+    - _pod_status.failed is defined
			
 
				+    - _pod_status.failed
			
 
				+  run_once: true
			
 
				+  set_stats:
			
 
				+    data:
			
 
				+      installer_phase_logging:
			
 
				+        message: "Cluster logging-{{ _cluster_component }} was unable to recover to a green state. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
 
				+
			
 
				+# Fails the play when the green-state wait above was recorded as failed.
			
 
				+# That wait is skipped when _skip_healthcheck is true; `failed` is defaulted to false so a
			
 
				+# skipped result without that key passes instead of raising an undefined-variable error.
			
 
				+- name: Evaluating cluster health
			
 
				+  assert:
			
 
				+    that: not (_pod_status.failed | default(false))
			
 
				+    msg: "Cluster logging-{{ _cluster_component }} was unable to recover to a green state. Please see documentation regarding recovering during a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
			
--- a/roles/openshift_logging_elasticsearch/tasks/rolling_cluster_restart.yml
+++ b/roles/openshift_logging_elasticsearch/tasks/rolling_cluster_restart.yml
@@ -0,0 +1,26 @@
 
				+---
			
 
				+# If we are currently restarting the "es" cluster we want to check if we are scaling up the number of es nodes
			
 
				+# If we are currently restarting the "es-ops" cluster we want to check if we are scaling up the number of ops nodes
			
 
				+# If we've created a new node for that cluster then the appropriate variable will be true, otherwise we default to false
			
 
				+- set_fact:
			
 
				+    _skip_healthcheck: "{{ ( __logging_scale_up | default(false) ) if _cluster_component == 'es' else ( __logging_ops_scale_up | default(false) ) }}"
			
 
				+
			
 
				+# Flush ES
			
 
				+# It is possible for this to fail on a brand new cluster, so don't fail then
			
 
				+- name: "Flushing for logging-{{ _cluster_component }} cluster"
			
 
				+  command: >
			
 
				+    curl -s -k
			
 
				+    --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
			
 
				+    --key {{ _logging_handler_tempdir.stdout }}/admin-key
			
 
				+    -XPOST 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_flush/synced'
			
 
				+  register: _flush_output
			
 
				+  changed_when:
			
 
				+  - "_flush_output.stdout != ''"
			
 
				+  - (_flush_output.stdout | from_json)['_shards']['successful'] > 0
			
 
				+  failed_when: false
			
 
				+
			
 
				+# Loop over each DC for restart_es_node.yml
			
 
				+- include_tasks: restart_es_node.yml
			
 
				+  with_items: "{{ logging_restart_cluster_dcs }}"
			
 
				+  loop_control:
			
 
				+    loop_var: _es_node
			
--- a/roles/openshift_logging_elasticsearch/vars/main.yml
+++ b/roles/openshift_logging_elasticsearch/vars/main.yml
@@ -5,7 +5,7 @@ __kibana_index_modes: ["unique", "shared_ops"]
 
				 
			
 
				 __es_local_curl: "curl -s --cacert /etc/elasticsearch/secret/admin-ca --cert /etc/elasticsearch/secret/admin-cert --key /etc/elasticsearch/secret/admin-key"
			
 
				 
			
 
				-__elasticsearch_ready_retries: "{{ openshift_logging_elasticsearch_poll_timeout_minutes | default(20) | int * 2 }}"
			
 
				+__elasticsearch_ready_retries: "{{ openshift_logging_elasticsearch_poll_timeout_minutes | default(60) | int * 2 }}"
			
 
				 
			
 
				 # TODO: integrate these
			
 
				 # majority of the replica count; `//` is integer division (Jinja2 `/` would render a float such as 2.5)
			
 
				 es_node_quorum: "{{ openshift_logging_elasticsearch_replica_count | int // 2 + 1 }}"