
Adding logic to do a full cluster restart if we are incrementing the major version of ES

Eric Wolinetz · 7 years ago · commit 72c57c0d9c

roles/openshift_logging_elasticsearch/tasks/determine_version.yaml (+2 -0)

@@ -15,3 +15,5 @@
 - fail:
     msg: Invalid version specified for Elasticsearch
   when: es_version not in __allowed_es_versions
+
+- include_tasks: get_es_version.yml

roles/openshift_logging_elasticsearch/tasks/get_es_version.yml (+42 -0)

@@ -0,0 +1,42 @@
+---
+- command: >
+    oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+  register: _cluster_pods
+
+- name: "Getting ES version for logging-es cluster"
+  command: >
+    oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XGET 'https://localhost:9200/'
+  register: _curl_output
+  when: _cluster_pods.stdout_lines | count > 0
+
+- command: >
+    oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
+  register: _ops_cluster_pods
+
+- name: "Getting ES version for logging-es-ops cluster"
+  command: >
+    oc exec {{ _ops_cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XGET 'https://localhost:9200/'
+  register: _ops_curl_output
+  when: _ops_cluster_pods.stdout_lines | count > 0
+
+- set_fact:
+    _es_output: "{{ _curl_output.stdout | from_json }}"
+  when: _curl_output.stdout is defined
+
+- set_fact:
+    _es_ops_output: "{{ _ops_curl_output.stdout | from_json }}"
+  when: _ops_curl_output.stdout is defined
+
+- set_fact:
+    _es_installed_version: "{{ _es_output.version.number }}"
+  when:
+    - _es_output is defined
+    - _es_output.version is defined
+    - _es_output.version.number is defined
+
+- set_fact:
+    _es_ops_installed_version: "{{ _es_ops_output.version.number }}"
+  when:
+    - _es_ops_output is defined
+    - _es_ops_output.version is defined
+    - _es_ops_output.version.number is defined
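
For reference, the two curl tasks above hit each cluster's root endpoint, whose JSON response carries the running release under `version.number`. A minimal sketch (not part of the commit) of how the parsed values could be surfaced during a run:

```yaml
# Sketch only -- a debug task like this could be appended to get_es_version.yml
# to show what was parsed from the root endpoint, whose response has the form
# {"version": {"number": "2.4.4", ...}, ...}.
- debug:
    msg: "logging-es reports Elasticsearch {{ _es_installed_version | default('unknown') }}"

- debug:
    msg: "logging-es-ops reports Elasticsearch {{ _es_ops_installed_version | default('unknown') }}"
```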

roles/openshift_logging_elasticsearch/tasks/main.yaml (+14 -0)

@@ -32,6 +32,20 @@
 
 - include_tasks: determine_version.yaml
 
+- set_fact:
+    full_restart_cluster: True
+  when:
+    - _es_installed_version is defined
+    - __es_version.split('.')[0] | int >= 5
+    - _es_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int
+
+- set_fact:
+    full_restart_cluster: True
+  when:
+    - _es_ops_installed_version is defined
+    - __es_version.split('.')[0] | int >= 5
+    - _es_ops_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int
+
 # allow passing in a tempdir
 - name: Create temp directory for doing work in
   command: mktemp -d /tmp/openshift-logging-ansible-XXXXXX
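
To make the trigger concrete: only the major versions are compared, and only when the target major is 5 or higher, so an installed 2.4.4 cluster with `__es_version` bumped to a hypothetical 5.x flips `full_restart_cluster` to True, while a 2.4.1 to 2.4.4 bump does not. A minimal sketch of that evaluation, with values assumed for illustration:

```yaml
# Sketch only -- not part of the commit. Assumes a hypothetical bump of
# __es_version to "5.6.10" while the running cluster reports "2.4.4"; with
# these values the conditions above would set full_restart_cluster to True.
- set_fact:
    _es_installed_version: "2.4.4"
    __es_version: "5.6.10"

- debug:
    msg: >-
      full restart needed:
      {{ (__es_version.split('.')[0] | int >= 5) and
         (_es_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int) }}
```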

roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml (+22 -1)

@@ -11,17 +11,38 @@
   changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
   when: _cluster_pods.stdout_lines | count > 0
 
+# Flush ES
+- name: "Flushing for logging-{{ _cluster_component }} cluster"
+  command: >
+    oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPOST 'https://localhost:9200/_flush/synced'
+  register: _flush_output
+  changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
+  when:
+    - _cluster_pods.stdout_lines | count > 0
+    - full_restart_cluster | bool
+
 - command: >
     oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
   register: _cluster_dcs
 
+## restart all dcs for full restart
+- name: "Restart ES node {{ _es_node }}"
+  include_tasks: restart_es_node.yml
+  with_items: "{{ _cluster_dcs.stdout.split(' ') }}"
+  loop_control:
+    loop_var: _es_node
+  when:
+    - full_restart_cluster | bool
+
 ## restart the node if its dc is in the list of nodes to restart
 - name: "Restart ES node {{ _es_node }}"
   include_tasks: restart_es_node.yml
   with_items: "{{ _restart_logging_nodes }}"
   loop_control:
     loop_var: _es_node
-  when: _es_node in _cluster_dcs.stdout
+  when:
+    - not full_restart_cluster | bool
+    - _es_node in _cluster_dcs.stdout
 
 ## we may need a new first pod to run against -- fetch them all again
 - command: >
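
Because the jsonpath query prints every deploymentconfig name on a single space-separated line, the full-restart loop iterates over the split of that string. A hedged sketch of the expansion, with dc names invented for illustration:

```yaml
# Sketch only -- not part of the commit. With _cluster_dcs.stdout equal to
# "logging-es-data-master-abc logging-es-data-master-def" (names made up),
# splitting on spaces yields one loop item per deploymentconfig.
- debug:
    msg: "would restart dc {{ _es_node }}"
  with_items: "{{ _cluster_dcs.stdout.split(' ') }}"
  loop_control:
    loop_var: _es_node
```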

roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml (+2 -0)

@@ -14,6 +14,8 @@
     - _dc_output.results.results[0].status is defined
     - _dc_output.results.results[0].status.readyReplicas is defined
     - _dc_output.results.results[0].status.readyReplicas > 0
+    - _dc_output.results.results[0].status.updatedReplicas is defined
+    - _dc_output.results.results[0].status.updatedReplicas > 0
   retries: 60
   delay: 30
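
The added `updatedReplicas` check reads the deploymentconfig's status stanza alongside `readyReplicas`; abridged and with made-up values, that stanza looks roughly like this once a rollout has finished:

```yaml
# Abridged deploymentconfig status for illustration only (values hypothetical);
# the retry loop above waits until readyReplicas and updatedReplicas are both
# greater than zero before moving on to the next node.
status:
  replicas: 1
  updatedReplicas: 1
  availableReplicas: 1
  readyReplicas: 1
```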
 

roles/openshift_logging_elasticsearch/vars/main.yml (+2 -0)

@@ -4,6 +4,7 @@ __allowed_es_versions: ["3_5", "3_6", "3_7", "3_8"]
 __allowed_es_types: ["data-master", "data-client", "master", "client"]
 __es_log_appenders: ['file', 'console']
 __kibana_index_modes: ["unique", "shared_ops"]
+__es_version: "2.4.4"
 
 __es_local_curl: "curl -s --cacert /etc/elasticsearch/secret/admin-ca --cert /etc/elasticsearch/secret/admin-cert --key /etc/elasticsearch/secret/admin-key"
 
@@ -14,3 +15,4 @@ es_min_masters_default: "{{ (openshift_logging_elasticsearch_replica_count | int
 es_min_masters: "{{ (openshift_logging_elasticsearch_replica_count == 1) | ternary(1, es_min_masters_default) }}"
 es_recover_after_nodes: "{{ openshift_logging_elasticsearch_replica_count | int }}"
 es_recover_expected_nodes: "{{ openshift_logging_elasticsearch_replica_count | int }}"
+full_restart_cluster: False