# rolling_cluster_restart.yml
---
  # Decide whether the health check is skipped for this restart.
  # When the "es" cluster is being restarted the decision comes from
  # __logging_scale_up; for any other component (e.g. "es-ops") it comes from
  # __logging_ops_scale_up. Those flags are expected to be true when a new node
  # was created for that cluster; an undefined flag falls back to false.
  - name: "Determine whether to skip the health check for logging-{{ _cluster_component }} cluster"
    set_fact:
      _skip_healthcheck: "{{ ( __logging_scale_up | default(false) ) if _cluster_component == 'es' else ( __logging_ops_scale_up | default(false) ) }}"
  # Ask Elasticsearch for a synced flush before any node is restarted.
  # This call can fail on a brand new cluster, so the task is never allowed to
  # fail the play.
  - name: "Flushing for logging-{{ _cluster_component }} cluster"
    command: >
      curl -s -k
      --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
      --key {{ _logging_handler_tempdir.stdout }}/admin-key
      -XPOST 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_flush/synced'
    register: _flush_output
    failed_when: false
    # Report "changed" only when curl printed a body and that body states that
    # at least one shard was flushed. The empty-output test comes first so that
    # from_json is never applied to an empty string.
    changed_when: >-
      _flush_output.stdout != '' and
      (_flush_output.stdout | from_json)['_shards']['successful'] > 0
  # When the health check is being skipped, shard allocation is switched off
  # once for the whole cluster here rather than around every node restart.
  # The setting is written as a transient cluster setting.
  - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
    when: _skip_healthcheck | bool
    command: >
      curl -s -k
      --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
      --key {{ _logging_handler_tempdir.stdout }}/admin-key
      -XPUT 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/settings'
      -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
    register: _cluster_disable_output
    failed_when: false
    # "changed" requires a non-empty response whose "acknowledged" field is
    # truthy; the emptiness test is evaluated first to protect from_json.
    changed_when: >-
      _cluster_disable_output.stdout != '' and
      (_cluster_disable_output.stdout | from_json)['acknowledged'] | bool
  # Run restart_es_node.yml once for every entry (DC) in
  # logging_restart_cluster_dcs; the entry being processed is exposed to the
  # included tasks as _es_node.
  - include_tasks: restart_es_node.yml
    loop_control:
      loop_var: _es_node
    with_items: "{{ logging_restart_cluster_dcs }}"
  # Once the nodes have been restarted, poll the cluster's root URL until it
  # answers with HTTP 200. curl discards the response body (-o /dev/null) and
  # prints only the status code (-w), which is what `until` compares against.
  # The task runs only when the health check is skipped and the earlier
  # "disable shard balancing" request was acknowledged. It never marks the
  # host changed or failed, even if the retries are exhausted.
  #
  # The folded scalar already joins the command lines with single spaces, so
  # no backslash continuation is used: a trailing backslash would escape the
  # following space and stop "-w" from being parsed as a separate option.
  - name: "Waiting for ES cluster logging-{{ _cluster_component }} to be up"
    command: >
      curl -s -k
      --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
      --key {{ _logging_handler_tempdir.stdout }}/admin-key
      --max-time 30
      -o /dev/null
      -w '%{response_code}'
      https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/
    register: _cluster_status
    until: "_cluster_status.stdout == '200'"
    retries: "{{ __elasticsearch_ready_retries }}"
    delay: 30
    changed_when: false
    failed_when: false
    when:
      - _skip_healthcheck | bool
      - "_cluster_disable_output.stdout != ''"
      - (_cluster_disable_output.stdout | from_json)['acknowledged'] | bool
  # Turn shard allocation back on ("all") as a transient cluster setting.
  # This runs only when the health check is skipped and the earlier disable
  # request returned a non-empty, acknowledged response, i.e. only when this
  # play actually switched allocation off.
  - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
    when:
      - _skip_healthcheck | bool
      - "_cluster_disable_output.stdout != ''"
      - (_cluster_disable_output.stdout | from_json)['acknowledged'] | bool
    command: >
      curl -s -k
      --cert {{ _logging_handler_tempdir.stdout }}/admin-cert
      --key {{ _logging_handler_tempdir.stdout }}/admin-key
      -XPUT 'https://logging-{{ _cluster_component }}.{{ openshift_logging_elasticsearch_namespace }}.svc:9200/_cluster/settings'
      -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
    register: _cluster_enable_output
    # "changed" requires a non-empty response whose "acknowledged" field is
    # truthy; the emptiness test is evaluated first to protect from_json.
    changed_when: >-
      _cluster_enable_output.stdout != '' and
      (_cluster_enable_output.stdout | from_json)['acknowledged'] | bool