# restart_cluster.yml
  1. ---
  2. ## get all pods for the cluster
  3. - command: >
  4. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
  5. register: _cluster_pods
  6. ### Check for cluster state before making changes -- if its red then we don't want to continue
  7. - name: "Checking current health for {{ _es_node }} cluster"
  8. shell: >
  9. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec "{{ _cluster_pods.stdout.split(' ')[0] }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health
  10. register: _pod_status
  11. when: _cluster_pods.stdout_lines | count > 0
  12. - when:
  13. - _pod_status.stdout is defined
  14. - (_pod_status.stdout | from_json)['status'] in ['red']
  15. block:
  16. - name: Set Logging message to manually restart
  17. run_once: true
  18. set_stats:
  19. data:
  20. installer_phase_logging:
  21. message: "Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
  22. - debug: msg="Cluster logging-{{ _cluster_component }} was in a red state and will not be automatically restarted. Please see documentation regarding doing a {{ 'full' if full_restart_cluster | bool else 'rolling'}} cluster restart."
  23. - when: _pod_status.stdout is undefined or (_pod_status.stdout | from_json)['status'] in ['green', 'yellow']
  24. block:
  25. # Disable external communication for {{ _cluster_component }}
  26. - name: Disable external communication for logging-{{ _cluster_component }}
  27. oc_service:
  28. state: present
  29. name: "logging-{{ _cluster_component }}"
  30. namespace: "{{ openshift_logging_elasticsearch_namespace }}"
  31. selector:
  32. component: "{{ _cluster_component }}"
  33. provider: openshift
  34. connection: blocked
  35. labels:
  36. logging-infra: 'support'
  37. ports:
  38. - port: 9200
  39. targetPort: "restapi"
  40. when:
  41. - full_restart_cluster | bool
  42. - name: "Disable shard balancing for logging-{{ _cluster_component }} cluster"
  43. command: >
  44. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "none" } }'
  45. register: _disable_output
  46. changed_when: "'\"acknowledged\":true' in _disable_output.stdout"
  47. when: _cluster_pods.stdout_lines | count > 0
  48. # Flush ES
  49. - name: "Flushing for logging-{{ _cluster_component }} cluster"
  50. command: >
  51. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced'
  52. register: _flush_output
  53. changed_when: "'\"acknowledged\":true' in _flush_output.stdout"
  54. when:
  55. - _cluster_pods.stdout_lines | count > 0
  56. - full_restart_cluster | bool
  57. - command: >
  58. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name}
  59. register: _cluster_dcs
  60. # If we are currently restarting the "es" cluster we want to check if we are scaling up the number of es nodes
  61. # If we are currently restarting the "es-ops" cluster we want to check if we are scaling up the number of ops nodes
  62. # If we've created a new node for that cluster then the appropriate variable will be true, otherwise we default to false
  63. - set_fact:
  64. _skip_healthcheck: "{{ ( __logging_scale_up | default(false) ) if _cluster_component == 'es' else ( __logging_ops_scale_up | default(false) ) }}"
  65. ## restart all dcs for full restart
  66. - name: "Restart ES node {{ _es_node }}"
  67. include_tasks: restart_es_node.yml
  68. with_items: "{{ _cluster_dcs.stdout_lines }}"
  69. loop_control:
  70. loop_var: _es_node
  71. when:
  72. - full_restart_cluster | bool
  73. ## restart the node if it's dc is in the list of nodes to restart?
  74. - name: "Restart ES node {{ _es_node }}"
  75. include_tasks: restart_es_node.yml
  76. with_items: "{{ _restart_logging_nodes }}"
  77. loop_control:
  78. loop_var: _es_node
  79. when:
  80. - not full_restart_cluster | bool
  81. - _es_node in _cluster_dcs.stdout
  82. ## we may need a new first pod to run against -- fetch them all again
  83. - command: >
  84. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}
  85. register: _cluster_pods
  86. - name: "Enable shard balancing for logging-{{ _cluster_component }} cluster"
  87. command: >
  88. {{ openshift_client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }'
  89. register: _enable_output
  90. changed_when: "'\"acknowledged\":true' in _enable_output.stdout"
  91. when: _cluster_pods.stdout != ""
  92. # Reenable external communication for {{ _cluster_component }}
  93. - name: Reenable external communication for logging-{{ _cluster_component }}
  94. oc_service:
  95. state: present
  96. name: "logging-{{ _cluster_component }}"
  97. namespace: "{{ openshift_logging_elasticsearch_namespace }}"
  98. selector:
  99. component: "{{ _cluster_component }}"
  100. provider: openshift
  101. labels:
  102. logging-infra: 'support'
  103. ports:
  104. - port: 9200
  105. targetPort: "restapi"
  106. when:
  107. - full_restart_cluster | bool