Bläddra i källkod

Merge pull request #5495 from abutcher/ca-redeploy-expired-etcd

Automatic merge from submit-queue

Improve CA redeploy restart logic

Expired etcd certificates require special casing around restarts in the certificate redeploy playbooks. When etcd certificates are expired we can't restart masters or nodes. We also can't simply restart etcd, because its peers also have expired certificates, so we must fully stop and then start etcd when we detect expired etcd certificates.

`openshift-ca.yml`:
* No longer restart master services when etcd certificates were previously expired.
* No longer restart node services when master or etcd certificates were previously expired.

`etcd-ca.yml`:
* No longer restart master services when etcd certificates were previously expired.

Tested using [gen_expired_tls.sh](https://gist.github.com/abutcher/bdd20b9d582675d89fb22658689c49e4) on one of my master/etcd hosts to ensure that restart logic changes caused us to skip the right restarts and do a full start/stop of etcd in the `redeploy-certificates.yml` and `redeploy-etcd-certificates.yml` playbooks.

Note: When this happens with a cluster and you want to replace all certificates, you can run `redeploy-etcd-ca.yml` and `redeploy-openshift-ca.yml` (both of which will skip restarts) and then run `redeploy-certificates.yml`, which will now be able to do a full stop/start of etcd.
OpenShift Merge Robot 7 år sedan
förälder
incheckning
02485be8fe

+ 6 - 0
playbooks/byo/openshift-cluster/redeploy-certificates.yml

@@ -7,6 +7,10 @@
   tags:
   - always
 
+- include: ../../common/openshift-cluster/redeploy-certificates/check-expiry.yml
+  vars:
+    g_check_expiry_hosts: 'oo_etcd_to_config'
+
 - include: ../../common/openshift-cluster/redeploy-certificates/etcd.yml
 
 - include: ../../common/openshift-cluster/redeploy-certificates/masters.yml
@@ -14,6 +18,8 @@
 - include: ../../common/openshift-cluster/redeploy-certificates/nodes.yml
 
 - include: ../../common/openshift-etcd/restart.yml
+  vars:
+    g_etcd_certificates_expired: "{{ ('expired' in (hostvars | oo_select_keys(groups['etcd']) | oo_collect('check_results.check_results.etcd') | oo_collect('health'))) | bool }}"
 
 - include: ../../common/openshift-master/restart.yml
 

+ 6 - 0
playbooks/byo/openshift-cluster/redeploy-etcd-certificates.yml

@@ -7,8 +7,14 @@
   tags:
   - always
 
+- include: ../../common/openshift-cluster/redeploy-certificates/check-expiry.yml
+  vars:
+    g_check_expiry_hosts: 'oo_etcd_to_config'
+
 - include: ../../common/openshift-cluster/redeploy-certificates/etcd.yml
 
 - include: ../../common/openshift-etcd/restart.yml
+  vars:
+    g_etcd_certificates_expired: "{{ ('expired' in (hostvars | oo_select_keys(groups['etcd']) | oo_collect('check_results.check_results.etcd') | oo_collect('health'))) | bool }}"
 
 - include: ../../common/openshift-master/restart.yml

+ 12 - 0
playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml

@@ -0,0 +1,12 @@
+---
+- name: Check cert expirys
+  hosts: "{{ g_check_expiry_hosts }}"
+  vars:
+    openshift_certificate_expiry_show_all: yes
+  roles:
+  # Sets 'check_results' per host which contains health status for
+  # etcd, master and node certificates.  We will use 'check_results'
+  # to determine if any certificates were expired prior to running
+  # this playbook. Service restarts will be skipped if any
+  # certificates were previously expired.
+  - role: openshift_certificate_expiry

+ 16 - 10
playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml

@@ -153,13 +153,19 @@
     changed_when: false
 
 - include: ../../openshift-master/restart.yml
-  # Do not restart masters when master certificates were previously expired.
-  when: ('expired' not in hostvars
-                       | oo_select_keys(groups['oo_masters_to_config'])
-                       | oo_collect('check_results.check_results.ocp_certs')
-                       | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
-        and
-        ('expired' not in hostvars
-                          | oo_select_keys(groups['oo_masters_to_config'])
-                          | oo_collect('check_results.check_results.ocp_certs')
-                          | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # Do not restart masters when master or etcd certificates were previously expired.
+  when:
+  # masters
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # etcd
+  - ('expired' not in (hostvars
+      | oo_select_keys(groups['etcd'])
+      | oo_collect('check_results.check_results.etcd')
+      | oo_collect('health')))

+ 42 - 21
playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml

@@ -7,7 +7,7 @@
     when: not openshift.common.version_gte_3_2_or_1_2 | bool
 
 - name: Check cert expirys
-  hosts: oo_nodes_to_config:oo_masters_to_config
+  hosts: oo_nodes_to_config:oo_masters_to_config:oo_etcd_to_config
   vars:
     openshift_certificate_expiry_show_all: yes
   roles:
@@ -209,16 +209,22 @@
     with_items: "{{ client_users }}"
 
 - include: ../../openshift-master/restart.yml
-  # Do not restart masters when master certificates were previously expired.
-  when: ('expired' not in hostvars
-                       | oo_select_keys(groups['oo_masters_to_config'])
-                       | oo_collect('check_results.check_results.ocp_certs')
-                       | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
-        and
-        ('expired' not in hostvars
-                          | oo_select_keys(groups['oo_masters_to_config'])
-                          | oo_collect('check_results.check_results.ocp_certs')
-                          | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # Do not restart masters when master or etcd certificates were previously expired.
+  when:
+  # masters
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # etcd
+  - ('expired' not in (hostvars
+      | oo_select_keys(groups['etcd'])
+      | oo_collect('check_results.check_results.etcd')
+      | oo_collect('health')))
 
 - name: Distribute OpenShift CA certificate to nodes
   hosts: oo_nodes_to_config
@@ -268,13 +274,28 @@
     changed_when: false
 
 - include: ../../openshift-node/restart.yml
-  # Do not restart nodes when node certificates were previously expired.
-  when: ('expired' not in hostvars
-                       | oo_select_keys(groups['oo_nodes_to_config'])
-                       | oo_collect('check_results.check_results.ocp_certs')
-                       | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/server.crt"}))
-        and
-        ('expired' not in hostvars
-                          | oo_select_keys(groups['oo_nodes_to_config'])
-                          | oo_collect('check_results.check_results.ocp_certs')
-                          | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/ca.crt"}))
+  # Do not restart nodes when node, master or etcd certificates were previously expired.
+  when:
+  # nodes
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_nodes_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_nodes_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/ca.crt"}))
+  # masters
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # etcd
+  - ('expired' not in (hostvars
+      | oo_select_keys(groups['etcd'])
+      | oo_collect('check_results.check_results.etcd')
+      | oo_collect('health')))

+ 18 - 0
playbooks/common/openshift-etcd/restart.yml

@@ -7,3 +7,21 @@
       service:
         name: "{{ 'etcd_container' if openshift.common.etcd_runtime == 'docker' else 'etcd' }}"
         state: restarted
+      when:
+        - not g_etcd_certificates_expired | default(false) | bool
+
+- name: Restart etcd
+  hosts: oo_etcd_to_config
+  tasks:
+    - name: stop etcd
+      service:
+        name: "{{ 'etcd_container' if openshift.common.etcd_runtime == 'docker' else 'etcd' }}"
+        state: stopped
+      when:
+        - g_etcd_certificates_expired | default(false) | bool
+    - name: start etcd
+      service:
+        name: "{{ 'etcd_container' if openshift.common.etcd_runtime == 'docker' else 'etcd' }}"
+        state: started
+      when:
+        - g_etcd_certificates_expired | default(false) | bool