瀏覽代碼

Refactor csr approval for client certs ignore ready

Currently, we use node Ready = True to detect if client csrs
are approved and working.  This does not work for cni plugin
clusters because the nodes will not be ready until later.

This commit alters the logic to only wait for nodes to
be visible in output of 'oc get nodes'  If a node is listed
in that output, it is only after the client csr has been
approved.  If for some reason there is some kind of race
condition (say, cert is about to expire at some future time)
we will still approve any pending csr for nodes we know about.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1625873
Michael Gugino 6 年之前
父節點
當前提交
1b7cd18f95
共有 2 個文件被更改,包括 22 次插入26 次删除
  1. 14 18
      roles/lib_openshift/library/oc_csr_approve.py
  2. 8 8
      roles/lib_openshift/test/test_oc_csr_approve.py

+ 14 - 18
roles/lib_openshift/library/oc_csr_approve.py

@@ -105,12 +105,11 @@ class CSRapprove(object):
             self.module.fail_json(**self.result)
         return stdout
 
-    def get_ready_nodes(self):
-        '''Get list of nodes currently ready vi oc'''
+    def get_nodes(self):
+        '''Get all nodes via oc get nodes -ojson'''
         # json output is necessary for consistency here.
         command = "{} {} get nodes -ojson".format(self.oc_bin, self.oc_conf)
         stdout = self.run_command(command)
-
         try:
             data = json.loads(stdout)
         except JSONDecodeError as err:
@@ -119,14 +118,7 @@ class CSRapprove(object):
             self.result['state'] = 'unknown'
             self.module.fail_json(**self.result)
         self.result['oc_get_nodes'] = data
-        ready_nodes = []
-        for node in data['items']:
-            if node.get('status') and node['status'].get('conditions'):
-                for condition in node['status']['conditions']:
-                    # "True" is a string here, not a boolean.
-                    if condition['type'] == "Ready" and condition['status'] == 'True':
-                        ready_nodes.append(node['metadata']['name'])
-        return ready_nodes
+        return [node['metadata']['name'] for node in data['items']]
 
     def get_csrs(self):
         '''Retrieve csrs from cluster using oc get csr -ojson'''
@@ -252,10 +244,12 @@ class CSRapprove(object):
 
     def run(self):
         '''execute the csr approval process'''
-        nodes_ready = self.get_ready_nodes()
-        # don't need to check nodes that are already ready.
-        client_not_ready_nodes = [item for item in self.node_list
-                                  if item not in nodes_ready]
+        all_nodes = self.get_nodes()
+        # don't need to check nodes that have already joined the cluster because
+        # client csr needs to be approved for now to show in output of
+        # oc get nodes.
+        not_found_nodes = [item for item in self.node_list
+                           if item not in all_nodes]
 
         # Get all csrs, no good way to filter on pending.
         client_csrs = self.get_csrs()
@@ -263,11 +257,13 @@ class CSRapprove(object):
         client_csr_dict = self.process_csrs(client_csrs, "client")
         self.result['client_csrs'] = client_csr_dict
 
-        # This method is fail-happy and expects all non-Ready nodes have available
+        # This method is fail-happy and expects all not found nodes have available
         # csrs.  Handle failure for this method via ansible retry/until.
-        self.confirm_needed_requests_present(client_not_ready_nodes,
+        self.confirm_needed_requests_present(not_found_nodes,
                                              client_csr_dict)
-
+        # If for some reason a node is found in oc get nodes but it still needs
+        # a client csr approved, this method will approve all outstanding
+        # client csrs for any node in our self.node_list.
         self.approve_csrs(client_csr_dict, 'client')
 
         # # Server Cert Section # #

+ 8 - 8
roles/lib_openshift/test/test_oc_csr_approve.py

@@ -43,7 +43,7 @@ def test_parse_subject_cn():
     assert oc_csr_approve.parse_subject_cn(subject) == 'test.io'
 
 
-def test_get_ready_nodes():
+def test_get_nodes():
     output_file = os.path.join(ASSET_PATH, 'oc_get_nodes.json')
     with open(output_file) as stdoutfile:
         oc_get_nodes_stdout = stdoutfile.read()
@@ -53,8 +53,8 @@ def test_get_ready_nodes():
 
     with patch(RUN_CMD_MOCK) as call_mock:
         call_mock.return_value = (0, oc_get_nodes_stdout, '')
-        ready_nodes = approver.get_ready_nodes()
-    assert ready_nodes == ['fedora1.openshift.io', 'fedora3.openshift.io']
+        all_nodes = approver.get_nodes()
+    assert all_nodes == ['fedora1.openshift.io', 'fedora2.openshift.io', 'fedora3.openshift.io']
 
 
 def test_get_csrs():
@@ -89,15 +89,15 @@ def test_get_csrs():
 def test_confirm_needed_requests_present():
     module = DummyModule({})
     csr_dict = {'some-csr': 'fedora1.openshift.io'}
-    not_ready_nodes = ['host1']
+    not_found_nodes = ['host1']
     approver = CSRapprove(module, 'oc', '/dev/null', [])
     with pytest.raises(Exception) as err:
-        approver.confirm_needed_requests_present(not_ready_nodes, csr_dict)
+        approver.confirm_needed_requests_present(not_found_nodes, csr_dict)
         assert 'Exception: Could not find csr for nodes: host1' in str(err)
 
-    not_ready_nodes = ['fedora1.openshift.io']
+    not_found_nodes = ['fedora1.openshift.io']
     # this should complete silently
-    approver.confirm_needed_requests_present(not_ready_nodes, csr_dict)
+    approver.confirm_needed_requests_present(not_found_nodes, csr_dict)
 
 
 def test_approve_csrs():
@@ -173,7 +173,7 @@ def test_verify_server_csrs():
 
 if __name__ == '__main__':
     test_parse_subject_cn()
-    test_get_ready_nodes()
+    test_get_nodes()
     test_get_csrs()
     test_confirm_needed_requests_present()
     test_approve_csrs()