oc_csr_approve.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. #!/usr/bin/env python
  2. '''oc_csr_approve module'''
  3. # Copyright 2018 Red Hat, Inc. and/or its affiliates
  4. # and other contributors as indicated by the @author tags.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. import base64
  18. import json
  19. from ansible.module_utils.basic import AnsibleModule
  20. try:
  21. from json.decoder import JSONDecodeError
  22. except ImportError:
  23. JSONDecodeError = ValueError
  24. DOCUMENTATION = '''
  25. ---
  26. module: oc_csr_approve
  27. short_description: Retrieve, approve, and verify node client csrs
  28. version_added: "2.4"
  29. description:
  30. - Runs various commands to list csrs, approve csrs, and verify nodes are
  31. ready.
  32. author:
  33. - "Michael Gugino <mgugino@redhat.com>"
  34. '''
  35. EXAMPLES = '''
  36. # Pass in a message
  37. - name: Place credentials in file
  38. oc_csr_approve:
  39. oc_bin: "/usr/bin/oc"
  40. oc_conf: "/etc/origin/master/admin.kubeconfig"
  41. node_list: ['node1.example.com', 'node2.example.com']
  42. '''
  43. CERT_MODE = {'client': 'client auth', 'server': 'server auth'}
  44. def parse_subject_cn(subject_str):
  45. '''parse output of openssl req -noout -subject to retrieve CN.
  46. example input:
  47. 'subject=/C=US/CN=test.io/L=Raleigh/O=Red Hat/ST=North Carolina/OU=OpenShift\n'
  48. or
  49. 'subject=C = US, CN = test.io, L = City, O = Company, ST = State, OU = Dept\n'
  50. example output: 'test.io'
  51. '''
  52. stripped_string = subject_str[len('subject='):].strip()
  53. kv_strings = [x.strip() for x in stripped_string.split(',')]
  54. if len(kv_strings) == 1:
  55. kv_strings = [x.strip() for x in stripped_string.split('/')][1:]
  56. for item in kv_strings:
  57. item_parts = [x.strip() for x in item.split('=')]
  58. if item_parts[0] == 'CN':
  59. return item_parts[1]
  60. class CSRapprove(object):
  61. """Approves csr requests"""
  62. def __init__(self, module, oc_bin, oc_conf, node_list):
  63. '''init method'''
  64. self.module = module
  65. self.oc_bin = oc_bin
  66. self.oc_conf = oc_conf
  67. self.node_list = node_list
  68. self.all_subjects_found = []
  69. self.unwanted_csrs = []
  70. # Build a dictionary to hold all of our output information so nothing
  71. # is lost when we fail.
  72. self.result = {'changed': False, 'rc': 0,
  73. 'oc_get_nodes': None,
  74. 'client_csrs': None,
  75. 'server_csrs': None,
  76. 'all_subjects_found': self.all_subjects_found,
  77. 'client_approve_results': [],
  78. 'server_approve_results': [],
  79. 'unwanted_csrs': self.unwanted_csrs}
  80. def run_command(self, command, rc_opts=None):
  81. '''Run a command using AnsibleModule.run_command, or fail'''
  82. if rc_opts is None:
  83. rc_opts = {}
  84. rtnc, stdout, err = self.module.run_command(command, **rc_opts)
  85. if rtnc:
  86. self.result['failed'] = True
  87. self.result['msg'] = str(err)
  88. self.result['state'] = 'unknown'
  89. self.module.fail_json(**self.result)
  90. return stdout
  91. def get_nodes(self):
  92. '''Get all nodes via oc get nodes -ojson'''
  93. # json output is necessary for consistency here.
  94. command = "{} {} get nodes -ojson".format(self.oc_bin, self.oc_conf)
  95. stdout = self.run_command(command)
  96. try:
  97. data = json.loads(stdout)
  98. except JSONDecodeError as err:
  99. self.result['failed'] = True
  100. self.result['msg'] = str(err)
  101. self.result['state'] = 'unknown'
  102. self.module.fail_json(**self.result)
  103. self.result['oc_get_nodes'] = data
  104. return [node['metadata']['name'] for node in data['items']]
  105. def get_csrs(self):
  106. '''Retrieve csrs from cluster using oc get csr -ojson'''
  107. command = "{} {} get csr -ojson".format(self.oc_bin, self.oc_conf)
  108. stdout = self.run_command(command)
  109. try:
  110. data = json.loads(stdout)
  111. except JSONDecodeError as err:
  112. self.result['failed'] = True
  113. self.result['msg'] = str(err)
  114. self.result['state'] = 'unknown'
  115. self.module.fail_json(**self.result)
  116. return data['items']
  117. def process_csrs(self, csrs, mode):
  118. '''Return a dictionary of pending csrs where the format of the dict is
  119. k=csr name, v=Subject Common Name'''
  120. csr_dict = {}
  121. for item in csrs:
  122. name = item['metadata']['name']
  123. request_data = base64.b64decode(item['spec']['request'])
  124. command = "openssl req -noout -subject"
  125. # ansible's module.run_command accepts data to pipe via stdin as
  126. # as 'data' kwarg.
  127. rc_opts = {'data': request_data, 'binary_data': True}
  128. stdout = self.run_command(command, rc_opts=rc_opts)
  129. self.all_subjects_found.append(stdout)
  130. status = item['status'].get('conditions')
  131. if status:
  132. # If status is not an empty dictionary, cert is not pending.
  133. self.unwanted_csrs.append(item)
  134. continue
  135. if CERT_MODE[mode] not in item['spec']['usages']:
  136. self.unwanted_csrs.append(item)
  137. continue
  138. # parse common_name from subject string.
  139. common_name = parse_subject_cn(stdout)
  140. if common_name and common_name.startswith('system:node:'):
  141. # common name is typically prepended with system:node:.
  142. common_name = common_name.split('system:node:')[1]
  143. # we only want to approve csrs from nodes we know about.
  144. if common_name in self.node_list:
  145. csr_dict[name] = common_name
  146. else:
  147. self.unwanted_csrs.append(item)
  148. return csr_dict
  149. def confirm_needed_requests_present(self, not_ready_nodes, csr_dict):
  150. '''Ensure all non-Ready nodes have a csr, or fail'''
  151. nodes_needed = set(not_ready_nodes)
  152. for _, val in csr_dict.items():
  153. nodes_needed.discard(val)
  154. # check that we found all of our needed nodes
  155. if nodes_needed:
  156. missing_nodes = ', '.join(nodes_needed)
  157. self.result['failed'] = True
  158. self.result['msg'] = "Could not find csr for nodes: {}".format(missing_nodes)
  159. self.result['state'] = 'unknown'
  160. self.module.fail_json(**self.result)
  161. def approve_csrs(self, csr_pending_list, mode):
  162. '''Loop through csr_pending_list and call:
  163. oc adm certificate approve <item>'''
  164. res_mode = "{}_approve_results".format(mode)
  165. base_command = "{} {} adm certificate approve {}"
  166. approve_results = []
  167. for csr in csr_pending_list:
  168. command = base_command.format(self.oc_bin, self.oc_conf, csr)
  169. rtnc, stdout, err = self.module.run_command(command)
  170. approve_results.append(stdout)
  171. if rtnc:
  172. self.result['failed'] = True
  173. self.result['msg'] = str(err)
  174. self.result[res_mode] = approve_results
  175. self.result['state'] = 'unknown'
  176. self.module.fail_json(**self.result)
  177. self.result[res_mode] = approve_results
  178. # We set changed for approved client or server csrs.
  179. self.result['changed'] = bool(approve_results) or bool(self.result['changed'])
  180. def get_ready_nodes_server(self, nodes_list):
  181. '''Determine which nodes have working server certificates'''
  182. ready_nodes_server = []
  183. base_command = "{} {} get --raw /api/v1/nodes/{}/proxy/healthz"
  184. for node in nodes_list:
  185. # need this to look like /api/v1/nodes/<node>/proxy/healthz
  186. command = base_command.format(self.oc_bin, self.oc_conf, node)
  187. rtnc, _, _ = self.module.run_command(command)
  188. if not rtnc:
  189. # if we can hit that api endpoint, the node has a valid server
  190. # cert.
  191. ready_nodes_server.append(node)
  192. return ready_nodes_server
  193. def verify_server_csrs(self):
  194. '''We approved some server csrs, now we need to validate they are working.
  195. This function will attempt to retry 10 times in case of failure.'''
  196. # Attempt to try node endpoints a few times.
  197. attempts = 0
  198. # Find not_ready_nodes for server-side again
  199. nodes_server_ready = self.get_ready_nodes_server(self.node_list)
  200. # Create list of nodes that still aren't ready.
  201. not_ready_nodes_server = set([item for item in self.node_list if item not in nodes_server_ready])
  202. while not_ready_nodes_server:
  203. nodes_server_ready = self.get_ready_nodes_server(not_ready_nodes_server)
  204. # if we have same number of nodes_server_ready now, all of the previous
  205. # not_ready_nodes are now ready.
  206. if not len(not_ready_nodes_server - set(nodes_server_ready)):
  207. break
  208. attempts += 1
  209. if attempts > 9:
  210. self.result['failed'] = True
  211. self.result['rc'] = 1
  212. missing_nodes = not_ready_nodes_server - set(nodes_server_ready)
  213. msg = "Some nodes still not ready after approving server certs: {}"
  214. msg = msg.format(", ".join(missing_nodes))
  215. self.result['msg'] = msg
  216. self.module.fail_json(**self.result)
  217. def run(self):
  218. '''execute the csr approval process'''
  219. all_nodes = self.get_nodes()
  220. # don't need to check nodes that have already joined the cluster because
  221. # client csr needs to be approved for now to show in output of
  222. # oc get nodes.
  223. not_found_nodes = [item for item in self.node_list
  224. if item not in all_nodes]
  225. # Get all csrs, no good way to filter on pending.
  226. client_csrs = self.get_csrs()
  227. # process data in csrs and build a dictionary of client requests
  228. client_csr_dict = self.process_csrs(client_csrs, "client")
  229. self.result['client_csrs'] = client_csr_dict
  230. # This method is fail-happy and expects all not found nodes have available
  231. # csrs. Handle failure for this method via ansible retry/until.
  232. self.confirm_needed_requests_present(not_found_nodes,
  233. client_csr_dict)
  234. # If for some reason a node is found in oc get nodes but it still needs
  235. # a client csr approved, this method will approve all outstanding
  236. # client csrs for any node in our self.node_list.
  237. self.approve_csrs(client_csr_dict, 'client')
  238. # # Server Cert Section # #
  239. # Find not_ready_nodes for server-side
  240. nodes_server_ready = self.get_ready_nodes_server(self.node_list)
  241. # Create list of nodes that definitely need a server cert approved.
  242. not_ready_nodes_server = [item for item in self.node_list
  243. if item not in nodes_server_ready]
  244. # Get all csrs again, no good way to filter on pending.
  245. server_csrs = self.get_csrs()
  246. # process data in csrs and build a dictionary of server requests
  247. server_csr_dict = self.process_csrs(server_csrs, "server")
  248. self.result['server_csrs'] = server_csr_dict
  249. # This will fail if all server csrs are not present, but probably shouldn't
  250. # at this point since we spent some time hitting the api to see if the
  251. # nodes are already responding.
  252. self.confirm_needed_requests_present(not_ready_nodes_server,
  253. server_csr_dict)
  254. self.approve_csrs(server_csr_dict, 'server')
  255. self.verify_server_csrs()
  256. # We made it here, everything was successful, cleanup some debug info
  257. # so we don't spam logs.
  258. for key in ('client_csrs', 'server_csrs', 'unwanted_csrs'):
  259. self.result.pop(key)
  260. self.module.exit_json(**self.result)
  261. def run_module():
  262. '''Run this module'''
  263. module_args = dict(
  264. oc_bin=dict(type='path', required=False, default='oc'),
  265. oc_conf=dict(type='path', required=False, default='/etc/origin/master/admin.kubeconfig'),
  266. node_list=dict(type='list', required=True),
  267. )
  268. module = AnsibleModule(
  269. supports_check_mode=False,
  270. argument_spec=module_args
  271. )
  272. oc_bin = module.params['oc_bin']
  273. oc_conf = '--config={}'.format(module.params['oc_conf'])
  274. node_list = module.params['node_list']
  275. approver = CSRapprove(module, oc_bin, oc_conf, node_list)
  276. approver.run()
  277. def main():
  278. '''main'''
  279. run_module()
  280. if __name__ == '__main__':
  281. main()