oc_csr_approve.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. #!/usr/bin/env python
  2. '''oc_csr_approve module'''
  3. # Copyright 2018 Red Hat, Inc. and/or its affiliates
  4. # and other contributors as indicated by the @author tags.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. import base64
  18. import json
  19. from ansible.module_utils.basic import AnsibleModule
  20. try:
  21. from json.decoder import JSONDecodeError
  22. except ImportError:
  23. JSONDecodeError = ValueError
  24. DOCUMENTATION = '''
  25. ---
  26. module: oc_csr_approve
  27. short_description: Retrieve, approve, and verify node client csrs
  28. version_added: "2.4"
  29. description:
  30. - Runs various commands to list csrs, approve csrs, and verify nodes are
  31. ready.
  32. author:
  33. - "Michael Gugino <mgugino@redhat.com>"
  34. '''
  35. EXAMPLES = '''
  36. # Pass in a message
  37. - name: Place credentials in file
  38. oc_csr_approve:
  39. oc_bin: "/usr/bin/oc"
  40. oc_conf: "/etc/origin/master/admin.kubeconfig"
  41. node_list: ['node1.example.com', 'node2.example.com']
  42. '''
  43. CERT_MODE = {'client': 'client auth', 'server': 'server auth'}
  44. def parse_subject_cn(subject_str):
  45. '''parse output of openssl req -noout -subject to retrieve CN.
  46. example input:
  47. 'subject=/C=US/CN=test.io/L=Raleigh/O=Red Hat/ST=North Carolina/OU=OpenShift\n'
  48. or
  49. 'subject=C = US, CN = test.io, L = City, O = Company, ST = State, OU = Dept\n'
  50. example output: 'test.io'
  51. '''
  52. stripped_string = subject_str[len('subject='):].strip()
  53. kv_strings = [x.strip() for x in stripped_string.split(',')]
  54. if len(kv_strings) == 1:
  55. kv_strings = [x.strip() for x in stripped_string.split('/')][1:]
  56. for item in kv_strings:
  57. item_parts = [x.strip() for x in item.split('=')]
  58. if item_parts[0] == 'CN':
  59. return item_parts[1]
  60. class CSRapprove(object):
  61. """Approves csr requests"""
  62. def __init__(self, module, oc_bin, oc_conf, node_list):
  63. '''init method'''
  64. self.module = module
  65. self.oc_bin = oc_bin
  66. self.oc_conf = oc_conf
  67. self.node_list = node_list
  68. self.all_subjects_found = []
  69. self.unwanted_csrs = []
  70. # Build a dictionary to hold all of our output information so nothing
  71. # is lost when we fail.
  72. self.result = {'changed': False, 'rc': 0,
  73. 'oc_get_nodes': None,
  74. 'client_csrs': None,
  75. 'server_csrs': None,
  76. 'all_subjects_found': self.all_subjects_found,
  77. 'client_approve_results': [],
  78. 'server_approve_results': [],
  79. 'unwanted_csrs': self.unwanted_csrs}
  80. def run_command(self, command, rc_opts=None):
  81. '''Run a command using AnsibleModule.run_command, or fail'''
  82. if rc_opts is None:
  83. rc_opts = {}
  84. rtnc, stdout, err = self.module.run_command(command, **rc_opts)
  85. if rtnc:
  86. self.result['failed'] = True
  87. self.result['msg'] = str(err)
  88. self.result['state'] = 'unknown'
  89. self.module.fail_json(**self.result)
  90. return stdout
  91. def get_ready_nodes(self):
  92. '''Get list of nodes currently ready vi oc'''
  93. # json output is necessary for consistency here.
  94. command = "{} {} get nodes -ojson".format(self.oc_bin, self.oc_conf)
  95. stdout = self.run_command(command)
  96. try:
  97. data = json.loads(stdout)
  98. except JSONDecodeError as err:
  99. self.result['failed'] = True
  100. self.result['msg'] = str(err)
  101. self.result['state'] = 'unknown'
  102. self.module.fail_json(**self.result)
  103. self.result['oc_get_nodes'] = data
  104. ready_nodes = []
  105. for node in data['items']:
  106. if node.get('status') and node['status'].get('conditions'):
  107. for condition in node['status']['conditions']:
  108. # "True" is a string here, not a boolean.
  109. if condition['type'] == "Ready" and condition['status'] == 'True':
  110. ready_nodes.append(node['metadata']['name'])
  111. return ready_nodes
  112. def get_csrs(self):
  113. '''Retrieve csrs from cluster using oc get csr -ojson'''
  114. command = "{} {} get csr -ojson".format(self.oc_bin, self.oc_conf)
  115. stdout = self.run_command(command)
  116. try:
  117. data = json.loads(stdout)
  118. except JSONDecodeError as err:
  119. self.result['failed'] = True
  120. self.result['msg'] = str(err)
  121. self.result['state'] = 'unknown'
  122. self.module.fail_json(**self.result)
  123. return data['items']
  124. def process_csrs(self, csrs, mode):
  125. '''Return a dictionary of pending csrs where the format of the dict is
  126. k=csr name, v=Subject Common Name'''
  127. csr_dict = {}
  128. for item in csrs:
  129. name = item['metadata']['name']
  130. request_data = base64.b64decode(item['spec']['request'])
  131. command = "openssl req -noout -subject"
  132. # ansible's module.run_command accepts data to pipe via stdin as
  133. # as 'data' kwarg.
  134. rc_opts = {'data': request_data, 'binary_data': True}
  135. stdout = self.run_command(command, rc_opts=rc_opts)
  136. self.all_subjects_found.append(stdout)
  137. status = item['status'].get('conditions')
  138. if status:
  139. # If status is not an empty dictionary, cert is not pending.
  140. self.unwanted_csrs.append(item)
  141. continue
  142. if CERT_MODE[mode] not in item['spec']['usages']:
  143. self.unwanted_csrs.append(item)
  144. continue
  145. # parse common_name from subject string.
  146. common_name = parse_subject_cn(stdout)
  147. if common_name and common_name.startswith('system:node:'):
  148. # common name is typically prepended with system:node:.
  149. common_name = common_name.split('system:node:')[1]
  150. # we only want to approve csrs from nodes we know about.
  151. if common_name in self.node_list:
  152. csr_dict[name] = common_name
  153. else:
  154. self.unwanted_csrs.append(item)
  155. return csr_dict
  156. def confirm_needed_requests_present(self, not_ready_nodes, csr_dict):
  157. '''Ensure all non-Ready nodes have a csr, or fail'''
  158. nodes_needed = set(not_ready_nodes)
  159. for _, val in csr_dict.items():
  160. nodes_needed.discard(val)
  161. # check that we found all of our needed nodes
  162. if nodes_needed:
  163. missing_nodes = ', '.join(nodes_needed)
  164. self.result['failed'] = True
  165. self.result['msg'] = "Could not find csr for nodes: {}".format(missing_nodes)
  166. self.result['state'] = 'unknown'
  167. self.module.fail_json(**self.result)
  168. def approve_csrs(self, csr_pending_list, mode):
  169. '''Loop through csr_pending_list and call:
  170. oc adm certificate approve <item>'''
  171. res_mode = "{}_approve_results".format(mode)
  172. base_command = "{} {} adm certificate approve {}"
  173. approve_results = []
  174. for csr in csr_pending_list:
  175. command = base_command.format(self.oc_bin, self.oc_conf, csr)
  176. rtnc, stdout, err = self.module.run_command(command)
  177. approve_results.append(stdout)
  178. if rtnc:
  179. self.result['failed'] = True
  180. self.result['msg'] = str(err)
  181. self.result[res_mode] = approve_results
  182. self.result['state'] = 'unknown'
  183. self.module.fail_json(**self.result)
  184. self.result[res_mode] = approve_results
  185. # We set changed for approved client or server csrs.
  186. self.result['changed'] = bool(approve_results) or bool(self.result['changed'])
  187. def get_ready_nodes_server(self, nodes_list):
  188. '''Determine which nodes have working server certificates'''
  189. ready_nodes_server = []
  190. base_command = "{} {} get --raw /api/v1/nodes/{}/proxy/healthz"
  191. for node in nodes_list:
  192. # need this to look like /api/v1/nodes/<node>/proxy/healthz
  193. command = base_command.format(self.oc_bin, self.oc_conf, node)
  194. rtnc, _, _ = self.module.run_command(command)
  195. if not rtnc:
  196. # if we can hit that api endpoint, the node has a valid server
  197. # cert.
  198. ready_nodes_server.append(node)
  199. return ready_nodes_server
  200. def verify_server_csrs(self):
  201. '''We approved some server csrs, now we need to validate they are working.
  202. This function will attempt to retry 10 times in case of failure.'''
  203. # Attempt to try node endpoints a few times.
  204. attempts = 0
  205. # Find not_ready_nodes for server-side again
  206. nodes_server_ready = self.get_ready_nodes_server(self.node_list)
  207. # Create list of nodes that still aren't ready.
  208. not_ready_nodes_server = set([item for item in self.node_list if item not in nodes_server_ready])
  209. while not_ready_nodes_server:
  210. nodes_server_ready = self.get_ready_nodes_server(not_ready_nodes_server)
  211. # if we have same number of nodes_server_ready now, all of the previous
  212. # not_ready_nodes are now ready.
  213. if not len(not_ready_nodes_server - set(nodes_server_ready)):
  214. break
  215. attempts += 1
  216. if attempts > 9:
  217. self.result['failed'] = True
  218. self.result['rc'] = 1
  219. missing_nodes = not_ready_nodes_server - set(nodes_server_ready)
  220. msg = "Some nodes still not ready after approving server certs: {}"
  221. msg = msg.format(", ".join(missing_nodes))
  222. self.result['msg'] = msg
  223. self.module.fail_json(**self.result)
  224. def run(self):
  225. '''execute the csr approval process'''
  226. nodes_ready = self.get_ready_nodes()
  227. # don't need to check nodes that are already ready.
  228. client_not_ready_nodes = [item for item in self.node_list
  229. if item not in nodes_ready]
  230. # Get all csrs, no good way to filter on pending.
  231. client_csrs = self.get_csrs()
  232. # process data in csrs and build a dictionary of client requests
  233. client_csr_dict = self.process_csrs(client_csrs, "client")
  234. self.result['client_csrs'] = client_csr_dict
  235. # This method is fail-happy and expects all non-Ready nodes have available
  236. # csrs. Handle failure for this method via ansible retry/until.
  237. self.confirm_needed_requests_present(client_not_ready_nodes,
  238. client_csr_dict)
  239. self.approve_csrs(client_csr_dict, 'client')
  240. # # Server Cert Section # #
  241. # Find not_ready_nodes for server-side
  242. nodes_server_ready = self.get_ready_nodes_server(self.node_list)
  243. # Create list of nodes that definitely need a server cert approved.
  244. not_ready_nodes_server = [item for item in self.node_list
  245. if item not in nodes_server_ready]
  246. # Get all csrs again, no good way to filter on pending.
  247. server_csrs = self.get_csrs()
  248. # process data in csrs and build a dictionary of server requests
  249. server_csr_dict = self.process_csrs(server_csrs, "server")
  250. self.result['server_csrs'] = server_csr_dict
  251. # This will fail if all server csrs are not present, but probably shouldn't
  252. # at this point since we spent some time hitting the api to see if the
  253. # nodes are already responding.
  254. self.confirm_needed_requests_present(not_ready_nodes_server,
  255. server_csr_dict)
  256. self.approve_csrs(server_csr_dict, 'server')
  257. self.verify_server_csrs()
  258. # We made it here, everything was successful, cleanup some debug info
  259. # so we don't spam logs.
  260. for key in ('client_csrs', 'server_csrs', 'unwanted_csrs'):
  261. self.result.pop(key)
  262. self.module.exit_json(**self.result)
  263. def run_module():
  264. '''Run this module'''
  265. module_args = dict(
  266. oc_bin=dict(type='path', required=False, default='oc'),
  267. oc_conf=dict(type='path', required=False, default='/etc/origin/master/admin.kubeconfig'),
  268. node_list=dict(type='list', required=True),
  269. )
  270. module = AnsibleModule(
  271. supports_check_mode=False,
  272. argument_spec=module_args
  273. )
  274. oc_bin = module.params['oc_bin']
  275. oc_conf = '--config={}'.format(module.params['oc_conf'])
  276. node_list = module.params['node_list']
  277. approver = CSRapprove(module, oc_bin, oc_conf, node_list)
  278. approver.run()
  279. def main():
  280. '''main'''
  281. run_module()
# Run the module only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()