openshift_cert_expiry.py 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # pylint: disable=line-too-long,invalid-name
  4. """For details on this module see DOCUMENTATION (below)"""
  5. import datetime
  6. import io
  7. import os
  8. import subprocess
  9. import sys
  10. import tempfile
  11. # File pointers from io.open require unicode inputs when using their
  12. # `write` method
  13. import six
  14. from six.moves import configparser
  15. import yaml
  16. try:
  17. # You can comment this import out and include a 'pass' in this
  18. # block if you're manually testing this module on a NON-ATOMIC
  19. # HOST (or any host that just doesn't have PyOpenSSL
  20. # available). That will force the `load_and_handle_cert` function
  21. # to use the Fake OpenSSL classes.
  22. import OpenSSL.crypto
  23. except ImportError:
  24. # Some platforms (such as RHEL Atomic) may not have the Python
  25. # OpenSSL library installed. In this case we will use a manual
  26. # work-around to parse each certificate.
  27. #
  28. # Check for 'OpenSSL.crypto' in `sys.modules` later.
  29. pass
  30. DOCUMENTATION = '''
  31. ---
  32. module: openshift_cert_expiry
  33. short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster
  34. description:
  35. - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired.
  36. - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following:
  37. - C(ok) - not expired, and outside of the expiration C(warning_days) window.
  38. - C(warning) - not expired, but will expire between now and the C(warning_days) window.
  39. - C(expired) - an expired certificate.
  40. - Certificate flagging follows this logic:
  41. - If the expiration date is before now then the certificate is classified as C(expired).
  42. - The certificate's time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning).
  43. - All other conditions are classified as C(ok).
  44. - The following keys are ALSO present in the certificate summary:
  45. - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted)
  46. - C(days_remaining) - The number of days until the certificate expires.
  47. - C(expiry) - The date the certificate expires on.
  48. - C(path) - The full path to the certificate on the examined host.
  49. version_added: "1.0"
  50. options:
  51. config_base:
  52. description:
  53. - Base path to OCP system settings.
  54. required: false
  55. default: /etc/origin
  56. warning_days:
  57. description:
  58. - Flag certificates which will expire in C(warning_days) days from now.
  59. required: false
  60. default: 30
  61. show_all:
  62. description:
  63. - Enable this option to show analysis of ALL certificates examined by this module.
  64. - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported.
  65. required: false
  66. default: false
  67. author: "Tim Bielawa (@tbielawa) <tbielawa@redhat.com>"
  68. '''
  69. EXAMPLES = '''
  70. # Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now
  71. - openshift_cert_expiry:
  72. # Expand the warning window to show certificates expiring within a year from now
  73. - openshift_cert_expiry: warning_days=365
  74. # Show expired, soon to expire (now + 30 days), and all other certificates examined
  75. - openshift_cert_expiry: show_all=true
  76. '''
  class FakeOpenSSLCertificate(object):
      """Rough stand-in for what `OpenSSL.crypto.load_certificate()` returns.

      This is a work-around for platforms missing the Python OpenSSL
      library. The certificate is parsed from the human readable text
      printed by 'openssl x509 -in CERT.cert -text'.
      """

      def __init__(self, cert_string):
          """`cert_string` is a certificate in the form you get from running a
          .crt through 'openssl x509 -in CERT.cert -text'.

          Byte strings (what a `subprocess` pipe yields on Python 3) are
          decoded to text before parsing.
          """
          # On Python 2 `bytes` is `str`, so this only fires for real
          # Python 3 byte strings.
          if isinstance(cert_string, bytes) and not isinstance(cert_string, str):
              cert_string = cert_string.decode('utf-8', 'replace')
          self.cert_string = cert_string
          self.serial = None
          self.subject = None
          self.extensions = []
          self.not_after = None
          self._parse_cert()

      def _parse_cert(self):
          """Manually parse the certificate line by line"""
          self.extensions = []

          parsing_alt_names = False
          parsing_hex_serial = False
          for line in self.cert_string.split('\n'):
              stripped = line.strip()

              if parsing_alt_names:
                  # The line following the 'Subject Alternative Name'
                  # header holds the actual names
                  self.extensions.append(
                      FakeOpenSSLCertificateSANExtension(stripped))
                  parsing_alt_names = False
                  continue

              if parsing_hex_serial:
                  # Large serials are printed on their own line as
                  # colon separated hex bytes:
                  #     Serial Number:
                  #         0a:ff:12:...
                  # => int('0aff12...', 16)
                  self.serial = int(stripped.replace(':', ''), 16)
                  parsing_hex_serial = False
                  continue

              # parse out the bits that we can
              if stripped.startswith('Serial Number:'):
                  # Serial Number: 11 (0xb)
                  # => 11
                  inline_value = stripped.partition(':')[-1].split()
                  if inline_value:
                      self.serial = int(inline_value[0])
                  else:
                      # Nothing after the colon, the value is on the
                      # next line in hex form
                      parsing_hex_serial = True
              elif stripped.startswith('Not After :'):
                  # Not After : Feb 7 18:19:35 2019 GMT
                  # => strptime(str, '%b %d %H:%M:%S %Y %Z')
                  # => strftime('%Y%m%d%H%M%SZ')
                  # => 20190207181935Z
                  not_after_raw = stripped.partition(' : ')[-1]
                  # Last item: ('Not After', ' : ', 'Feb 7 18:19:35 2019 GMT')
                  not_after_parsed = datetime.datetime.strptime(not_after_raw, '%b %d %H:%M:%S %Y %Z')
                  self.not_after = not_after_parsed.strftime('%Y%m%d%H%M%SZ')
              elif stripped.startswith('X509v3 Subject Alternative Name:'):
                  parsing_alt_names = True
              elif stripped.startswith('Subject:'):
                  # O=system:nodes, CN=system:node:m01.example.com
                  self.subject = FakeOpenSSLCertificateSubjects(stripped.partition(': ')[-1])

      def get_serial_number(self):
          """Return the serial number of the cert as an int (None if the
          certificate text had no 'Serial Number' line)"""
          return self.serial

      def get_subject(self):
          """Return the parsed subject object (None if the certificate text
          had no 'Subject:' line).

          Subjects implement get_components() and return a list of
          (name, value) tuples. An 'openssl x509 -in CERT.cert -text'
          with 'Subject':

              Subject: O=system:nodes, CN=system:node:m01.example.com

          returns: [('O', 'system:nodes'), ('CN', 'system:node:m01.example.com')]
          """
          return self.subject

      def get_extension(self, i):
          """Return the extension at index `i`. Raises IndexError when no
          such extension exists. Extensions implement get_short_name()
          and return the string 'subjectAltName'"""
          return self.extensions[i]

      def get_notAfter(self):
          """Returns a date stamp as a string in the form
          '20180922170439Z'. strptime the result with format param:
          '%Y%m%d%H%M%SZ'."""
          return self.not_after
  class FakeOpenSSLCertificateSANExtension(object):  # pylint: disable=too-few-public-methods
      """Stand-in for the object handed back when `get_extension` is
      called on a certificate object"""

      def __init__(self, san_string):
          """Keep hold of `san_string`, the alt-names line as printed by:

              $ openssl x509 -in certificate.crt -text
          """
          self.short_name = 'subjectAltName'
          self.san_string = san_string

      def get_short_name(self):
          """Report which kind of extension this is. The answer never
          varies because subjectAltName is the only extension we track"""
          return self.short_name

      def __str__(self):
          """Render the extension as the plain alt-names string it was
          built from"""
          return self.san_string
  158. # pylint: disable=too-few-public-methods
  class FakeOpenSSLCertificateSubjects(object):
      """Mocks what happens when `get_subject` is called on a certificate
      object"""

      def __init__(self, subject_string):
          """With `subject_string` as you get from:

              $ openssl x509 -in certificate.crt -text

          Both the compact form ('O=system:nodes, CN=node1') and the
          spaced form printed by newer openssl releases
          ('O = system:nodes, CN = node1') are accepted.
          """
          self.subjects = []
          for component in subject_string.split(', '):
              name, _, value = component.partition('=')
              # Newer openssl pads the '=' with spaces, trim them so
              # both output styles give identical components
              name = name.strip()
              value = value.strip()
              if not name and not value:
                  # Empty subject line, nothing to record
                  continue
              self.subjects.append((name, value))

      def get_components(self):
          """Returns a list of (name, value) tuples"""
          return self.subjects
  173. # We only need this for one thing, we don't care if it doesn't have
  174. # that many public methods
  175. #
  176. # pylint: disable=too-few-public-methods
  class FakeSecHead(object):
      """etcd does not begin their config file with an opening [section] as
      required by the Python ConfigParser module. We hack around it by
      slipping one in ourselves prior to parsing.

      The wrapper supports both `readline()` (used by the Python 2
      `ConfigParser.readfp`) and iteration (used by the Python 3
      `ConfigParser.read_file`, which `readfp` delegates to).

      Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583
      """

      def __init__(self, fp):
          """`fp` is an open file-like object providing `readline()`"""
          self.fp = fp
          self.sechead = '[ETCD]\n'

      def readline(self):
          """Make this look like a file-type object. The first call
          returns the fake section header, later calls read from the
          wrapped file"""
          if self.sechead:
              header, self.sechead = self.sechead, None
              return header
          return self.fp.readline()

      def __iter__(self):
          """Iterate line by line, fake section header first"""
          return self

      def __next__(self):
          """Return the next line, stop once the wrapped file hits EOF
          (signalled by `readline()` returning an empty string)"""
          line = self.readline()
          if not line:
              raise StopIteration
          return line

      # Python 2 spelling of the iterator protocol
      next = __next__
  195. ######################################################################
  def filter_paths(path_list):
      """Weed out the entries of `path_list` that are not on disk.

      Every path is resolved through any symlinks before the existence
      test. The survivors are returned, untouched and in their original
      order, as a new list.
      """
      present = []
      for candidate in path_list:
          resolved = os.path.realpath(candidate)
          if os.path.exists(resolved):
              present.append(candidate)
      return present
  201. # pylint: disable=too-many-locals,too-many-branches
  202. #
  203. # TODO: Break this function down into smaller chunks
  def _native_text(value):
      """Return `value` as native text, decoding Python 3 byte strings.

      pyOpenSSL and `subprocess` pipes hand back `bytes` on Python 3
      while the parsing below works on text. On Python 2 `bytes` is
      `str`, so values pass through untouched there.
      """
      if isinstance(value, bytes) and not isinstance(value, str):
          return value.decode('utf-8', 'replace')
      return value


  def load_and_handle_cert(cert_string, now, base64decode=False, ans_module=None):
      """Load a certificate, split off the good parts, and return some
      useful data

      Params:

      - `cert_string` (string) - a certificate loaded into a string object
      - `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against
      - `base64decode` (bool) - base64 decode the input before loading it?
      - `ans_module` (AnsibleModule) - The AnsibleModule object for this module (so we can raise errors)

      Returns:
      A tuple of the form:
          (cert_subject, cert_expiry_date, time_remaining, cert_serial_number)

      Raises:
      OSError if neither the OpenSSL python library nor the 'openssl'
      command is available and no `ans_module` was given to report the
      failure through.
      """
      # Imported here because this is the only spot in the module that
      # needs it
      import base64

      if base64decode:
          # str.decode('base-64') only exists on Python 2, b64decode
          # works on both major versions
          _cert_string = base64.b64decode(cert_string)
      else:
          _cert_string = cert_string

      # Disable this. We 'redefine' the type because we are working
      # around a missing library on the target host.
      #
      # pylint: disable=redefined-variable-type
      if 'OpenSSL.crypto' in sys.modules:
          # No work-around required
          cert_loaded = OpenSSL.crypto.load_certificate(
              OpenSSL.crypto.FILETYPE_PEM, _cert_string)
      else:
          # Missing library, work-around required. We need to write the
          # cert out to disk temporarily so we can run the 'openssl'
          # command on it to decode it
          pem_bytes = _cert_string
          if not isinstance(pem_bytes, bytes):
              pem_bytes = pem_bytes.encode('utf-8')

          fd, path = tempfile.mkstemp()
          try:
              # fdopen takes ownership of the descriptor mkstemp opened,
              # so it is closed when the 'with' block ends
              with os.fdopen(fd, 'wb') as fp:
                  fp.write(pem_bytes)

              try:
                  # Argument list rather than a split string, the temp
                  # path is then safe even if it contains whitespace
                  openssl_proc = subprocess.Popen(
                      ['openssl', 'x509', '-in', path, '-text'],
                      stdout=subprocess.PIPE)
              except OSError:
                  if ans_module is None:
                      # Nobody to report to, let the caller see the
                      # real error
                      raise
                  ans_module.fail_json(msg="Error: The 'OpenSSL' python library and CLI command were not found on the target host. Unable to parse any certificates. This host will not be included in generated reports.")
              else:
                  openssl_decoded = openssl_proc.communicate()[0]
                  cert_loaded = FakeOpenSSLCertificate(_native_text(openssl_decoded))
          finally:
              os.remove(path)

      ######################################################################
      # Read all possible names from the cert
      cert_subjects = []
      for name, value in cert_loaded.get_subject().get_components():
          cert_subjects.append('{}:{}'.format(_native_text(name), _native_text(value)))

      # To read SANs from a cert we must read the subjectAltName
      # extension from the X509 Object. What makes this more difficult
      # is that pyOpenSSL does not give extensions as a list, nor does
      # it provide a count of all loaded extensions.
      #
      # Rather, extensions are REQUESTED by index. We must iterate over
      # all extensions until we find the one called 'subjectAltName'. If
      # we don't find that extension we'll eventually request an
      # extension at an index where no extension exists (IndexError is
      # raised). When that happens we know that the cert has no SANs so
      # we break out of the loop.
      san = None
      i = 0
      while True:
          try:
              # Read the extension at index 'i'
              ext = cert_loaded.get_extension(i)
          except IndexError:
              # We ran out of extensions to check. Abort
              break

          # The short name may come back as bytes, normalise it
          # before comparing
          if _native_text(ext.get_short_name()) == 'subjectAltName':
              san = ext
              break

          # Try reading the next extension
          i += 1

      if san is not None:
          # The X509Extension object for subjectAltName prints as a
          # string with the alt names separated by a comma and a
          # space. Split the string by ', ' and then add our new names
          # to the list of existing names
          cert_subjects.extend(str(san).split(', '))

      cert_subject = ', '.join(cert_subjects)
      ######################################################################

      # Grab the expiration date
      cert_expiry_date = datetime.datetime.strptime(
          _native_text(cert_loaded.get_notAfter()),
          # example get_notAfter() => 20180922170439Z
          '%Y%m%d%H%M%SZ')

      time_remaining = cert_expiry_date - now

      return (cert_subject, cert_expiry_date, time_remaining, cert_loaded.get_serial_number())
  def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list):
      """Label one certificate as 'ok', 'warning' or 'expired' and file it
      away in `cert_list`.

      Params:

      - `cert_meta` dict - Metadata about the certificate. The 'expiry'
        and 'serial' keys are read. 'health', 'expiry' and 'serial_hex'
        are written.
      - `now` (datetime) - the moment the expiry date is compared against
      - `time_remaining` (datetime.timedelta) - how long until the cert expires
      - `expire_window` (datetime.timedelta) - length of the warning window
      - `cert_list` list - collects the classified certificates

      Return:
      - `cert_list` - the same list object, with `cert_meta` appended
      """
      expires_on = cert_meta['expiry']

      if expires_on < now:
          # Already past its expiry date
          verdict = 'expired'
      elif time_remaining < expire_window:
          # Still valid, but runs out inside the warning window
          verdict = 'warning'
      else:
          # Valid and not about to run out
          verdict = 'ok'

      cert_meta['health'] = verdict
      # The expiry date is stored as its string form
      cert_meta['expiry'] = str(expires_on)
      cert_meta['serial_hex'] = hex(int(cert_meta['serial']))

      cert_list.append(cert_meta)
      return cert_list
  def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs):
      """Work out the numbers reported when the module finishes running.

      Params:

      - `certificates` (list of dicts) - Processed `expire_check_result`
        dicts with filled in `health` keys for system certificates.
      - `kubeconfigs` - as above for kubeconfigs
      - `etcd_certs` - as above for etcd certs
      - `router_certs` - as above for router certs
      - `registry_certs` - as above for registry certs

      Return:

      - `summary_results` (dict) - How many certs each group held, the
        overall total, and how many fell into each `health` class.
      """
      everything = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs

      tally = {
          'system_certificates': len(certificates),
          'kubeconfig_certificates': len(kubeconfigs),
          'etcd_certificates': len(etcd_certs),
          'router_certs': len(router_certs),
          'registry_certs': len(registry_certs),
          'total': len(everything),
          'ok': 0,
          'warning': 0,
          'expired': 0
      }

      # One pass per health class, counting the certs that carry it
      for state in ('expired', 'warning', 'ok'):
          tally[state] = sum(1 for cert in everything if cert['health'] == state)

      return tally
  354. ######################################################################
  355. # This is our module MAIN function after all, so there's bound to be a
  356. # lot of code bundled up into one block
  357. #
  358. # Reason: These checks are disabled because the issue was introduced
  359. # during a period where the pylint checks weren't enabled for this file
  360. # Status: temporarily disabled pending future refactoring
  361. # pylint: disable=too-many-locals,too-many-statements,too-many-branches
  362. def main():
  363. """This module examines certificates (in various forms) which compose
  364. an OpenShift Container Platform cluster
  365. """
  366. module = AnsibleModule(
  367. argument_spec=dict(
  368. config_base=dict(
  369. required=False,
  370. default="/etc/origin",
  371. type='str'),
  372. warning_days=dict(
  373. required=False,
  374. default=30,
  375. type='int'),
  376. show_all=dict(
  377. required=False,
  378. default=False,
  379. type='bool')
  380. ),
  381. supports_check_mode=True,
  382. )
  383. # Basic scaffolding for OpenShift specific certs
  384. openshift_base_config_path = module.params['config_base']
  385. openshift_master_config_path = os.path.normpath(
  386. os.path.join(openshift_base_config_path, "master/master-config.yaml")
  387. )
  388. openshift_node_config_path = os.path.normpath(
  389. os.path.join(openshift_base_config_path, "node/node-config.yaml")
  390. )
  391. openshift_cert_check_paths = [
  392. openshift_master_config_path,
  393. openshift_node_config_path,
  394. ]
  395. # Paths for Kubeconfigs. Additional kubeconfigs are conditionally
  396. # checked later in the code
  397. master_kube_configs = ['admin', 'openshift-master',
  398. 'openshift-node', 'openshift-router',
  399. 'openshift-registry']
  400. kubeconfig_paths = []
  401. for m_kube_config in master_kube_configs:
  402. kubeconfig_paths.append(
  403. os.path.normpath(
  404. os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config)
  405. )
  406. )
  407. # Validate some paths we have the ability to do ahead of time
  408. openshift_cert_check_paths = filter_paths(openshift_cert_check_paths)
  409. kubeconfig_paths = filter_paths(kubeconfig_paths)
  410. # etcd, where do you hide your certs? Used when parsing etcd.conf
  411. etcd_cert_params = [
  412. "ETCD_CA_FILE",
  413. "ETCD_CERT_FILE",
  414. "ETCD_PEER_CA_FILE",
  415. "ETCD_PEER_CERT_FILE",
  416. ]
  417. # Expiry checking stuff
  418. now = datetime.datetime.now()
  419. # todo, catch exception for invalid input and return a fail_json
  420. warning_days = int(module.params['warning_days'])
  421. expire_window = datetime.timedelta(days=warning_days)
  422. # Module stuff
  423. #
  424. # The results of our cert checking to return from the task call
  425. check_results = {}
  426. check_results['meta'] = {}
  427. check_results['meta']['warning_days'] = warning_days
  428. check_results['meta']['checked_at_time'] = str(now)
  429. check_results['meta']['warn_before_date'] = str(now + expire_window)
  430. check_results['meta']['show_all'] = str(module.params['show_all'])
  431. # All the analyzed certs accumulate here
  432. ocp_certs = []
  433. ######################################################################
  434. # Sure, why not? Let's enable check mode.
  435. if module.check_mode:
  436. check_results['ocp_certs'] = []
  437. module.exit_json(
  438. check_results=check_results,
  439. msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. Warning window: %s days" % module.params['warning_days'],
  440. rc=0,
  441. changed=False
  442. )
  443. ######################################################################
  444. # Check for OpenShift Container Platform specific certs
  445. ######################################################################
  446. for os_cert in filter_paths(openshift_cert_check_paths):
  447. # Open up that config file and locate the cert and CA
  448. with open(os_cert, 'r') as fp:
  449. cert_meta = {}
  450. cfg = yaml.load(fp)
  451. # cert files are specified in parsed `fp` as relative to the path
  452. # of the original config file. 'master-config.yaml' with certFile
  453. # = 'foo.crt' implies that 'foo.crt' is in the same
  454. # directory. certFile = '../foo.crt' is in the parent directory.
  455. cfg_path = os.path.dirname(fp.name)
  456. cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile'])
  457. cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA'])
  458. ######################################################################
  459. # Load the certificate and the CA, parse their expiration dates into
  460. # datetime objects so we can manipulate them later
  461. for _, v in cert_meta.items():
  462. with open(v, 'r') as fp:
  463. cert = fp.read()
  464. (cert_subject,
  465. cert_expiry_date,
  466. time_remaining,
  467. cert_serial) = load_and_handle_cert(cert, now, ans_module=module)
  468. expire_check_result = {
  469. 'cert_cn': cert_subject,
  470. 'path': fp.name,
  471. 'expiry': cert_expiry_date,
  472. 'days_remaining': time_remaining.days,
  473. 'health': None,
  474. 'serial': cert_serial
  475. }
  476. classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs)
  477. ######################################################################
  478. # /Check for OpenShift Container Platform specific certs
  479. ######################################################################
  480. ######################################################################
  481. # Check service Kubeconfigs
  482. ######################################################################
  483. kubeconfigs = []
  484. # There may be additional kubeconfigs to check, but their naming
  485. # is less predictable than the ones we've already assembled.
  486. try:
  487. # Try to read the standard 'node-config.yaml' file to check if
  488. # this host is a node.
  489. with open(openshift_node_config_path, 'r') as fp:
  490. cfg = yaml.load(fp)
  491. # OK, the config file exists, therefore this is a
  492. # node. Nodes have their own kubeconfig files to
  493. # communicate with the master API. Let's read the relative
  494. # path to that file from the node config.
  495. node_masterKubeConfig = cfg['masterKubeConfig']
  496. # As before, the path to the 'masterKubeConfig' file is
  497. # relative to `fp`
  498. cfg_path = os.path.dirname(fp.name)
  499. node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig)
  500. with open(node_kubeconfig, 'r') as fp:
  501. # Read in the nodes kubeconfig file and grab the good stuff
  502. cfg = yaml.load(fp)
  503. c = cfg['users'][0]['user']['client-certificate-data']
  504. (cert_subject,
  505. cert_expiry_date,
  506. time_remaining,
  507. cert_serial) = load_and_handle_cert(c, now, base64decode=True, ans_module=module)
  508. expire_check_result = {
  509. 'cert_cn': cert_subject,
  510. 'path': fp.name,
  511. 'expiry': cert_expiry_date,
  512. 'days_remaining': time_remaining.days,
  513. 'health': None,
  514. 'serial': cert_serial
  515. }
  516. classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs)
  517. except IOError:
  518. # This is not a node
  519. pass
  520. for kube in filter_paths(kubeconfig_paths):
  521. with open(kube, 'r') as fp:
  522. # TODO: Maybe consider catching exceptions here?
  523. cfg = yaml.load(fp)
  524. # Per conversation, "the kubeconfigs you care about:
  525. # admin, router, registry should all be single
  526. # value". Following that advice we only grab the data for
  527. # the user at index 0 in the 'users' list. There should
  528. # not be more than one user.
  529. c = cfg['users'][0]['user']['client-certificate-data']
  530. (cert_subject,
  531. cert_expiry_date,
  532. time_remaining,
  533. cert_serial) = load_and_handle_cert(c, now, base64decode=True, ans_module=module)
  534. expire_check_result = {
  535. 'cert_cn': cert_subject,
  536. 'path': fp.name,
  537. 'expiry': cert_expiry_date,
  538. 'days_remaining': time_remaining.days,
  539. 'health': None,
  540. 'serial': cert_serial
  541. }
  542. classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs)
  543. ######################################################################
  544. # /Check service Kubeconfigs
  545. ######################################################################
  546. ######################################################################
  547. # Check etcd certs
  548. #
  549. # Two things to check: 'external' etcd, and embedded etcd.
  550. ######################################################################
  551. # FIRST: The 'external' etcd
  552. #
  553. # Some values may be duplicated, make this a set for now so we
  554. # unique them all
  555. etcd_certs_to_check = set([])
  556. etcd_certs = []
  557. etcd_cert_params.append('dne')
  558. try:
  559. with open('/etc/etcd/etcd.conf', 'r') as fp:
  560. etcd_config = configparser.ConfigParser()
  561. # Reason: This check is disabled because the issue was introduced
  562. # during a period where the pylint checks weren't enabled for this file
  563. # Status: temporarily disabled pending future refactoring
  564. # pylint: disable=deprecated-method
  565. etcd_config.readfp(FakeSecHead(fp))
  566. for param in etcd_cert_params:
  567. try:
  568. etcd_certs_to_check.add(etcd_config.get('ETCD', param))
  569. except configparser.NoOptionError:
  570. # That parameter does not exist, oh well...
  571. pass
  572. except IOError:
  573. # No etcd to see here, move along
  574. pass
  575. for etcd_cert in filter_paths(etcd_certs_to_check):
  576. with open(etcd_cert, 'r') as fp:
  577. c = fp.read()
  578. (cert_subject,
  579. cert_expiry_date,
  580. time_remaining,
  581. cert_serial) = load_and_handle_cert(c, now, ans_module=module)
  582. expire_check_result = {
  583. 'cert_cn': cert_subject,
  584. 'path': fp.name,
  585. 'expiry': cert_expiry_date,
  586. 'days_remaining': time_remaining.days,
  587. 'health': None,
  588. 'serial': cert_serial
  589. }
  590. classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
  591. ######################################################################
  592. # Now the embedded etcd
  593. ######################################################################
  594. try:
  595. with open('/etc/origin/master/master-config.yaml', 'r') as fp:
  596. cfg = yaml.load(fp)
  597. except IOError:
  598. # Not present
  599. pass
  600. else:
  601. if cfg.get('etcdConfig', {}).get('servingInfo', {}).get('certFile', None) is not None:
  602. # This is embedded
  603. etcd_crt_name = cfg['etcdConfig']['servingInfo']['certFile']
  604. else:
  605. # Not embedded
  606. etcd_crt_name = None
  607. if etcd_crt_name is not None:
  608. # etcd_crt_name is relative to the location of the
  609. # master-config.yaml file
  610. cfg_path = os.path.dirname(fp.name)
  611. etcd_cert = os.path.join(cfg_path, etcd_crt_name)
  612. with open(etcd_cert, 'r') as etcd_fp:
  613. (cert_subject,
  614. cert_expiry_date,
  615. time_remaining,
  616. cert_serial) = load_and_handle_cert(etcd_fp.read(), now, ans_module=module)
  617. expire_check_result = {
  618. 'cert_cn': cert_subject,
  619. 'path': etcd_fp.name,
  620. 'expiry': cert_expiry_date,
  621. 'days_remaining': time_remaining.days,
  622. 'health': None,
  623. 'serial': cert_serial
  624. }
  625. classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
  626. ######################################################################
  627. # /Check etcd certs
  628. ######################################################################
  629. ######################################################################
  630. # Check router/registry certs
  631. #
  632. # These are saved as secrets in etcd. That means that we can not
  633. # simply read a file to grab the data. Instead we're going to
  634. # subprocess out to the 'oc get' command. On non-masters this
  635. # command will fail, that is expected so we catch that exception.
  636. ######################################################################
  637. router_certs = []
  638. registry_certs = []
  639. ######################################################################
  640. # First the router certs
  641. try:
  642. router_secrets_raw = subprocess.Popen('oc get -n default secret router-certs -o yaml'.split(),
  643. stdout=subprocess.PIPE)
  644. router_ds = yaml.load(router_secrets_raw.communicate()[0])
  645. router_c = router_ds['data']['tls.crt']
  646. router_path = router_ds['metadata']['selfLink']
  647. except TypeError:
  648. # YAML couldn't load the result, this is not a master
  649. pass
  650. except OSError:
  651. # The OC command doesn't exist here. Move along.
  652. pass
  653. else:
  654. (cert_subject,
  655. cert_expiry_date,
  656. time_remaining,
  657. cert_serial) = load_and_handle_cert(router_c, now, base64decode=True, ans_module=module)
  658. expire_check_result = {
  659. 'cert_cn': cert_subject,
  660. 'path': router_path,
  661. 'expiry': cert_expiry_date,
  662. 'days_remaining': time_remaining.days,
  663. 'health': None,
  664. 'serial': cert_serial
  665. }
  666. classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs)
  667. ######################################################################
  668. # Now for registry
  669. try:
  670. registry_secrets_raw = subprocess.Popen('oc get -n default secret registry-certificates -o yaml'.split(),
  671. stdout=subprocess.PIPE)
  672. registry_ds = yaml.load(registry_secrets_raw.communicate()[0])
  673. registry_c = registry_ds['data']['registry.crt']
  674. registry_path = registry_ds['metadata']['selfLink']
  675. except TypeError:
  676. # YAML couldn't load the result, this is not a master
  677. pass
  678. except OSError:
  679. # The OC command doesn't exist here. Move along.
  680. pass
  681. else:
  682. (cert_subject,
  683. cert_expiry_date,
  684. time_remaining,
  685. cert_serial) = load_and_handle_cert(registry_c, now, base64decode=True, ans_module=module)
  686. expire_check_result = {
  687. 'cert_cn': cert_subject,
  688. 'path': registry_path,
  689. 'expiry': cert_expiry_date,
  690. 'days_remaining': time_remaining.days,
  691. 'health': None,
  692. 'serial': cert_serial
  693. }
  694. classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs)
  695. ######################################################################
  696. # /Check router/registry certs
  697. ######################################################################
  698. res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs)
  699. msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format(
  700. count=res['total'],
  701. exp=res['expired'],
  702. warn=res['warning'],
  703. ok=res['ok'],
  704. window=int(module.params['warning_days']),
  705. )
  706. # By default we only return detailed information about expired or
  707. # warning certificates. If show_all is true then we will print all
  708. # the certificates examined.
  709. if not module.params['show_all']:
  710. check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']]
  711. check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']]
  712. check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']]
  713. check_results['registry'] = [crt for crt in registry_certs if crt['health'] in ['expired', 'warning']]
  714. check_results['router'] = [crt for crt in router_certs if crt['health'] in ['expired', 'warning']]
  715. else:
  716. check_results['ocp_certs'] = ocp_certs
  717. check_results['kubeconfigs'] = kubeconfigs
  718. check_results['etcd'] = etcd_certs
  719. check_results['registry'] = registry_certs
  720. check_results['router'] = router_certs
  721. # Sort the final results to report in order of ascending safety
  722. # time. That is to say, the certificates which will expire sooner
  723. # will be at the front of the list and certificates which will
  724. # expire later are at the end. Router and registry certs should be
  725. # limited to just 1 result, so don't bother sorting those.
  def cert_key(item):
      ''' Sort key for a certificate result: its 'days_remaining' value.

      Used with sorted() so that certificates with fewer days remaining
      come first. Raises KeyError if the entry has no 'days_remaining'.
      '''
      days_left = item['days_remaining']
      return days_left
  729. check_results['ocp_certs'] = sorted(check_results['ocp_certs'], key=cert_key)
  730. check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], key=cert_key)
  731. check_results['etcd'] = sorted(check_results['etcd'], key=cert_key)
  732. # This module will never change anything, but we might want to
  733. # change the return code parameter if there is some catastrophic
  734. # error we noticed earlier
  735. module.exit_json(
  736. check_results=check_results,
  737. summary=res,
  738. msg=msg,
  739. rc=0,
  740. changed=False
  741. )
  742. ######################################################################
  # Ansible modules import AnsibleModule at the bottom of the file by
  # convention, so disable the warnings that the late import triggers.
  744. #
  745. # pylint: disable=wrong-import-position,import-error
  746. from ansible.module_utils.basic import AnsibleModule # noqa: E402
  # Only run the checks when this file is executed as a script, not when
  # it is imported.
  if __name__ == '__main__':
      main()