Bläddra i källkod

Merge pull request #2549 from tbielawa/check-cert-expiry

Cert Expiry for Cluster Ops
Andrew Butcher 8 år sedan
förälder
incheckning
1e943532d3

+ 250 - 0
roles/openshift_certificate_expiry/README.md

@@ -0,0 +1,250 @@
+OpenShift Certificate Expiration Checker
+========================================
+
+OpenShift certificate expiration checking. Be warned of certificates
+expiring within a configurable window of days, and notified of
+certificates which have already expired. Certificates examined
+include:
+
+* Master/Node Service Certificates
+* Router/Registry Service Certificates from etcd secrets
+* Master/Node/Router/Registry/Admin `kubeconfig`s
+* Etcd certificates
+
+This role pairs well with the redeploy certificates playbook:
+
+* [Redeploying Certificates Documentation](https://docs.openshift.com/container-platform/latest/install_config/redeploying_certificates.html)
+
+Just like the redeploying certificates playbook, this role is intended
+to be used with an inventory that is representative of the
+cluster. For best results run `ansible-playbook` with the `-v` option.
+
+
+
+Role Variables
+--------------
+
+Core variables in this role:
+
+| Name                                                  | Default value                  | Description                                                           |
+|-------------------------------------------------------|--------------------------------|-----------------------------------------------------------------------|
+| `openshift_certificate_expiry_config_base`            | `/etc/origin`                  | Base openshift config directory                                       |
+| `openshift_certificate_expiry_warning_days`           | `30`                           | Flag certificates which will expire in this many days from now        |
+| `openshift_certificate_expiry_show_all`               | `no`                           | Include healthy (non-expired and non-warning) certificates in results |
+
+Optional report/result saving variables in this role:
+
+| Name                                                  | Default value                  | Description                                                           |
+|-------------------------------------------------------|--------------------------------|-----------------------------------------------------------------------|
+| `openshift_certificate_expiry_generate_html_report`   | `no`                           | Generate an HTML report of the expiry check results                   |
+| `openshift_certificate_expiry_html_report_path`       | `/tmp/cert-expiry-report.html` | The full path to save the HTML report as                              |
+| `openshift_certificate_expiry_save_json_results`      | `no`                           | Save expiry check results as a json file                              |
+| `openshift_certificate_expiry_json_results_path`      | `/tmp/cert-expiry-report.json` | The full path to save the json report as                              |
+
+
+Example Playbook
+----------------
+
+Default behavior:
+
+```yaml
+---
+- name: Check cert expirys
+  hosts: nodes:masters:etcd
+  become: yes
+  gather_facts: no
+  roles:
+    - role: openshift_certificate_expiry
+```
+
+Generate HTML and JSON artifacts in their default paths:
+
+```yaml
+---
+- name: Check cert expirys
+  hosts: nodes:masters:etcd
+  become: yes
+  gather_facts: no
+  vars:
+    openshift_certificate_expiry_generate_html_report: yes
+    openshift_certificate_expiry_save_json_results: yes
+  roles:
+    - role: openshift_certificate_expiry
+```
+
+Change the expiration warning window to 1500 days (good for testing
+the module out):
+
+```yaml
+---
+- name: Check cert expirys
+  hosts: nodes:masters:etcd
+  become: yes
+  gather_facts: no
+  vars:
+    openshift_certificate_expiry_warning_days: 1500
+  roles:
+    - role: openshift_certificate_expiry
+```
+
+Change the expiration warning window to 1500 days (good for testing
+the module out) and save the results as a JSON file:
+
+```yaml
+---
+- name: Check cert expirys
+  hosts: nodes:masters:etcd
+  become: yes
+  gather_facts: no
+  vars:
+    openshift_certificate_expiry_warning_days: 1500
+    openshift_certificate_expiry_save_json_results: yes
+  roles:
+    - role: openshift_certificate_expiry
+```
+
+
+JSON Output
+-----------
+
+There are two top-level keys in the saved JSON results, `data` and
+`summary`.
+
+The `data` key is a hash where the keys are the names of each host
+examined and the values are the check results for each respective
+host.
+
+The `summary` key is a hash that summarizes the number of certificates
+expiring within the configured warning window and the number of
+already expired certificates.
+
+The example below is abbreviated to save space:
+
+```json
+{
+    "data": {
+        "192.168.124.148": {
+            "etcd": [
+                {
+                    "cert_cn": "CN:etcd-signer@1474563722",
+                    "days_remaining": 350,
+                    "expiry": "2017-09-22 17:02:25",
+                    "health": "warning",
+                    "path": "/etc/etcd/ca.crt"
+                }
+            ],
+            "kubeconfigs": [
+                {
+                    "cert_cn": "O:system:nodes, CN:system:node:m01.example.com",
+                    "days_remaining": 715,
+                    "expiry": "2018-09-22 17:08:57",
+                    "health": "warning",
+                    "path": "/etc/origin/node/system:node:m01.example.com.kubeconfig"
+                },
+                {
+                    "cert_cn": "O:system:cluster-admins, CN:system:admin",
+                    "days_remaining": 715,
+                    "expiry": "2018-09-22 17:04:40",
+                    "health": "warning",
+                    "path": "/etc/origin/master/admin.kubeconfig"
+                }
+            ],
+            "meta": {
+                "checked_at_time": "2016-10-07 15:26:47.608192",
+                "show_all": "True",
+                "warn_before_date": "2020-11-15 15:26:47.608192",
+                "warning_days": 1500
+            },
+            "ocp_certs": [
+                {
+                    "cert_cn": "CN:172.30.0.1, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:m01.example.com, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:172.30.0.1, DNS:192.168.124.148, IP Address:172.30.0.1, IP Address:192.168.124.148",
+                    "days_remaining": 715,
+                    "expiry": "2018-09-22 17:04:39",
+                    "health": "warning",
+                    "path": "/etc/origin/master/master.server.crt"
+                },
+                {
+                    "cert_cn": "CN:openshift-signer@1474563878",
+                    "days_remaining": 1810,
+                    "expiry": "2021-09-21 17:04:38",
+                    "health": "ok",
+                    "path": "/etc/origin/node/ca.crt"
+                }
+            ],
+            "registry": [
+                {
+                    "cert_cn": "CN:172.30.101.81, DNS:docker-registry-default.router.default.svc.cluster.local, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.101.81, IP Address:172.30.101.81",
+                    "days_remaining": 728,
+                    "expiry": "2018-10-05 18:54:29",
+                    "health": "warning",
+                    "path": "/api/v1/namespaces/default/secrets/registry-certificates"
+                }
+            ],
+            "router": [
+                {
+                    "cert_cn": "CN:router.default.svc, DNS:router.default.svc, DNS:router.default.svc.cluster.local",
+                    "days_remaining": 715,
+                    "expiry": "2018-09-22 17:48:23",
+                    "health": "warning",
+                    "path": "/api/v1/namespaces/default/secrets/router-certs"
+                }
+            ]
+        }
+    },
+    "summary": {
+        "warning": 6,
+        "expired": 0
+    }
+}
+```
+
+The `summary` from the json data can be easily checked for
+warnings/expirations using a variety of command-line tools.
+
+For example, using `grep` we can look for the word `summary` and print
+out the 2 lines **after** the match (`-A2`):
+
+```
+$ grep -A2 summary /tmp/cert-expiry-report.json
+    "summary": {
+        "warning": 16,
+        "expired": 0
+```
+
+If available, the [jq](https://stedolan.github.io/jq/) tool can also
+be used to pick out specific values. Example 1 and 2 below show how to
+select just one value, either `warning` or `expired`. Example 3 shows
+how to select both values at once:
+
+```
+$ jq '.summary.warning' /tmp/cert-expiry-report.json
+16
+$ jq '.summary.expired' /tmp/cert-expiry-report.json
+0
+$ jq '.summary.warning,.summary.expired' /tmp/cert-expiry-report.json
+16
+0
+```
+
+
+Requirements
+------------
+
+* None
+
+
+Dependencies
+------------
+
+* None
+
+
+License
+-------
+
+Apache License, Version 2.0
+
+Author Information
+------------------
+
+Tim Bielawa (tbielawa@redhat.com)

+ 8 - 0
roles/openshift_certificate_expiry/defaults/main.yml

@@ -0,0 +1,8 @@
+---
+openshift_certificate_expiry_config_base: "/etc/origin"
+openshift_certificate_expiry_warning_days: 30
+openshift_certificate_expiry_show_all: no
+openshift_certificate_expiry_generate_html_report: no
+openshift_certificate_expiry_html_report_path: "/tmp/cert-expiry-report.html"
+openshift_certificate_expiry_save_json_results: no
+openshift_certificate_expiry_json_results_path: "/tmp/cert-expiry-report.json"

+ 88 - 0
roles/openshift_certificate_expiry/filter_plugins/oo_cert_expiry.py

@@ -0,0 +1,88 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# vim: expandtab:tabstop=4:shiftwidth=4
+"""
+Custom filters for use in openshift-ansible
+"""
+
+from ansible import errors
+from collections import Mapping
+from distutils.util import strtobool
+from distutils.version import LooseVersion
+from operator import itemgetter
+import OpenSSL.crypto
+import os
+import pdb
+import pkg_resources
+import re
+import json
+import yaml
+from ansible.parsing.yaml.dumper import AnsibleDumper
+from urlparse import urlparse
+
+try:
+    # ansible-2.2
+    # ansible.utils.unicode.to_unicode is deprecated in ansible-2.2,
+    # ansible.module_utils._text.to_text should be used instead.
+    from ansible.module_utils._text import to_text
+except ImportError:
+    # ansible-2.1
+    from ansible.utils.unicode import to_unicode as to_text
+
+# Disabling too-many-public-methods, since filter methods are necessarily
+# public
+# pylint: disable=too-many-public-methods
+class FilterModule(object):
+    """ Custom ansible filters """
+
+    @staticmethod
+    def oo_cert_expiry_results_to_json(hostvars, play_hosts):
+        """Takes results (`hostvars`) from the openshift_cert_expiry role
+check and serializes them into proper machine-readable JSON
+output. This filter parameter **MUST** be the playbook `hostvars`
+variable. The `play_hosts` parameter is so we know what to loop over
+when we're extrating the values.
+
+Returns:
+
+Results are collected into two top-level keys under the `json_results`
+dict:
+
+* `json_results.data` [dict] - Each individual host check result, keys are hostnames
+* `json_results.summary` [dict] - Summary of number of `warning` and `expired`
+certificates
+
+Example playbook usage:
+
+  - name: Generate expiration results JSON
+    become: no
+    run_once: yes
+    delegate_to: localhost
+    when: "{{ openshift_certificate_expiry_save_json_results|bool }}"
+    copy:
+      content: "{{ hostvars|oo_cert_expiry_results_to_json() }}"
+      dest: "{{ openshift_certificate_expiry_json_results_path }}"
+
+        """
+        json_result = {
+            'data': {},
+            'summary': {},
+        }
+
+        for host in play_hosts:
+            json_result['data'][host] = hostvars[host]['check_results']['check_results']
+
+        total_warnings = sum([hostvars[h]['check_results']['summary']['warning'] for h in play_hosts])
+        total_expired = sum([hostvars[h]['check_results']['summary']['expired'] for h in play_hosts])
+
+        json_result['summary']['warning'] = total_warnings
+        json_result['summary']['expired'] = total_expired
+
+        return json_result
+
+
+    def filters(self):
+        """ returns a mapping of filters to methods """
+        return {
+            "oo_cert_expiry_results_to_json": self.oo_cert_expiry_results_to_json,
+        }

+ 637 - 0
roles/openshift_certificate_expiry/library/openshift_cert_expiry.py

@@ -0,0 +1,637 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# pylint: disable=line-too-long,invalid-name
+
+"""For details on this module see DOCUMENTATION (below)"""
+
+# router/registry cert grabbing
+import subprocess
+# etcd config file
+import ConfigParser
+# Expiration parsing
+import datetime
+# File path stuff
+import os
+# Config file parsing
+import yaml
+# Certificate loading
+import OpenSSL.crypto
+
+DOCUMENTATION = '''
+---
+module: openshift_cert_expiry
+short_description: Check OpenShift Container Platform (OCP) and Kube certificate expirations on a cluster
+description:
+  - The M(openshift_cert_expiry) module has two basic functions: to flag certificates which will expire in a set window of time from now, and to notify you about certificates which have already expired.
+  - When the module finishes, a summary of the examination is returned. Each certificate in the summary has a C(health) key with a value of one of the following:
+  - C(ok) - not expired, and outside of the expiration C(warning_days) window.
+  - C(warning) - not expired, but will expire between now and the C(warning_days) window.
+  - C(expired) - an expired certificate.
+  - Certificate flagging follow this logic:
+  - If the expiration date is before now then the certificate is classified as C(expired).
+  - The certificates time to live (expiration date - now) is calculated, if that time window is less than C(warning_days) the certificate is classified as C(warning).
+  - All other conditions are classified as C(ok).
+  - The following keys are ALSO present in the certificate summary:
+  - C(cert_cn) - The common name of the certificate (additional CNs present in SAN extensions are omitted)
+  - C(days_remaining) - The number of days until the certificate expires.
+  - C(expiry) - The date the certificate expires on.
+  - C(path) - The full path to the certificate on the examined host.
+version_added: "1.0"
+options:
+  config_base:
+    description:
+      - Base path to OCP system settings.
+    required: false
+    default: /etc/origin
+  warning_days:
+    description:
+      - Flag certificates which will expire in C(warning_days) days from now.
+    required: false
+    default: 30
+  show_all:
+    description:
+      - Enable this option to show analysis of ALL certificates examined by this module.
+      - By default only certificates which have expired, or will expire within the C(warning_days) window will be reported.
+    required: false
+    default: false
+
+author: "Tim Bielawa (@tbielawa) <tbielawa@redhat.com>"
+'''
+
+EXAMPLES = '''
+# Default invocation, only notify about expired certificates or certificates which will expire within 30 days from now
+- openshift_cert_expiry:
+
+# Expand the warning window to show certificates expiring within a year from now
+- openshift_cert_expiry: warning_days=365
+
+# Show expired, soon to expire (now + 30 days), and all other certificates examined
+- openshift_cert_expiry: show_all=true
+'''
+
+
+# We only need this for one thing, we don't care if it doesn't have
+# that many public methods
+#
+# pylint: disable=too-few-public-methods
+class FakeSecHead(object):
+    """etcd does not begin their config file with an opening [section] as
+required by the Python ConfigParser module. We hack around it by
+slipping one in ourselves prior to parsing.
+
+Source: Alex Martelli - http://stackoverflow.com/a/2819788/6490583
+    """
+    def __init__(self, fp):
+        self.fp = fp
+        self.sechead = '[ETCD]\n'
+
+    def readline(self):
+        """Make this look like a file-type object"""
+        if self.sechead:
+            try:
+                return self.sechead
+            finally:
+                self.sechead = None
+        else:
+            return self.fp.readline()
+
+
+######################################################################
+def filter_paths(path_list):
+    """`path_list` - A list of file paths to check. Only files which exist
+will be returned
+    """
+    return [p for p in path_list if os.path.exists(os.path.realpath(p))]
+
+
+def load_and_handle_cert(cert_string, now, base64decode=False):
+    """Load a certificate, split off the good parts, and return some
+useful data
+
+Params:
+
+- `cert_string` (string) - a certificate loaded into a string object
+- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against
+- `base64decode` (bool) - run .decode('base64') on the input?
+
+Returns:
+A 3-tuple of the form: (certificate_common_name, certificate_expiry_date, certificate_time_remaining)
+
+    """
+    if base64decode:
+        _cert_string = cert_string.decode('base-64')
+    else:
+        _cert_string = cert_string
+
+    cert_loaded = OpenSSL.crypto.load_certificate(
+        OpenSSL.crypto.FILETYPE_PEM, _cert_string)
+
+    ######################################################################
+    # Read all possible names from the cert
+    cert_subjects = []
+    for name, value in cert_loaded.get_subject().get_components():
+        cert_subjects.append('{}:{}'.format(name, value))
+
+    # To read SANs from a cert we must read the subjectAltName
+    # extension from the X509 Object. What makes this more difficult
+    # is that pyOpenSSL does not give extensions as a list, nor does
+    # it provide a count of all loaded extensions.
+    #
+    # Rather, extensions are REQUESTED by index. We must iterate over
+    # all extensions until we find the one called 'subjectAltName'. If
+    # we don't find that extension we'll eventually request an
+    # extension at an index where no extension exists (IndexError is
+    # raised). When that happens we know that the cert has no SANs so
+    # we break out of the loop.
+    i = 0
+    checked_all_extensions = False
+    while not checked_all_extensions:
+        try:
+            # Read the extension at index 'i'
+            ext = cert_loaded.get_extension(i)
+        except IndexError:
+            # We tried to read an extension but it isn't there, that
+            # means we ran out of extensions to check. Abort
+            san = None
+            checked_all_extensions = True
+        else:
+            # We were able to load the extension at index 'i'
+            if ext.get_short_name() == 'subjectAltName':
+                san = ext
+                checked_all_extensions = True
+            else:
+                # Try reading the next extension
+                i += 1
+
+    if san is not None:
+        # The X509Extension object for subjectAltName prints as a
+        # string with the alt names separated by a comma and a
+        # space. Split the string by ', ' and then add our new names
+        # to the list of existing names
+        cert_subjects.extend(str(san).split(', '))
+
+    cert_subject = ', '.join(cert_subjects)
+    ######################################################################
+
+    # Grab the expiration date
+    cert_expiry = cert_loaded.get_notAfter()
+    cert_expiry_date = datetime.datetime.strptime(
+        cert_expiry,
+        # example get_notAfter() => 20180922170439Z
+        '%Y%m%d%H%M%SZ')
+
+    time_remaining = cert_expiry_date - now
+
+    return (cert_subject, cert_expiry_date, time_remaining)
+
+
+def classify_cert(cert_meta, now, time_remaining, expire_window, cert_list):
+    """Given metadata about a certificate under examination, classify it
+    into one of three categories, 'ok', 'warning', and 'expired'.
+
+Params:
+
+- `cert_meta` dict - A dict with certificate metadata. Required fields
+  include: 'cert_cn', 'path', 'expiry', 'days_remaining', 'health'.
+- `now` (datetime) - a datetime object of the time to calculate the certificate 'time_remaining' against
+- `time_remaining` (datetime.timedelta) - a timedelta for how long until the cert expires
+- `expire_window` (datetime.timedelta) - a timedelta for how long the warning window is
+- `cert_list` list - A list to shove the classified cert into
+
+Return:
+- `cert_list` - The updated list of classified certificates
+    """
+    expiry_str = str(cert_meta['expiry'])
+    # Categorization
+    if cert_meta['expiry'] < now:
+        # This already expired, must NOTIFY
+        cert_meta['health'] = 'expired'
+    elif time_remaining < expire_window:
+        # WARN about this upcoming expirations
+        cert_meta['health'] = 'warning'
+    else:
+        # Not expired or about to expire
+        cert_meta['health'] = 'ok'
+
+    cert_meta['expiry'] = expiry_str
+    cert_list.append(cert_meta)
+    return cert_list
+
+
+def tabulate_summary(certificates, kubeconfigs, etcd_certs, router_certs, registry_certs):
+    """Calculate the summary text for when the module finishes
+running. This includes counts of each classification and what have
+you.
+
+Params:
+
+- `certificates` (list of dicts) - Processed `expire_check_result`
+  dicts with filled in `health` keys for system certificates.
+- `kubeconfigs` - as above for kubeconfigs
+- `etcd_certs` - as above for etcd certs
+
+Return:
+
+- `summary_results` (dict) - Counts of each cert type classification
+  and total items examined.
+    """
+    items = certificates + kubeconfigs + etcd_certs + router_certs + registry_certs
+
+    summary_results = {
+        'system_certificates': len(certificates),
+        'kubeconfig_certificates': len(kubeconfigs),
+        'etcd_certificates': len(etcd_certs),
+        'router_certs': len(router_certs),
+        'registry_certs': len(registry_certs),
+        'total': len(items),
+        'ok': 0,
+        'warning': 0,
+        'expired': 0
+    }
+
+    summary_results['expired'] = len([c for c in items if c['health'] == 'expired'])
+    summary_results['warning'] = len([c for c in items if c['health'] == 'warning'])
+    summary_results['ok'] = len([c for c in items if c['health'] == 'ok'])
+
+    return summary_results
+
+
+######################################################################
+# This is our module MAIN function after all, so there's bound to be a
+# lot of code bundled up into one block
+#
+# pylint: disable=too-many-locals,too-many-locals,too-many-statements,too-many-branches
+def main():
+    """This module examines certificates (in various forms) which compose
+an OpenShift Container Platform cluster
+    """
+
+    module = AnsibleModule(
+        argument_spec=dict(
+            config_base=dict(
+                required=False,
+                default="/etc/origin",
+                type='str'),
+            warning_days=dict(
+                required=False,
+                default=30,
+                type='int'),
+            show_all=dict(
+                required=False,
+                default=False,
+                type='bool')
+        ),
+        supports_check_mode=True,
+    )
+
+    # Basic scaffolding for OpenShift specific certs
+    openshift_base_config_path = module.params['config_base']
+    openshift_master_config_path = os.path.normpath(
+        os.path.join(openshift_base_config_path, "master/master-config.yaml")
+    )
+    openshift_node_config_path = os.path.normpath(
+        os.path.join(openshift_base_config_path, "node/node-config.yaml")
+    )
+    openshift_cert_check_paths = [
+        openshift_master_config_path,
+        openshift_node_config_path,
+    ]
+
+    # Paths for Kubeconfigs. Additional kubeconfigs are conditionally
+    # checked later in the code
+    master_kube_configs = ['admin', 'openshift-master',
+                           'openshift-node', 'openshift-router',
+                           'openshift-registry']
+
+    kubeconfig_paths = []
+    for m_kube_config in master_kube_configs:
+        kubeconfig_paths.append(
+            os.path.normpath(
+                os.path.join(openshift_base_config_path, "master/%s.kubeconfig" % m_kube_config)
+            )
+        )
+
+    # Validate some paths we have the ability to do ahead of time
+    openshift_cert_check_paths = filter_paths(openshift_cert_check_paths)
+    kubeconfig_paths = filter_paths(kubeconfig_paths)
+
+    # etcd, where do you hide your certs? Used when parsing etcd.conf
+    etcd_cert_params = [
+        "ETCD_CA_FILE",
+        "ETCD_CERT_FILE",
+        "ETCD_PEER_CA_FILE",
+        "ETCD_PEER_CERT_FILE",
+    ]
+
+    # Expiry checking stuff
+    now = datetime.datetime.now()
+    # todo, catch exception for invalid input and return a fail_json
+    warning_days = int(module.params['warning_days'])
+    expire_window = datetime.timedelta(days=warning_days)
+
+    # Module stuff
+    #
+    # The results of our cert checking to return from the task call
+    check_results = {}
+    check_results['meta'] = {}
+    check_results['meta']['warning_days'] = warning_days
+    check_results['meta']['checked_at_time'] = str(now)
+    check_results['meta']['warn_before_date'] = str(now + expire_window)
+    check_results['meta']['show_all'] = str(module.params['show_all'])
+    # All the analyzed certs accumulate here
+    ocp_certs = []
+
+    ######################################################################
+    # Sure, why not? Let's enable check mode.
+    if module.check_mode:
+        check_results['ocp_certs'] = []
+        module.exit_json(
+            check_results=check_results,
+            msg="Checked 0 total certificates. Expired/Warning/OK: 0/0/0. Warning window: %s days" % module.params['warning_days'],
+            rc=0,
+            changed=False
+        )
+
+    ######################################################################
+    # Check for OpenShift Container Platform specific certs
+    ######################################################################
+    for os_cert in filter_paths(openshift_cert_check_paths):
+        # Open up that config file and locate the cert and CA
+        with open(os_cert, 'r') as fp:
+            cert_meta = {}
+            cfg = yaml.load(fp)
+            # cert files are specified in parsed `fp` as relative to the path
+            # of the original config file. 'master-config.yaml' with certFile
+            # = 'foo.crt' implies that 'foo.crt' is in the same
+            # directory. certFile = '../foo.crt' is in the parent directory.
+            cfg_path = os.path.dirname(fp.name)
+            cert_meta['certFile'] = os.path.join(cfg_path, cfg['servingInfo']['certFile'])
+            cert_meta['clientCA'] = os.path.join(cfg_path, cfg['servingInfo']['clientCA'])
+
+        ######################################################################
+        # Load the certificate and the CA, parse their expiration dates into
+        # datetime objects so we can manipulate them later
+        for _, v in cert_meta.iteritems():
+            with open(v, 'r') as fp:
+                cert = fp.read()
+                cert_subject, cert_expiry_date, time_remaining = load_and_handle_cert(cert, now)
+
+                expire_check_result = {
+                    'cert_cn': cert_subject,
+                    'path': fp.name,
+                    'expiry': cert_expiry_date,
+                    'days_remaining': time_remaining.days,
+                    'health': None,
+                }
+
+                classify_cert(expire_check_result, now, time_remaining, expire_window, ocp_certs)
+
+    ######################################################################
+    # /Check for OpenShift Container Platform specific certs
+    ######################################################################
+
+    ######################################################################
+    # Check service Kubeconfigs
+    ######################################################################
+    kubeconfigs = []
+
+    # There may be additional kubeconfigs to check, but their naming
+    # is less predictable than the ones we've already assembled.
+
+    try:
+        # Try to read the standard 'node-config.yaml' file to check if
+        # this host is a node.
+        with open(openshift_node_config_path, 'r') as fp:
+            cfg = yaml.load(fp)
+
+        # OK, the config file exists, therefore this is a
+        # node. Nodes have their own kubeconfig files to
+        # communicate with the master API. Let's read the relative
+        # path to that file from the node config.
+        node_masterKubeConfig = cfg['masterKubeConfig']
+        # As before, the path to the 'masterKubeConfig' file is
+        # relative to `fp`
+        cfg_path = os.path.dirname(fp.name)
+        node_kubeconfig = os.path.join(cfg_path, node_masterKubeConfig)
+
+        with open(node_kubeconfig, 'r') as fp:
+            # Read in the nodes kubeconfig file and grab the good stuff
+            cfg = yaml.load(fp)
+
+        c = cfg['users'][0]['user']['client-certificate-data']
+        (cert_subject,
+         cert_expiry_date,
+         time_remaining) = load_and_handle_cert(c, now, base64decode=True)
+
+        expire_check_result = {
+            'cert_cn': cert_subject,
+            'path': fp.name,
+            'expiry': cert_expiry_date,
+            'days_remaining': time_remaining.days,
+            'health': None,
+        }
+
+        classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs)
+    except IOError:
+        # This is not a node
+        pass
+
+    for kube in filter_paths(kubeconfig_paths):
+        with open(kube, 'r') as fp:
+            # TODO: Maybe consider catching exceptions here?
+            cfg = yaml.load(fp)
+
+        # Per conversation, "the kubeconfigs you care about:
+        # admin, router, registry should all be single
+        # value". Following that advice we only grab the data for
+        # the user at index 0 in the 'users' list. There should
+        # not be more than one user.
+        c = cfg['users'][0]['user']['client-certificate-data']
+        (cert_subject,
+         cert_expiry_date,
+         time_remaining) = load_and_handle_cert(c, now, base64decode=True)
+
+        expire_check_result = {
+            'cert_cn': cert_subject,
+            'path': fp.name,
+            'expiry': cert_expiry_date,
+            'days_remaining': time_remaining.days,
+            'health': None,
+        }
+
+        classify_cert(expire_check_result, now, time_remaining, expire_window, kubeconfigs)
+
+    ######################################################################
+    # /Check service Kubeconfigs
+    ######################################################################
+
+    ######################################################################
+    # Check etcd certs
+    ######################################################################
+    # Some values may be duplicated, make this a set for now so we
+    # unique them all
+    etcd_certs_to_check = set([])
+    etcd_certs = []
+    etcd_cert_params.append('dne')
+    try:
+        with open('/etc/etcd/etcd.conf', 'r') as fp:
+            etcd_config = ConfigParser.ConfigParser()
+            etcd_config.readfp(FakeSecHead(fp))
+
+        for param in etcd_cert_params:
+            try:
+                etcd_certs_to_check.add(etcd_config.get('ETCD', param))
+            except ConfigParser.NoOptionError:
+                # That parameter does not exist, oh well...
+                pass
+    except IOError:
+        # No etcd to see here, move along
+        pass
+
+    for etcd_cert in filter_paths(etcd_certs_to_check):
+        with open(etcd_cert, 'r') as fp:
+            c = fp.read()
+            (cert_subject,
+             cert_expiry_date,
+             time_remaining) = load_and_handle_cert(c, now)
+
+            expire_check_result = {
+                'cert_cn': cert_subject,
+                'path': fp.name,
+                'expiry': cert_expiry_date,
+                'days_remaining': time_remaining.days,
+                'health': None,
+            }
+
+            classify_cert(expire_check_result, now, time_remaining, expire_window, etcd_certs)
+
+    ######################################################################
+    # /Check etcd certs
+    ######################################################################
+
+    ######################################################################
+    # Check router/registry certs
+    #
+    # These are saved as secrets in etcd. That means that we can not
+    # simply read a file to grab the data. Instead we're going to
+    # subprocess out to the 'oc get' command. On non-masters this
+    # command will fail, that is expected so we catch that exception.
+    ######################################################################
+    router_certs = []
+    registry_certs = []
+
+    ######################################################################
+    # First the router certs
+    try:
+        router_secrets_raw = subprocess.Popen('oc get secret router-certs -o yaml'.split(),
+                                              stdout=subprocess.PIPE)
+        router_ds = yaml.load(router_secrets_raw.communicate()[0])
+        router_c = router_ds['data']['tls.crt']
+        router_path = router_ds['metadata']['selfLink']
+    except TypeError:
+        # YAML couldn't load the result, this is not a master
+        pass
+    except OSError:
+        # The OC command doesn't exist here. Move along.
+        pass
+    else:
+        (cert_subject,
+         cert_expiry_date,
+         time_remaining) = load_and_handle_cert(router_c, now, base64decode=True)
+
+        expire_check_result = {
+            'cert_cn': cert_subject,
+            'path': router_path,
+            'expiry': cert_expiry_date,
+            'days_remaining': time_remaining.days,
+            'health': None,
+        }
+
+        classify_cert(expire_check_result, now, time_remaining, expire_window, router_certs)
+
+    ######################################################################
+    # Now for registry
+    try:
+        registry_secrets_raw = subprocess.Popen('oc get secret registry-certificates -o yaml'.split(),
+                                                stdout=subprocess.PIPE)
+        registry_ds = yaml.load(registry_secrets_raw.communicate()[0])
+        registry_c = registry_ds['data']['registry.crt']
+        registry_path = registry_ds['metadata']['selfLink']
+    except TypeError:
+        # YAML couldn't load the result, this is not a master
+        pass
+    except OSError:
+        # The OC command doesn't exist here. Move along.
+        pass
+    else:
+        (cert_subject,
+         cert_expiry_date,
+         time_remaining) = load_and_handle_cert(registry_c, now, base64decode=True)
+
+        expire_check_result = {
+            'cert_cn': cert_subject,
+            'path': registry_path,
+            'expiry': cert_expiry_date,
+            'days_remaining': time_remaining.days,
+            'health': None,
+        }
+
+        classify_cert(expire_check_result, now, time_remaining, expire_window, registry_certs)
+
+    ######################################################################
+    # /Check router/registry certs
+    ######################################################################
+
+    res = tabulate_summary(ocp_certs, kubeconfigs, etcd_certs, router_certs, registry_certs)
+
+    msg = "Checked {count} total certificates. Expired/Warning/OK: {exp}/{warn}/{ok}. Warning window: {window} days".format(
+        count=res['total'],
+        exp=res['expired'],
+        warn=res['warning'],
+        ok=res['ok'],
+        window=int(module.params['warning_days']),
+    )
+
+    # By default we only return detailed information about expired or
+    # warning certificates. If show_all is true then we will print all
+    # the certificates examined.
+    if not module.params['show_all']:
+        check_results['ocp_certs'] = [crt for crt in ocp_certs if crt['health'] in ['expired', 'warning']]
+        check_results['kubeconfigs'] = [crt for crt in kubeconfigs if crt['health'] in ['expired', 'warning']]
+        check_results['etcd'] = [crt for crt in etcd_certs if crt['health'] in ['expired', 'warning']]
+        check_results['registry'] = [crt for crt in registry_certs if crt['health'] in ['expired', 'warning']]
+        check_results['router'] = [crt for crt in router_certs if crt['health'] in ['expired', 'warning']]
+    else:
+        check_results['ocp_certs'] = ocp_certs
+        check_results['kubeconfigs'] = kubeconfigs
+        check_results['etcd'] = etcd_certs
+        check_results['registry'] = registry_certs
+        check_results['router'] = router_certs
+
+    # Sort the final results to report in order of ascending safety
+    # time. That is to say, the certificates which will expire sooner
+    # will be at the front of the list and certificates which will
+    # expire later are at the end. Router and registry certs should be
+    # limited to just 1 result, so don't bother sorting those.
+    check_results['ocp_certs'] = sorted(check_results['ocp_certs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining']))
+    check_results['kubeconfigs'] = sorted(check_results['kubeconfigs'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining']))
+    check_results['etcd'] = sorted(check_results['etcd'], cmp=lambda x, y: cmp(x['days_remaining'], y['days_remaining']))
+
+    # This module will never change anything, but we might want to
+    # change the return code parameter if there is some catastrophic
+    # error we noticed earlier
+    module.exit_json(
+        check_results=check_results,
+        summary=res,
+        msg=msg,
+        rc=0,
+        changed=False
+    )
+
+######################################################################
+# Ansible module boilerplate: the AnsibleModule import is placed at
+# the bottom of the file by convention, so the corresponding pylint
+# warnings are disabled for it.
+#
+# pylint: disable=wrong-import-position,import-error
+from ansible.module_utils.basic import AnsibleModule
+if __name__ == '__main__':
+    main()

+ 16 - 0
roles/openshift_certificate_expiry/meta/main.yml

@@ -0,0 +1,16 @@
+---
+# Ansible Galaxy metadata for the openshift_certificate_expiry role.
+galaxy_info:
+  author: Tim Bielawa
+  description: OpenShift Certificate Expiry Checker
+  company: Red Hat, Inc.
+  license: Apache License, Version 2.0
+  min_ansible_version: 2.1
+  version: 1.0
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  categories:
+  - cloud
+  - system
+# This role does not pull in any other roles.
+dependencies: []

+ 30 - 0
roles/openshift_certificate_expiry/tasks/main.yml

@@ -0,0 +1,30 @@
+---
+# Run the openshift_cert_expiry module on each host and keep its
+# return value in `check_results` for the reporting tasks below.
+- name: Check cert expirys on host
+  openshift_cert_expiry:
+    warning_days: "{{ openshift_certificate_expiry_warning_days|int }}"
+    config_base: "{{ openshift_certificate_expiry_config_base }}"
+    show_all: "{{ openshift_certificate_expiry_show_all|bool }}"
+  register: check_results
+
+# NOTE: `when` is already evaluated as a Jinja2 expression, so the
+# conditions in this file are written without "{{ }}" delimiters.
+
+# Render the HTML report once, on the control host.
+- name: Generate expiration report HTML
+  become: no
+  run_once: yes
+  template:
+    src: cert-expiry-table.html.j2
+    dest: "{{ openshift_certificate_expiry_html_report_path }}"
+  delegate_to: localhost
+  when: openshift_certificate_expiry_generate_html_report|bool
+
+# Build the value that the save_json_results.j2 template serializes.
+- name: Generate the result JSON string
+  run_once: yes
+  set_fact: json_result_string="{{ hostvars|oo_cert_expiry_results_to_json(play_hosts) }}"
+  when: openshift_certificate_expiry_save_json_results|bool
+
+# Write the JSON results file once, on the control host.
+- name: Generate results JSON file
+  become: no
+  run_once: yes
+  template:
+    src: save_json_results.j2
+    dest: "{{ openshift_certificate_expiry_json_results_path }}"
+  delegate_to: localhost
+  when: openshift_certificate_expiry_save_json_results|bool

+ 124 - 0
roles/openshift_certificate_expiry/templates/cert-expiry-table.html.j2

@@ -0,0 +1,124 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="UTF-8" />
+    <title>OCP Certificate Expiry Report</title>
+    {# External stylesheets: Bootstrap (glyphicon status icons) and the Source Sans Pro web font #}
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" />
+    <link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,700" rel="stylesheet" />
+    <style type="text/css">
+      body {
+        font-family: 'Source Sans Pro', sans-serif;
+        margin-left: 50px;
+        margin-right: 50px;
+        margin-bottom: 20px;
+        padding-top: 70px;
+      }
+
+      table {
+        border-collapse: collapse;
+        margin-bottom: 20px;
+      }
+
+      table, th, td {
+        border: 1px solid black;
+      }
+
+      th, td {
+        padding: 5px;
+      }
+
+      .cert-kind {
+        margin-top: 5px;
+        margin-bottom: 5px;
+      }
+
+      footer {
+        font-size: small;
+        text-align: center;
+      }
+
+      tr.odd {
+        background-color: #f2f2f2;
+      }
+    </style>
+  </head>
+  <body>
+    {# Fixed top navigation bar: report title plus a link to the certificate redeployment docs #}
+    <nav class="navbar navbar-default navbar-fixed-top">
+      <div class="container-fluid">
+        <div class="navbar-header">
+          <a class="navbar-brand" href="#">OCP Certificate Expiry Report</a>
+        </div>
+        <div class="collapse navbar-collapse">
+          <p class="navbar-text navbar-right">
+            {# rel="noopener noreferrer" keeps the page opened in the new tab from reaching back into this report via window.opener #}
+            <a href="https://docs.openshift.com/container-platform/latest/install_config/redeploying_certificates.html"
+               target="_blank"
+               rel="noopener noreferrer"
+               class="navbar-link">
+              <i class="glyphicon glyphicon-book"></i> Redeploying Certificates
+            </a>
+          </p>
+        </div>
+      </div>
+    </nav>
+
+    {# Each host has a header and table to itself #}
+    {# Text taken from host names, module results and certificates is passed through the `e` filter so HTML-special characters render as literal text #}
+    {% for host in play_hosts %}
+      <h1>{{ host | e }}</h1>
+
+      <p>
+        {{ hostvars[host].check_results.msg | e }}
+      </p>
+      <ul>
+        <li><b>Expirations checked at:</b> {{ hostvars[host].check_results.check_results.meta.checked_at_time | e }}</li>
+        <li><b>Warn after date:</b> {{ hostvars[host].check_results.check_results.meta.warn_before_date | e }}</li>
+      </ul>
+
+      <table border="1" width="100%">
+        {# These are hard-coded right now, but should be grabbed dynamically from the registered results #}
+        {%- for kind in ['ocp_certs', 'etcd', 'kubeconfigs', 'router', 'registry'] -%}
+          <tr>
+            <th colspan="6" style="text-align:center"><h2 class="cert-kind">{{ kind }}</h2></th>
+          </tr>
+
+          <tr>
+            <th>&nbsp;</th>
+            <th style="width:33%">Certificate Common/Alt Name(s)</th>
+            <th>Health</th>
+            <th>Days Remaining</th>
+            <th>Expiration Date</th>
+            <th>Path</th>
+          </tr>
+
+          {# A row for each certificate examined #}
+          {%- for v in hostvars[host].check_results.check_results[kind] -%}
+
+            {# Let's add some flair and show status visually with fancy icons #}
+            {# Any health value other than 'ok' or 'warning' gets the "remove" icon #}
+            {% if v.health == 'ok' %}
+              {% set health_icon = 'glyphicon glyphicon-ok' %}
+            {% elif v.health == 'warning' %}
+              {% set health_icon = 'glyphicon glyphicon-alert' %}
+            {% else %}
+              {% set health_icon = 'glyphicon glyphicon-remove' %}
+            {% endif %}
+
+            <tr class="{{ loop.cycle('odd', 'even') }}">
+              <td style="text-align:center"><i class="{{ health_icon }}"></i></td>
+              <td style="width:33%">{{ v.cert_cn | e }}</td>
+              <td>{{ v.health | e }}</td>
+              <td>{{ v.days_remaining }}</td>
+              <td>{{ v.expiry | e }}</td>
+              <td>{{ v.path | e }}</td>
+            </tr>
+          {% endfor %}
+          {# end row generation per cert of this type #}
+        {% endfor %}
+        {# end generation for each kind of cert block #}
+      </table>
+      <hr />
+    {% endfor %}
+    {# end section generation for each host #}
+
+    {# Page footer: attribution for the report generator and the icon set #}
+    <footer>
+      <p>
+        Expiration report generated by <a href="https://github.com/openshift/openshift-ansible" target="_blank" rel="noopener noreferrer">openshift-ansible</a>
+      </p>
+      <p>
+        Status icons from bootstrap/glyphicon
+      </p>
+    </footer>
+  </body>
+</html>

+ 1 - 0
roles/openshift_certificate_expiry/templates/save_json_results.j2

@@ -0,0 +1 @@
+{{ json_result_string | to_nice_json(indent=2) }}

+ 9 - 3
utils/Makefile

@@ -31,6 +31,8 @@ ASCII2MAN = a2x -D $(dir $@) -d manpage -f manpage $<
 MANPAGES := docs/man/man1/atomic-openshift-installer.1
 VERSION := 1.3
 
+# pep8 checks ignored by every ci-pep8 invocation: E501 (line too long),
+# E121 (continuation line under-indented for hanging indent) and
+# E124 (closing bracket does not match visual indentation).
+PEPEXCLUDES := E501,E121,E124
+
 sdist: clean
 	python setup.py sdist
 	rm -fR $(SHORTNAME).egg-info
@@ -80,7 +82,7 @@ ci-pylint:
 	@echo "#############################################"
 	@echo "# Running PyLint Tests in virtualenv"
 	@echo "#############################################"
-	. $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py
+	. $(NAME)env/bin/activate && python -m pylint --rcfile ../git/.pylintrc src/ooinstall/cli_installer.py src/ooinstall/oo_config.py src/ooinstall/openshift_ansible.py src/ooinstall/variants.py ../callback_plugins/openshift_quick_installer.py ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
 
 ci-list-deps:
 	@echo "#############################################"
@@ -94,13 +96,17 @@ ci-pyflakes:
 	@echo "#################################################"
 	. $(NAME)env/bin/activate && pyflakes src/ooinstall/*.py
 	. $(NAME)env/bin/activate && pyflakes ../callback_plugins/openshift_quick_installer.py
+	. $(NAME)env/bin/activate && pyflakes ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
 
 ci-pep8:
 	@echo "#############################################"
 	@echo "# Running PEP8 Compliance Tests in virtualenv"
 	@echo "#############################################"
-	. $(NAME)env/bin/activate && pep8 --ignore=E501,E121,E124 src/$(SHORTNAME)/
-	. $(NAME)env/bin/activate && pep8 --ignore=E501,E121,E124 ../callback_plugins/openshift_quick_installer.py
+	. $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) src/$(SHORTNAME)/
+	. $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES) ../callback_plugins/openshift_quick_installer.py
+# This one excludes E402 because it is an ansible module and the
+# boilerplate import statement is expected to be at the bottom
+	. $(NAME)env/bin/activate && pep8 --ignore=$(PEPEXCLUDES),E402 ../roles/openshift_certificate_expiry/library/openshift_cert_expiry.py
 
 ci: clean virtualenv ci-list-deps ci-pep8 ci-pylint ci-pyflakes ci-unittests
 	:

+ 1 - 0
utils/test-requirements.txt

@@ -9,3 +9,4 @@ flake8
 PyYAML
 click
 backports.functools_lru_cache
+pyOpenSSL