Browse Source

Merge pull request #3234 from rhcarvalho/check-module

Replace multi-role checks with action-plugin-based checks
Scott Dodson 8 years ago
parent
commit
db243097da

+ 10 - 29
playbooks/byo/openshift-preflight/check.yml

@@ -1,31 +1,12 @@
 ---
 - hosts: OSEv3
-  roles:
-    - openshift_preflight/init
-
-- hosts: OSEv3
-  name: checks that apply to all hosts
-  gather_facts: no
-  ignore_errors: yes
-  roles:
-    - openshift_preflight/common
-
-- hosts: masters
-  name: checks that apply to masters
-  gather_facts: no
-  ignore_errors: yes
-  roles:
-    - openshift_preflight/masters
-
-- hosts: nodes
-  name: checks that apply to nodes
-  gather_facts: no
-  ignore_errors: yes
-  roles:
-    - openshift_preflight/nodes
-
-- hosts: OSEv3
-  name: verify check results
-  gather_facts: no
-  roles:
-    - openshift_preflight/verify_status
+  name: run OpenShift health checks
+  roles:
+    - openshift_health_checker
+  post_tasks:
+    # NOTE: we need to use the old "action: name" syntax until
+    # https://github.com/ansible/ansible/issues/20513 is fixed.
+    - action: openshift_health_check
+      args:
+        checks:
+          - '@preflight'

+ 34 - 0
roles/openshift_health_checker/HOWTO_CHECKS.md

@@ -0,0 +1,34 @@
+# OpenShift health checks
+
+This Ansible role contains health checks to diagnose problems in OpenShift
+environments.
+
+Checks are typically implemented as two parts:
+
+1. a Python module in [openshift_checks/](openshift_checks), with a class that
+   inherits from `OpenShiftCheck`.
+2. a custom Ansible module in [library/](library), for cases when the modules
+   shipped with Ansible do not provide the required functionality.
+
+The checks are called from an Ansible playbooks via the `openshift_health_check`
+action plugin. See
+[playbooks/byo/openshift-preflight/check.yml](../../playbooks/byo/openshift-preflight/check.yml)
+for an example.
+
+The action plugin dynamically discovers all checks and executes only those
+selected in the play.
+
+Checks can determine when they are active by implementing the method
+`is_active`. Inactive checks are skipped. This is similar to the `when`
+instruction in Ansible plays.
+
+Checks may have tags, which are a way to group related checks together. For
+instance, to run all preflight checks, pass in the group `'@preflight'` to
+`openshift_health_check`.
+
+Groups are automatically computed from tags.
+
+Groups and individual check names can be used together in the argument list to
+`openshift_health_check`.
+
+Look at existing checks for the implementation details.

+ 45 - 0
roles/openshift_health_checker/README.md

@@ -0,0 +1,45 @@
+OpenShift Health Checker
+========================
+
+This role detects common problems with OpenShift installations or with
+environments prior to install.
+
+For more information about creating new checks, see [HOWTO_CHECKS.md](HOWTO_CHECKS.md).
+
+Requirements
+------------
+
+* Ansible 2.2+
+
+Role Variables
+--------------
+
+None
+
+Dependencies
+------------
+
+- openshift_facts
+
+Example Playbook
+----------------
+
+```yaml
+---
+- hosts: OSEv3
+  name: run OpenShift health checks
+  roles:
+    - openshift_health_checker
+  post_tasks:
+    - action: openshift_health_check
+```
+
+License
+-------
+
+Apache License Version 2.0
+
+Author Information
+------------------
+
+Customer Success team (dev@lists.openshift.redhat.com)

+ 116 - 0
roles/openshift_health_checker/action_plugins/openshift_health_check.py

@@ -0,0 +1,116 @@
+"""
+Ansible action plugin to execute health checks in OpenShift clusters.
+"""
+# pylint: disable=wrong-import-position,missing-docstring,invalid-name
+import sys
+import os
+
+try:
+    from __main__ import display
+except ImportError:
+    from ansible.utils.display import Display
+    display = Display()
+
+from ansible.plugins.action import ActionBase
+
+# Augment sys.path so that we can import checks from a directory relative to
+# this callback plugin.
+sys.path.insert(1, os.path.dirname(os.path.dirname(__file__)))
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException  # noqa: E402
+
+
+class ActionModule(ActionBase):
+
+    def run(self, tmp=None, task_vars=None):
+        result = super(ActionModule, self).run(tmp, task_vars)
+
+        if task_vars is None:
+            task_vars = {}
+
+        if "openshift" not in task_vars:
+            result["failed"] = True
+            result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?"
+            return result
+
+        try:
+            known_checks = self.load_known_checks()
+        except OpenShiftCheckException as e:
+            result["failed"] = True
+            result["msg"] = str(e)
+            return result
+
+        args = self._task.args
+        requested_checks = resolve_checks(args.get("checks", []), known_checks.values())
+
+        unknown_checks = requested_checks - set(known_checks)
+        if unknown_checks:
+            result["failed"] = True
+            result["msg"] = (
+                "One or more checks are unknown: {}. "
+                "Make sure there is no typo in the playbook and no files are missing."
+            ).format(", ".join(unknown_checks))
+            return result
+
+        result["checks"] = check_results = {}
+
+        for check_name in requested_checks & set(known_checks):
+            display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"]))
+            check = known_checks[check_name]
+
+            if check.is_active(task_vars):
+                try:
+                    r = check.run(tmp, task_vars)
+                except OpenShiftCheckException as e:
+                    r = {}
+                    r["failed"] = True
+                    r["msg"] = str(e)
+            else:
+                r = {"skipped": True}
+
+            check_results[check_name] = r
+
+            if r.get("failed", False):
+                result["failed"] = True
+                result["msg"] = "One or more checks failed"
+
+        return result
+
+    def load_known_checks(self):
+        known_checks = {}
+
+        known_check_classes = set(cls for cls in OpenShiftCheck.subclasses())
+
+        for cls in known_check_classes:
+            check_name = cls.name
+            if check_name in known_checks:
+                other_cls = known_checks[check_name].__class__
+                raise OpenShiftCheckException(
+                    "non-unique check name '{}' in: '{}.{}' and '{}.{}'".format(
+                        check_name,
+                        cls.__module__, cls.__name__,
+                        other_cls.__module__, other_cls.__name__))
+            known_checks[check_name] = cls(module_executor=self._execute_module)
+
+        return known_checks
+
+
+def resolve_checks(names, all_checks):
+    """Returns a set of resolved check names.
+
+    Resolving a check name involves expanding tag references (e.g., '@tag') with
+    all the checks that contain the given tag.
+
+    names should be a sequence of strings.
+
+    all_checks should be a sequence of check classes/instances.
+    """
+    resolved = set()
+    for name in names:
+        if name.startswith("@"):
+            for check in all_checks:
+                if name[1:] in check.tags:
+                    resolved.add(check.name)
+        else:
+            resolved.add(name)
+    return resolved

+ 4 - 0
roles/openshift_preflight/verify_status/callback_plugins/zz_failure_summary.py

@@ -3,6 +3,8 @@
 Ansible callback plugin.
 '''
 
+from pprint import pformat
+
 from ansible.plugins.callback import CallbackBase
 from ansible import constants as C
 from ansible.utils.color import stringc
@@ -79,6 +81,8 @@ def _format_failure(failure):
         (u'Task', task),
         (u'Message', stringc(msg, C.COLOR_ERROR)),
     )
+    if 'checks' in result._result:
+        rows += ((u'Details', stringc(pformat(result._result['checks']), C.COLOR_ERROR)),)
     row_format = '{:10}{}'
     return [row_format.format(header + u':', body) for header, body in rows]
 

+ 18 - 27
roles/openshift_preflight/base/library/aos_version.py

@@ -1,57 +1,49 @@
 #!/usr/bin/python
 # vim: expandtab:tabstop=4:shiftwidth=4
 '''
-An ansible module for determining if more than one minor version
-of any atomic-openshift package is available, which would indicate
-that multiple repos are enabled for different versions of the same
-thing which may cause problems.
+Ansible module for determining if multiple versions of an OpenShift package are
+available, and if the version requested is available down to the given
+precision.
 
-Also, determine if the version requested is available down to the
-precision requested.
+Multiple versions available suggest that multiple repos are enabled for the
+different versions, which may cause installation problems.
 '''
 
-# import os
-# import sys
 import yum  # pylint: disable=import-error
+
 from ansible.module_utils.basic import AnsibleModule
 
 
-def main():  # pylint: disable=missing-docstring
+def main():  # pylint: disable=missing-docstring,too-many-branches
     module = AnsibleModule(
         argument_spec=dict(
-            version=dict(required=True)
+            prefix=dict(required=True),  # atomic-openshift, origin, ...
+            version=dict(required=True),
         ),
         supports_check_mode=True
     )
 
-    # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet
-    # for when it is actually necessary. Leaving it commented out for now,
-    # though this comment and the commented out code related to _unmute should
-    # be deleted later if not proven necessary.
-
-    # sys.stdout = os.devnull  # mute yum so it doesn't break our output
-    # sys.stderr = os.devnull  # mute yum so it doesn't break our output
-
-    # def _unmute():  # pylint: disable=missing-docstring
-    #     sys.stdout = sys.__stdout__
-
     def bail(error):  # pylint: disable=missing-docstring
-        # _unmute()
         module.fail_json(msg=error)
 
+    rpm_prefix = module.params['prefix']
+
+    if not rpm_prefix:
+        bail("prefix must not be empty")
+
     yb = yum.YumBase()  # pylint: disable=invalid-name
 
     # search for package versions available for aos pkgs
     expected_pkgs = [
-        'atomic-openshift',
-        'atomic-openshift-master',
-        'atomic-openshift-node',
+        rpm_prefix,
+        rpm_prefix + '-master',
+        rpm_prefix + '-node',
     ]
     try:
         pkgs = yb.pkgSack.returnPackages(patterns=expected_pkgs)
     except yum.Errors.PackageSackError as e:  # pylint: disable=invalid-name
         # you only hit this if *none* of the packages are available
-        bail('Unable to find any atomic-openshift packages. \nCheck your subscription and repo settings. \n%s' % e)
+        bail('Unable to find any OpenShift packages.\nCheck your subscription and repo settings.\n%s' % e)
 
     # determine what level of precision we're expecting for the version
     expected_version = module.params['version']
@@ -92,7 +84,6 @@ def main():  # pylint: disable=missing-docstring
             msg += '  %s\n' % name
         bail(msg + "There should only be one OpenShift version's repository enabled at a time.")
 
-    # _unmute()
     module.exit_json(changed=False)
 
 

+ 2 - 13
roles/openshift_preflight/base/library/check_yum_update.py

@@ -8,9 +8,10 @@ parameters:
             If omitted, all installed RPMs are considered for updates.
 '''
 
-# import os
 import sys
+
 import yum  # pylint: disable=import-error
+
 from ansible.module_utils.basic import AnsibleModule
 
 
@@ -22,18 +23,7 @@ def main():  # pylint: disable=missing-docstring,too-many-branches
         supports_check_mode=True
     )
 
-    # NOTE(rhcarvalho): sosiouxme added _unmute, but I couldn't find a case yet
-    # for when it is actually necessary. Leaving it commented out for now,
-    # though this comment and the commented out code related to _unmute should
-    # be deleted later if not proven necessary.
-
-    # sys.stdout = os.devnull  # mute yum so it doesn't break our output
-
-    # def _unmute():  # pylint: disable=missing-docstring
-    #     sys.stdout = sys.__stdout__
-
     def bail(error):  # pylint: disable=missing-docstring
-        # _unmute()
         module.fail_json(msg=error)
 
     yb = yum.YumBase()  # pylint: disable=invalid-name
@@ -108,7 +98,6 @@ def main():  # pylint: disable=missing-docstring,too-many-branches
         bail('Unknown error(s) from dependency resolution. Exit Code: %d:\n%s' %
              (txn_result, txn_msgs))
 
-    # _unmute()
     module.exit_json(changed=False)
 
 

roles/openshift_preflight/init/meta/main.yml → roles/openshift_health_checker/meta/main.yml


+ 84 - 0
roles/openshift_health_checker/openshift_checks/__init__.py

@@ -0,0 +1,84 @@
+"""
+Health checks for OpenShift clusters.
+"""
+
+import os
+from abc import ABCMeta, abstractmethod, abstractproperty
+from importlib import import_module
+import operator
+
+import six
+from six.moves import reduce
+
+
+class OpenShiftCheckException(Exception):
+    """Raised when a check cannot proceed."""
+    pass
+
+
+@six.add_metaclass(ABCMeta)
+class OpenShiftCheck(object):
+    """A base class for defining checks for an OpenShift cluster environment."""
+
+    def __init__(self, module_executor):
+        self.module_executor = module_executor
+
+    @abstractproperty
+    def name(self):
+        """The name of this check, usually derived from the class name."""
+        return "openshift_check"
+
+    @property
+    def tags(self):
+        """A list of tags that this check satisfy.
+
+        Tags are used to reference multiple checks with a single '@tagname'
+        special check name.
+        """
+        return []
+
+    @classmethod
+    def is_active(cls, task_vars):  # pylint: disable=unused-argument
+        """Returns true if this check applies to the ansible-playbook run."""
+        return True
+
+    @abstractmethod
+    def run(self, tmp, task_vars):
+        """Executes a check, normally implemented as a module."""
+        return {}
+
+    @classmethod
+    def subclasses(cls):
+        """Returns a generator of subclasses of this class and its subclasses."""
+        for subclass in cls.__subclasses__():  # pylint: disable=no-member
+            yield subclass
+            for subclass in subclass.subclasses():
+                yield subclass
+
+
+def get_var(task_vars, *keys, **kwargs):
+    """Helper function to get deeply nested values from task_vars.
+
+    Ansible task_vars structures are Python dicts, often mapping strings to
+    other dicts. This helper makes it easier to get a nested value, raising
+    OpenShiftCheckException when a key is not found.
+    """
+    try:
+        value = reduce(operator.getitem, keys, task_vars)
+    except (KeyError, TypeError):
+        if "default" in kwargs:
+            return kwargs["default"]
+        raise OpenShiftCheckException("'{}' is undefined".format(".".join(map(str, keys))))
+    return value
+
+
+# Dynamically import all submodules for the side effect of loading checks.
+
+EXCLUDES = (
+    "__init__.py",
+    "mixins.py",
+)
+
+for name in os.listdir(os.path.dirname(__file__)):
+    if name.endswith(".py") and name not in EXCLUDES:
+        import_module(__package__ + "." + name[:-3])

+ 21 - 0
roles/openshift_health_checker/openshift_checks/mixins.py

@@ -0,0 +1,21 @@
+# pylint: disable=missing-docstring
+from openshift_checks import get_var
+
+
+class NotContainerized(object):
+    """Mixin for checks that are only active when not in containerized mode."""
+
+    @classmethod
+    def is_active(cls, task_vars):
+        return (
+            # This mixin is meant to be used with subclasses of
+            # OpenShiftCheck. Pylint disables this by default on mixins,
+            # though it relies on the class name ending in 'mixin'.
+            # pylint: disable=no-member
+            super(NotContainerized, cls).is_active(task_vars) and
+            not cls.is_containerized(task_vars)
+        )
+
+    @staticmethod
+    def is_containerized(task_vars):
+        return get_var(task_vars, "openshift", "common", "is_containerized")

+ 66 - 0
roles/openshift_health_checker/openshift_checks/package_availability.py

@@ -0,0 +1,66 @@
+# pylint: disable=missing-docstring
+from openshift_checks import OpenShiftCheck, get_var
+from openshift_checks.mixins import NotContainerized
+
+
+class PackageAvailability(NotContainerized, OpenShiftCheck):
+    """Check that required RPM packages are available."""
+
+    name = "package_availability"
+    tags = ["preflight"]
+
+    def run(self, tmp, task_vars):
+        rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
+        group_names = get_var(task_vars, "group_names", default=[])
+
+        packages = set()
+
+        if "masters" in group_names:
+            packages.update(self.master_packages(rpm_prefix))
+        if "nodes" in group_names:
+            packages.update(self.node_packages(rpm_prefix))
+
+        args = {"packages": sorted(set(packages))}
+        return self.module_executor("check_yum_update", args, tmp, task_vars)
+
+    @staticmethod
+    def master_packages(rpm_prefix):
+        return [
+            "{rpm_prefix}".format(rpm_prefix=rpm_prefix),
+            "{rpm_prefix}-clients".format(rpm_prefix=rpm_prefix),
+            "{rpm_prefix}-master".format(rpm_prefix=rpm_prefix),
+            "bash-completion",
+            "cockpit-bridge",
+            "cockpit-docker",
+            "cockpit-kubernetes",
+            "cockpit-shell",
+            "cockpit-ws",
+            "etcd",
+            "httpd-tools",
+        ]
+
+    @staticmethod
+    def node_packages(rpm_prefix):
+        return [
+            "{rpm_prefix}".format(rpm_prefix=rpm_prefix),
+            "{rpm_prefix}-node".format(rpm_prefix=rpm_prefix),
+            "{rpm_prefix}-sdn-ovs".format(rpm_prefix=rpm_prefix),
+            "bind",
+            "ceph-common",
+            "dnsmasq",
+            "docker",
+            "firewalld",
+            "flannel",
+            "glusterfs-fuse",
+            "iptables-services",
+            "iptables",
+            "iscsi-initiator-utils",
+            "libselinux-python",
+            "nfs-utils",
+            "ntp",
+            "openssl",
+            "pyparted",
+            "python-httplib2",
+            "PyYAML",
+            "yum-utils",
+        ]

+ 14 - 0
roles/openshift_health_checker/openshift_checks/package_update.py

@@ -0,0 +1,14 @@
+# pylint: disable=missing-docstring
+from openshift_checks import OpenShiftCheck
+from openshift_checks.mixins import NotContainerized
+
+
+class PackageUpdate(NotContainerized, OpenShiftCheck):
+    """Check that there are no conflicts in RPM packages."""
+
+    name = "package_update"
+    tags = ["preflight"]
+
+    def run(self, tmp, task_vars):
+        args = {"packages": []}
+        return self.module_executor("check_yum_update", args, tmp, task_vars)

+ 20 - 0
roles/openshift_health_checker/openshift_checks/package_version.py

@@ -0,0 +1,20 @@
+# pylint: disable=missing-docstring
+from openshift_checks import OpenShiftCheck, get_var
+from openshift_checks.mixins import NotContainerized
+
+
+class PackageVersion(NotContainerized, OpenShiftCheck):
+    """Check that available RPM packages match the required versions."""
+
+    name = "package_version"
+    tags = ["preflight"]
+
+    def run(self, tmp, task_vars):
+        rpm_prefix = get_var(task_vars, "openshift", "common", "service_type")
+        openshift_release = get_var(task_vars, "openshift_release")
+
+        args = {
+            "prefix": rpm_prefix,
+            "version": openshift_release,
+        }
+        return self.module_executor("aos_version", args, tmp, task_vars)

+ 0 - 52
roles/openshift_preflight/README.md

@@ -1,52 +0,0 @@
-OpenShift Preflight Checks
-==========================
-
-This role detects common problems prior to installing OpenShift.
-
-Requirements
-------------
-
-* Ansible 2.2+
-
-Role Variables
---------------
-
-None
-
-Dependencies
-------------
-
-None
-
-Example Playbook
-----------------
-
-```yaml
----
-- hosts: OSEv3
-  roles:
-    - openshift_preflight/init
-
-- hosts: OSEv3
-  name: checks that apply to all hosts
-  gather_facts: no
-  ignore_errors: yes
-  roles:
-    - openshift_preflight/common
-
-- hosts: OSEv3
-  name: verify check results
-  gather_facts: no
-  roles:
-    - openshift_preflight/verify_status
-```
-
-License
--------
-
-Apache License Version 2.0
-
-Author Information
-------------------
-
-Customer Success team (dev@lists.openshift.redhat.com)

+ 0 - 3
roles/openshift_preflight/common/meta/main.yml

@@ -1,3 +0,0 @@
----
-dependencies:
-  - role: openshift_preflight/base

+ 0 - 21
roles/openshift_preflight/common/tasks/main.yml

@@ -1,21 +0,0 @@
----
-# check content available on all hosts
-- when: not openshift.common.is_containerized | bool
-  block:
-
-    - name: determine if yum update will work
-      action: check_yum_update
-      register: r
-
-    - set_fact:
-        oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if yum update will work'})] }}"
-
-    - name: determine if expected version matches what is available
-      aos_version:
-        version: "{{ openshift_release }}"
-      when:
-        - deployment_type == "openshift-enterprise"
-      register: r
-
-    - set_fact:
-        oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'determine if expected version matches what is available'})] }}"

+ 0 - 4
roles/openshift_preflight/init/tasks/main.yml

@@ -1,4 +0,0 @@
----
-- name: set common variables
-  set_fact:
-    oo_preflight_check_results: "{{ oo_preflight_check_results | default([]) }}"

+ 0 - 3
roles/openshift_preflight/masters/meta/main.yml

@@ -1,3 +0,0 @@
----
-dependencies:
-  - role: openshift_preflight/base

+ 0 - 31
roles/openshift_preflight/masters/tasks/main.yml

@@ -1,31 +0,0 @@
----
-# determine if yum install of master pkgs will work
-- when: not openshift.common.is_containerized | bool
-  block:
-
-    - name: main master packages availability
-      check_yum_update:
-        packages:
-          - "{{ openshift.common.service_type }}"
-          - "{{ openshift.common.service_type }}-clients"
-          - "{{ openshift.common.service_type }}-master"
-      register: r
-
-    - set_fact:
-        oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main master packages availability'})] }}"
-
-    - name: other master packages availability
-      check_yum_update:
-        packages:
-          - etcd
-          - bash-completion
-          - cockpit-bridge
-          - cockpit-docker
-          - cockpit-kubernetes
-          - cockpit-shell
-          - cockpit-ws
-          - httpd-tools
-      register: r
-
-    - set_fact:
-        oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other master packages availability'})] }}"

+ 0 - 3
roles/openshift_preflight/nodes/meta/main.yml

@@ -1,3 +0,0 @@
----
-dependencies:
-  - role: openshift_preflight/base

+ 0 - 41
roles/openshift_preflight/nodes/tasks/main.yml

@@ -1,41 +0,0 @@
----
-# determine if yum install of node pkgs will work
-- when: not openshift.common.is_containerized | bool
-  block:
-
-    - name: main node packages availability
-      check_yum_update:
-        packages:
-          - "{{ openshift.common.service_type }}"
-          - "{{ openshift.common.service_type }}-node"
-          - "{{ openshift.common.service_type }}-sdn-ovs"
-      register: r
-
-    - set_fact:
-        oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'main node packages availability'})] }}"
-
-    - name: other node packages availability
-      check_yum_update:
-        packages:
-          - docker
-          - PyYAML
-          - firewalld
-          - iptables
-          - iptables-services
-          - nfs-utils
-          - ntp
-          - yum-utils
-          - dnsmasq
-          - libselinux-python
-          - ceph-common
-          - glusterfs-fuse
-          - iscsi-initiator-utils
-          - pyparted
-          - python-httplib2
-          - openssl
-          - flannel
-          - bind
-      register: r
-
-    - set_fact:
-        oo_preflight_check_results: "{{ oo_preflight_check_results + [r|combine({'_task': 'other node packages availability'})] }}"

+ 0 - 8
roles/openshift_preflight/verify_status/tasks/main.yml

@@ -1,8 +0,0 @@
----
-- name: find check failures
-  set_fact:
-    oo_preflight_check_failures: "{{ oo_preflight_check_results | select('failed', 'equalto', True) | list }}"
-
-- name: ensure all checks succeed
-  action: fail
-  when: oo_preflight_check_failures