Browse Source

List existing health checks when none is requested

This is a simple mechanism to learn what health checks are available.

Note that we defer task_vars verification, so that we can compute
requested_checks and resolved_checks earlier, allowing us to list checks
even if openshift_facts has not run.
Rodolfo Carvalho 7 years ago
parent
commit
25276bda8c

+ 9 - 1
playbooks/byo/openshift-checks/README.md

@@ -26,7 +26,8 @@ callback plugin summarizes execution errors at the end of a playbook run.
 3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) -
    check that certificates in use are valid and not expiring soon.
 
-4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks.
+4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks or to
+   list existing checks.
    See the [next section](#the-adhoc-playbook) for a usage example.
 
 ## Running
@@ -89,6 +90,13 @@ $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml
 It is valid to specify multiple check tags and individual check names together
 in a comma-separated list.
 
+To list all of the available checks and tags, run the adhoc playbook without
+setting the `openshift_checks` variable:
+
+```console
+$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml
+```
+
 ## Running in a container
 
 This repository is built into a Docker image including Ansible so that it can

+ 21 - 0
playbooks/byo/openshift-checks/adhoc.yml

@@ -1,4 +1,25 @@
 ---
+# NOTE: ideally this would be just part of a single play in
+# common/openshift-checks/adhoc.yml that either lists the existing checks when
+# openshift_checks is not set or runs the requested checks. However, to actually
+# run the checks we need the included dependencies to run first, and that takes
+# time. To speed up listing checks and improve the UX, we use this separate play
+# that runs before the dependencies are included.
+- name: OpenShift health checks
+  # NOTE: though the openshift_checks variable could potentially be defined on
+  # individual hosts while not defined for localhost, we do not support that
+  # usage. Running this play only on localhost speeds up execution.
+  hosts: localhost
+  connection: local
+  roles:
+  - openshift_health_checker
+  vars:
+  - r_openshift_health_checker_playbook_context: adhoc
+  pre_tasks:
+  - name: List known health checks
+    action: openshift_health_check
+    when: openshift_checks is undefined or not openshift_checks
+
 - include: ../openshift-cluster/initialize_groups.yml
 
 - include: ../../common/openshift-cluster/std_include.yml

+ 41 - 7
roles/openshift_health_checker/action_plugins/openshift_health_check.py

@@ -28,25 +28,32 @@ class ActionModule(ActionBase):
         result = super(ActionModule, self).run(tmp, task_vars)
         task_vars = task_vars or {}
 
-        # vars are not supportably available in the callback plugin,
-        # so record any it will need in the result.
+        # callback plugins cannot read Ansible vars, but we would like
+        # zz_failure_summary to have access to certain values. We do so by
+        # storing the information we need in the result.
         result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context')
 
-        if "openshift" not in task_vars:
-            result["failed"] = True
-            result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?"
-            return result
-
         try:
             known_checks = self.load_known_checks(tmp, task_vars)
             args = self._task.args
             requested_checks = normalize(args.get('checks', []))
+
+            if not requested_checks:
+                result['failed'] = True
+                result['msg'] = list_known_checks(known_checks)
+                return result
+
             resolved_checks = resolve_checks(requested_checks, known_checks.values())
         except OpenShiftCheckException as e:
             result["failed"] = True
             result["msg"] = str(e)
             return result
 
+        if "openshift" not in task_vars:
+            result["failed"] = True
+            result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?"
+            return result
+
         result["checks"] = check_results = {}
 
         user_disabled_checks = normalize(task_vars.get('openshift_disable_check', []))
@@ -96,6 +103,33 @@ class ActionModule(ActionBase):
         return known_checks
 
 
+def list_known_checks(known_checks):
+    """Return text listing the existing checks and tags."""
+    # TODO: we could include a description of each check by taking it from a
+    # check class attribute (e.g., __doc__) when building the message below.
+    msg = (
+        'This playbook is meant to run health checks, but no checks were '
+        'requested. Set the `openshift_checks` variable to a comma-separated '
+        'list of check names or a YAML list. Available checks:\n  {}'
+    ).format('\n  '.join(sorted(known_checks)))
+
+    tag_checks = defaultdict(list)
+    for cls in known_checks.values():
+        for tag in cls.tags:
+            tag_checks[tag].append(cls.name)
+    tags = [
+        '@{} = {}'.format(tag, ','.join(sorted(checks)))
+        for tag, checks in tag_checks.items()
+    ]
+
+    msg += (
+        '\n\nTags can be used as a shortcut to select multiple '
+        'checks. Available tags and the checks they select:\n  {}'
+    ).format('\n  '.join(sorted(tags)))
+
+    return msg
+
+
 def resolve_checks(names, all_checks):
     """Returns a set of resolved check names.
 

+ 2 - 1
roles/openshift_health_checker/test/action_plugin_test.py

@@ -80,7 +80,8 @@ def skipped(result):
     None,
     {},
 ])
-def test_action_plugin_missing_openshift_facts(plugin, task_vars):
+def test_action_plugin_missing_openshift_facts(plugin, task_vars, monkeypatch):
+    monkeypatch.setattr('openshift_health_check.resolve_checks', lambda *args: ['fake_check'])
     result = plugin.run(tmp=None, task_vars=task_vars)
 
     assert failed(result, msg_has=['openshift_facts'])