openshift_health_check.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  """
  Ansible action plugin to execute health checks in OpenShift clusters.
  """
  # pylint: disable=wrong-import-position,missing-docstring,invalid-name
  import sys
  import os
  import traceback
  from collections import defaultdict

  # Reuse the display object of the running program when there is one, so that
  # output goes through the same channel; otherwise create our own.
  try:
      from __main__ import display
  except ImportError:
      from ansible.utils.display import Display
      display = Display()

  from ansible.plugins.action import ActionBase
  from ansible.module_utils.six import string_types

  # Augment sys.path so that we can import checks from a directory relative to
  # this action plugin.
  sys.path.insert(1, os.path.dirname(os.path.dirname(__file__)))

  from openshift_checks import OpenShiftCheck, OpenShiftCheckException, load_checks  # noqa: E402
  class ActionModule(ActionBase):
      """Action plugin that runs the requested OpenShift health checks."""

      def run(self, tmp=None, task_vars=None):
          """Resolve the requested checks, run each one and aggregate the results.

          The names to run are read from the task argument ``checks`` (a
          comma-separated string or a list; ``@tag`` entries expand to every
          check carrying that tag). Checks listed in the
          ``openshift_disable_check`` variable are reported as skipped.

          Returns the result dict produced by ``ActionBase.run`` extended with:
            * ``playbook_context``: always set, copied from task_vars.
            * ``checks``: mapping of check name to its individual result dict.
            * ``changed``: True if any individual result is marked changed.
            * ``failed`` / ``msg``: set when no checks were requested, when
              name resolution fails, when the ``openshift`` variable is
              missing, or when at least one check failed.
          """
          result = super(ActionModule, self).run(tmp, task_vars)
          task_vars = task_vars or {}

          # callback plugins cannot read Ansible vars, but we would like
          # zz_failure_summary to have access to certain values. We do so by
          # storing the information we need in the result.
          result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context')

          try:
              known_checks = self.load_known_checks(tmp, task_vars)
              args = self._task.args
              requested_checks = normalize(args.get('checks', []))

              if not requested_checks:
                  # Nothing to run: fail with a help text listing what exists.
                  result['failed'] = True
                  result['msg'] = list_known_checks(known_checks)
                  return result

              resolved_checks = resolve_checks(requested_checks, known_checks.values())
          except OpenShiftCheckException as e:
              result["failed"] = True
              result["msg"] = str(e)
              return result

          if "openshift" not in task_vars:
              result["failed"] = True
              result["msg"] = "'openshift' is undefined, did 'openshift_facts' run?"
              return result

          result["checks"] = check_results = {}

          user_disabled_checks = normalize(task_vars.get('openshift_disable_check', []))

          # Only used for the banner. Fall back to inventory_hostname instead of
          # raising KeyError when ansible_host is not among the task vars.
          host = task_vars.get("ansible_host", task_vars.get("inventory_hostname"))

          # resolved_checks is a set; sort it so that checks run (and are
          # reported) in the same order on every host and every run.
          for name in sorted(resolved_checks):
              display.banner("CHECK [{} : {}]".format(name, host))
              check = known_checks[name]
              check_results[name] = run_check(name, check, user_disabled_checks)
              if check.changed:
                  check_results[name]["changed"] = True

          result["changed"] = any(r.get("changed") for r in check_results.values())

          if any(r.get("failed") for r in check_results.values()):
              result["failed"] = True
              result["msg"] = "One or more checks failed"

          return result

      def load_known_checks(self, tmp, task_vars):
          """Instantiate every known check and return them keyed by check name.

          Each subclass reported by ``OpenShiftCheck.subclasses()`` is
          instantiated with this plugin's ``_execute_module``, ``tmp`` and
          ``task_vars``.

          Raises OpenShiftCheckException if two check classes share a name.
          """
          # NOTE(review): presumably imports the check modules so that their
          # classes show up in OpenShiftCheck.subclasses() -- confirm in
          # openshift_checks.
          load_checks()

          known_checks = {}
          for cls in OpenShiftCheck.subclasses():
              check_name = cls.name
              if check_name in known_checks:
                  # known_checks holds instances, hence __class__ for the report.
                  other_cls = known_checks[check_name].__class__
                  raise OpenShiftCheckException(
                      "non-unique check name '{}' in: '{}.{}' and '{}.{}'".format(
                          check_name,
                          cls.__module__, cls.__name__,
                          other_cls.__module__, other_cls.__name__))
              known_checks[check_name] = cls(execute_module=self._execute_module, tmp=tmp, task_vars=task_vars)
          return known_checks
  def list_known_checks(known_checks):
      """Return text listing the existing checks and tags.

      known_checks is a mapping of check name to check; its keys are listed as
      the available checks and its values are passed to describe_tags() to
      build the tag overview.
      """
      # TODO: we could include a description of each check by taking it from a
      # check class attribute (e.g., __doc__) when building the message below.
      msg = (
          'This playbook is meant to run health checks, but no checks were '
          'requested. Set the `openshift_checks` variable to a comma-separated '
          'list of check names or a YAML list. Available checks:\n {}'
      ).format('\n '.join(sorted(known_checks)))

      tags = describe_tags(known_checks.values())

      msg += (
          '\n\nTags can be used as a shortcut to select multiple '
          'checks. Available tags and the checks they select:\n {}'
      ).format('\n '.join(tags))

      return msg
  def describe_tags(check_classes):
      """Return a sorted list of strings describing tags and the checks they include.

      check_classes is an iterable of objects exposing ``name`` and ``tags``.
      Each returned entry has the form ``@<tag> = <check>,<check>,...`` with
      the check names in sorted order.
      """
      # Group check names by the tags they carry.
      tag_checks = defaultdict(list)
      for cls in check_classes:
          for tag in cls.tags:
              tag_checks[tag].append(cls.name)

      return sorted(
          '@{} = {}'.format(tag, ','.join(sorted(checks)))
          for tag, checks in tag_checks.items()
      )
  def resolve_checks(names, all_checks):
      """Returns a set of resolved check names.

      Resolving a check name expands tag references (e.g., "@tag") to all the
      checks that contain the given tag. OpenShiftCheckException is raised if
      names contains an unknown check or tag name.

      names should be a sequence of strings.
      all_checks should be an iterable of check classes/instances, each
      exposing ``name`` and ``tags``.
      """
      # all_checks is traversed more than once below; materialize it so that a
      # one-shot iterator does not appear empty after the first pass.
      all_checks = list(all_checks)

      known_check_names = {check.name for check in all_checks}

      # Build the tag -> check names mapping once; it serves both validation
      # and the expansion of "@tag" references.
      tag_to_checks = defaultdict(set)
      for check in all_checks:
          for tag in check.tags:
              tag_to_checks[tag].add(check.name)
      known_tag_names = set(tag_to_checks)

      check_names = {name for name in names if not name.startswith('@')}
      tag_names = {name[1:] for name in names if name.startswith('@')}

      unknown_check_names = check_names - known_check_names
      unknown_tag_names = tag_names - known_tag_names

      if unknown_check_names or unknown_tag_names:
          msg = []
          if unknown_check_names:
              msg.append('Unknown check names: {}.'.format(', '.join(sorted(unknown_check_names))))
          if unknown_tag_names:
              msg.append('Unknown tag names: {}.'.format(', '.join(sorted(unknown_tag_names))))
          msg.append('Make sure there is no typo in the playbook and no files are missing.')
          # TODO: implement a "Did you mean ...?" when the input is similar to a
          # valid check or tag.
          msg.append('Known checks:')
          msg.append(' {}'.format('\n '.join(sorted(known_check_names))))
          msg.append('Known tags:')
          msg.append(' {}'.format('\n '.join(describe_tags(all_checks))))
          raise OpenShiftCheckException('\n'.join(msg))

      # Every requested tag is known at this point, so the lookups below never
      # hit the defaultdict's default.
      resolved = check_names.copy()
      for tag in tag_names:
          resolved.update(tag_to_checks[tag])

      return resolved
  def normalize(checks):
      """Return a clean list of check names.

      The input may be a comma-separated string or a sequence. Leading and
      trailing whitespace characters are removed. Empty items are discarded.
      None (e.g., a variable defined with an empty value) is treated as an
      empty list.
      """
      if checks is None:
          return []
      if isinstance(checks, string_types):
          checks = checks.split(',')
      # Strip each item once, then drop the ones that end up empty.
      stripped = (name.strip() for name in checks)
      return [name for name in stripped if name]
  def run_check(name, check, user_disabled_checks):
      """Run a single check if enabled and return a result dict.

      name is looked up in user_disabled_checks; check must provide
      ``is_active()`` and ``run()``. The returned dict is one of:
        * a skipped result (``skipped`` / ``skipped_reason``) when the check is
          disabled by the user, not active, or ``is_active()`` itself raised
          (the traceback is then included under ``exception``);
        * a failed result (``failed`` / ``msg``) when ``run()`` raised, with the
          traceback under ``exception`` for anything other than
          OpenShiftCheckException;
        * otherwise whatever ``run()`` returned, with None replaced by an empty
          dict so that the result is always a dict.
      """
      if name in user_disabled_checks:
          return dict(skipped=True, skipped_reason="Disabled by user request")

      # pylint: disable=broad-except; capturing exceptions broadly is intentional,
      # to isolate arbitrary failures in one check from others.
      try:
          is_active = check.is_active()
      except Exception as exc:
          reason = "Could not determine if check should be run, exception: {}".format(exc)
          return dict(skipped=True, skipped_reason=reason, exception=traceback.format_exc())

      if not is_active:
          return dict(skipped=True, skipped_reason="Not active for this host")

      try:
          outcome = check.run()
      except OpenShiftCheckException as exc:
          return dict(failed=True, msg=str(exc))
      except Exception as exc:
          return dict(failed=True, msg=str(exc), exception=traceback.format_exc())

      # A check with nothing to report may return None; keep the dict contract.
      return {} if outcome is None else outcome
  158. return dict(failed=True, msg=str(exc))
  159. except Exception as exc:
  160. return dict(failed=True, msg=str(exc), exception=traceback.format_exc())