"""
Health checks for OpenShift clusters.
"""
import json
import operator
import os
import time
import collections

from abc import ABCMeta, abstractmethod, abstractproperty
from importlib import import_module

from ansible.module_utils import six
from ansible.module_utils.six.moves import reduce  # pylint: disable=import-error,redefined-builtin
from ansible.plugins.filter.core import to_bool as ansible_to_bool
  14. class OpenShiftCheckException(Exception):
  15. """Raised when a check encounters a failure condition."""
  16. def __init__(self, name, msg=None):
  17. # msg is for the message the user will see when this is raised.
  18. # name is for test code to identify the error without looking at msg text.
  19. if msg is None: # for parameter backward compatibility
  20. msg = name
  21. name = self.__class__.__name__
  22. self.name = name
  23. super(OpenShiftCheckException, self).__init__(msg)
  24. class OpenShiftCheckExceptionList(OpenShiftCheckException):
  25. """A container for multiple errors that may be detected in one check."""
  26. def __init__(self, errors):
  27. self.errors = errors
  28. super(OpenShiftCheckExceptionList, self).__init__(
  29. 'OpenShiftCheckExceptionList',
  30. '\n'.join(str(msg) for msg in errors)
  31. )
  32. # make iterable
  33. def __getitem__(self, index):
  34. return self.errors[index]
# Record describing a file a check wants saved to the output directory:
# either literal `contents` to write, or a `remote_filename` to copy from
# the remote host (see OpenShiftCheck.register_file).
FileToSave = collections.namedtuple("FileToSave", "filename contents remote_filename")
  36. # pylint: disable=too-many-instance-attributes; all represent significantly different state.
  37. # Arguably they could be separated into two hashes, one for storing parameters, and one for
  38. # storing result state; but that smells more like clutter than clarity.
  39. @six.add_metaclass(ABCMeta)
  40. class OpenShiftCheck(object):
  41. """A base class for defining checks for an OpenShift cluster environment.
  42. Optional init params: method execute_module, dict task_vars, and string tmp
  43. execute_module is expected to have a signature compatible with _execute_module
  44. from ansible plugins/action/__init__.py, e.g.:
  45. def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *args):
  46. This is stored so that it can be invoked in subclasses via check.execute_module("name", args)
  47. which provides the check's stored task_vars and tmp.
  48. Optional init param: want_full_results
  49. If the check can gather logs, tarballs, etc., do so when True; but no need to spend
  50. the time if they're not wanted (won't be written to output directory).
  51. """
  52. def __init__(self, execute_module=None, task_vars=None, tmp=None, want_full_results=False):
  53. # store a method for executing ansible modules from the check
  54. self._execute_module = execute_module
  55. # the task variables and tmpdir passed into the health checker task
  56. self.task_vars = task_vars or {}
  57. self.tmp = tmp
  58. # a boolean for disabling the gathering of results (files, computations) that won't
  59. # actually be recorded/used
  60. self.want_full_results = want_full_results
  61. # mainly for testing purposes; see execute_module_with_retries
  62. self._module_retries = 3
  63. self._module_retry_interval = 5 # seconds
  64. # state to be recorded for inspection after the check runs:
  65. #
  66. # set to True when the check changes the host, for accurate total "changed" count
  67. self.changed = False
  68. # list of OpenShiftCheckException for check to report (alternative to returning a failed result)
  69. self.failures = []
  70. # list of FileToSave - files the check specifies to be written locally if so configured
  71. self.files_to_save = []
  72. # log messages for the check - tuples of (description, msg) where msg is serializable.
  73. # These are intended to be a sequential record of what the check observed and determined.
  74. self.logs = []
  75. @abstractproperty
  76. def name(self):
  77. """The name of this check, usually derived from the class name."""
  78. return "openshift_check"
  79. @property
  80. def tags(self):
  81. """A list of tags that this check satisfy.
  82. Tags are used to reference multiple checks with a single '@tagname'
  83. special check name.
  84. """
  85. return []
  86. @staticmethod
  87. def is_active():
  88. """Returns true if this check applies to the ansible-playbook run."""
  89. return True
  90. @abstractmethod
  91. def run(self):
  92. """Executes a check against a host and returns a result hash similar to Ansible modules.
  93. Actually the direction ahead is to record state in the attributes and
  94. not bother building a result hash. Instead, return an empty hash and let
  95. the action plugin fill it in. Or raise an OpenShiftCheckException.
  96. Returning a hash may become deprecated if it does not prove necessary.
  97. """
  98. return {}
  99. @classmethod
  100. def subclasses(cls):
  101. """Returns a generator of subclasses of this class and its subclasses."""
  102. # AUDIT: no-member makes sense due to this having a metaclass
  103. for subclass in cls.__subclasses__(): # pylint: disable=no-member
  104. yield subclass
  105. for subclass in subclass.subclasses():
  106. yield subclass
  107. def register_failure(self, error):
  108. """Record in the check that a failure occurred.
  109. Recorded failures are merged into the result hash for now. They are also saved to output directory
  110. (if provided) <check>.failures.json and registered as a log entry for context <check>.log.json.
  111. """
  112. # It should be an exception; make it one if not
  113. if not isinstance(error, OpenShiftCheckException):
  114. error = OpenShiftCheckException(str(error))
  115. self.failures.append(error)
  116. # duplicate it in the logs so it can be seen in the context of any
  117. # information that led to the failure
  118. self.register_log("failure: " + error.name, str(error))
  119. def register_log(self, context, msg):
  120. """Record an entry for the check log.
  121. Notes are intended to serve as context of the whole sequence of what the check observed.
  122. They are be saved as an ordered list in a local check log file.
  123. They are not to included in the result or in the ansible log; it's just for the record.
  124. """
  125. self.logs.append([context, msg])
  126. def register_file(self, filename, contents=None, remote_filename=""):
  127. """Record a file that a check makes available to be saved individually to output directory.
  128. Either file contents should be passed in, or a file to be copied from the remote host
  129. should be specified. Contents that are not a string are to be serialized as JSON.
  130. NOTE: When copying a file from remote host, it is slurped into memory as base64, meaning
  131. you should avoid using this on huge files (more than say 10M).
  132. """
  133. if contents is None and not remote_filename:
  134. raise OpenShiftCheckException("File data/source not specified; this is a bug in the check.")
  135. self.files_to_save.append(FileToSave(filename, contents, remote_filename))
  136. def execute_module(self, module_name=None, module_args=None, save_as_name=None, register=True):
  137. """Invoke an Ansible module from a check.
  138. Invoke stored _execute_module, normally copied from the action
  139. plugin, with its params and the task_vars and tmp given at
  140. check initialization. No positional parameters beyond these
  141. are specified. If it's necessary to specify any of the other
  142. parameters to _execute_module then that should just be invoked
  143. directly (with awareness of changes in method signature per
  144. Ansible version).
  145. So e.g. check.execute_module("foo", dict(arg1=...))
  146. save_as_name specifies a file name for saving the result to an output directory,
  147. if needed, and is intended to uniquely identify the result of invoking execute_module.
  148. If not provided, the module name will be used.
  149. If register is set False, then the result won't be registered in logs or files to save.
  150. Return: result hash from module execution.
  151. """
  152. if self._execute_module is None:
  153. raise NotImplementedError(
  154. self.__class__.__name__ +
  155. " invoked execute_module without providing the method at initialization."
  156. )
  157. result = self._execute_module(module_name, module_args, self.tmp, self.task_vars)
  158. if result.get("changed"):
  159. self.changed = True
  160. for output in ["result", "stdout"]:
  161. # output is often JSON; attempt to decode
  162. try:
  163. result[output + "_json"] = json.loads(result[output])
  164. except (KeyError, ValueError):
  165. pass
  166. if register:
  167. self.register_log("execute_module: " + module_name, result)
  168. self.register_file(save_as_name or module_name + ".json", result)
  169. return result
  170. def execute_module_with_retries(self, module_name, module_args):
  171. """Run execute_module and retry on failure."""
  172. result = {}
  173. tries = 0
  174. while True:
  175. res = self.execute_module(module_name, module_args)
  176. if tries > self._module_retries or not res.get("failed"):
  177. result.update(res)
  178. return result
  179. result["last_failed"] = res
  180. tries += 1
  181. time.sleep(self._module_retry_interval)
  182. def get_var(self, *keys, **kwargs):
  183. """Get deeply nested values from task_vars.
  184. Ansible task_vars structures are Python dicts, often mapping strings to
  185. other dicts. This helper makes it easier to get a nested value, raising
  186. OpenShiftCheckException when a key is not found.
  187. Keyword args:
  188. default:
  189. On missing key, return this as default value instead of raising exception.
  190. convert:
  191. Supply a function to apply to normalize the value before returning it.
  192. None is the default (return as-is).
  193. This function should raise ValueError if the user has provided a value
  194. that cannot be converted, or OpenShiftCheckException if some other
  195. problem needs to be described to the user.
  196. """
  197. if len(keys) == 1:
  198. keys = keys[0].split(".")
  199. try:
  200. value = reduce(operator.getitem, keys, self.task_vars)
  201. except (KeyError, TypeError):
  202. if "default" not in kwargs:
  203. raise OpenShiftCheckException(
  204. "This check expects the '{}' inventory variable to be defined\n"
  205. "in order to proceed, but it is undefined. There may be a bug\n"
  206. "in Ansible, the checks, or their dependencies."
  207. "".format(".".join(map(str, keys)))
  208. )
  209. value = kwargs["default"]
  210. convert = kwargs.get("convert", None)
  211. try:
  212. if convert is None:
  213. return value
  214. elif convert is bool: # interpret bool as Ansible does, instead of python truthiness
  215. return ansible_to_bool(value)
  216. else:
  217. return convert(value)
  218. except ValueError as error: # user error in specifying value
  219. raise OpenShiftCheckException(
  220. 'Cannot convert inventory variable to expected type:\n'
  221. ' "{var}={value}"\n'
  222. '{error}'.format(var=".".join(keys), value=value, error=error)
  223. )
  224. except OpenShiftCheckException: # some other check-specific problem
  225. raise
  226. except Exception as error: # probably a bug in the function
  227. raise OpenShiftCheckException(
  228. 'There is a bug in this check. While trying to convert variable \n'
  229. ' "{var}={value}"\n'
  230. 'the given converter cannot be used or failed unexpectedly:\n'
  231. '{type}: {error}'.format(
  232. var=".".join(keys),
  233. value=value,
  234. type=error.__class__.__name__,
  235. error=error
  236. ))
  237. @staticmethod
  238. def get_major_minor_version(openshift_image_tag):
  239. """Parse and return the deployed version of OpenShift as a tuple."""
  240. if openshift_image_tag and openshift_image_tag[0] == 'v':
  241. openshift_image_tag = openshift_image_tag[1:]
  242. # map major release versions across releases
  243. # to a common major version
  244. openshift_major_release_version = {
  245. "1": "3",
  246. }
  247. components = openshift_image_tag.split(".")
  248. if not components or len(components) < 2:
  249. msg = "An invalid version of OpenShift was found for this host: {}"
  250. raise OpenShiftCheckException(msg.format(openshift_image_tag))
  251. if components[0] in openshift_major_release_version:
  252. components[0] = openshift_major_release_version[components[0]]
  253. components = tuple(int(x) for x in components[:2])
  254. return components
  255. def find_ansible_mount(self, path):
  256. """Return the mount point for path from ansible_mounts."""
  257. # reorganize list of mounts into dict by path
  258. mount_for_path = {
  259. mount['mount']: mount
  260. for mount
  261. in self.get_var('ansible_mounts')
  262. }
  263. # NOTE: including base cases '/' and '' to ensure the loop ends
  264. mount_targets = set(mount_for_path.keys()) | {'/', ''}
  265. mount_point = path
  266. while mount_point not in mount_targets:
  267. mount_point = os.path.dirname(mount_point)
  268. try:
  269. mount = mount_for_path[mount_point]
  270. self.register_log("mount point for " + path, mount)
  271. return mount
  272. except KeyError:
  273. known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path))
  274. raise OpenShiftCheckException(
  275. 'Unable to determine mount point for path "{}".\n'
  276. 'Known mount points: {}.'.format(path, known_mounts or 'none')
  277. )
# Modules in the checks package that do not define checks themselves and so
# must be skipped by load_checks when importing check modules.
LOADER_EXCLUDES = (
    "__init__.py",
    "mixins.py",
    "logging.py",
)
  283. def load_checks(path=None, subpkg=""):
  284. """Dynamically import all check modules for the side effect of registering checks."""
  285. if path is None:
  286. path = os.path.dirname(__file__)
  287. modules = []
  288. for name in os.listdir(path):
  289. if os.path.isdir(os.path.join(path, name)):
  290. modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name)
  291. continue
  292. if name.endswith(".py") and name not in LOADER_EXCLUDES:
  293. modules.append(import_module(__package__ + subpkg + "." + name[:-3]))
  294. return modules