  1. """
  2. Health checks for OpenShift clusters.
  3. """
  4. import json
  5. import operator
  6. import os
  7. import time
  8. import collections
  9. from abc import ABCMeta, abstractmethod, abstractproperty
  10. from importlib import import_module
  11. from ansible.module_utils import six
  12. from ansible.module_utils.six.moves import reduce # pylint: disable=import-error,redefined-builtin
  13. from ansible.module_utils.six import string_types
  14. from ansible.plugins.filter.core import to_bool as ansible_to_bool


class OpenShiftCheckException(Exception):
    """Raised when a check encounters a failure condition."""

    def __init__(self, name, msg=None):
        # msg is for the message the user will see when this is raised.
        # name is for test code to identify the error without looking at msg text.
        if msg is None:  # for parameter backward compatibility
            msg = name
            name = self.__class__.__name__
        self.name = name
        super(OpenShiftCheckException, self).__init__(msg)
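
# Usage sketch (illustrative): raising with an explicit name lets test code match
# on the error without parsing the human-oriented message text:
#
#     raise OpenShiftCheckException("invalid_version", "A valid version was not found for this host.")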


class OpenShiftCheckExceptionList(OpenShiftCheckException):
    """A container for multiple errors that may be detected in one check."""

    def __init__(self, errors):
        self.errors = errors
        super(OpenShiftCheckExceptionList, self).__init__(
            'OpenShiftCheckExceptionList',
            '\n'.join(str(msg) for msg in errors)
        )

    # make iterable
    def __getitem__(self, index):
        return self.errors[index]
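
# Usage sketch (illustrative): collect per-item failures and raise them together,
# so the user sees every problem rather than only the first:
#
#     errors = [OpenShiftCheckException("missing_mount", "No mount found for /var")]
#     if errors:
#         raise OpenShiftCheckExceptionList(errors)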


FileToSave = collections.namedtuple("FileToSave", "filename contents remote_filename")
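
# Example (illustrative): a FileToSave either carries contents directly (non-string
# contents are serialized as JSON) or names a file on the remote host to copy:
#
#     FileToSave("diagnostics.json", dict(status="ok"), "")
#     FileToSave("docker.log", None, "/var/log/docker.log")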


# pylint: disable=too-many-instance-attributes; all represent significantly different state.
# Arguably they could be separated into two hashes, one for storing parameters, and one for
# storing result state; but that smells more like clutter than clarity.
@six.add_metaclass(ABCMeta)
class OpenShiftCheck(object):
    """A base class for defining checks for an OpenShift cluster environment.

    Optional init params: method execute_module, dict task_vars, and string tmp.
    execute_module is expected to have a signature compatible with _execute_module
    from ansible plugins/action/__init__.py, e.g.:
        def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *args):
    It is stored so that subclasses can invoke it via check.execute_module("name", args),
    which supplies the check's stored task_vars and tmp.

    Optional init param: want_full_results.
    If True, checks that can gather logs, tarballs, etc. should do so; if False, they
    need not spend the time, since the results will not be written to an output directory.
    """

    def __init__(self, execute_module=None, task_vars=None, tmp=None, want_full_results=False):
        # store a method for executing ansible modules from the check
        self._execute_module = execute_module
        # the task variables and tmpdir passed into the health checker task
        self.task_vars = task_vars or {}
        self.tmp = tmp

        # a boolean for disabling the gathering of results (files, computations) that won't
        # actually be recorded/used
        self.want_full_results = want_full_results

        # mainly for testing purposes; see execute_module_with_retries
        self._module_retries = 3
        self._module_retry_interval = 5  # seconds

        # state to be recorded for inspection after the check runs:
        #
        # set to True when the check changes the host, for accurate total "changed" count
        self.changed = False
        # list of OpenShiftCheckException for check to report (alternative to returning a failed result)
        self.failures = []
        # list of FileToSave - files the check specifies to be written locally if so configured
        self.files_to_save = []
        # log messages for the check - tuples of (description, msg) where msg is serializable.
        # These are intended to be a sequential record of what the check observed and determined.
        self.logs = []

    @abstractproperty
    def name(self):
        """The name of this check, usually derived from the class name."""
        return "openshift_check"

    @property
    def tags(self):
        """A list of tags that this check satisfies.

        Tags are used to reference multiple checks with a single '@tagname'
        special check name.
        """
        return []

    @staticmethod
    def is_active():
        """Returns True if this check applies to the ansible-playbook run."""
        return True

    def is_first_master(self):
        """Determine if running on the first master. Returns: bool"""
        masters = self.get_var("groups", "oo_first_master", default=None) or [None]
        return masters[0] == self.get_var("ansible_host")

    @abstractmethod
    def run(self):
        """Executes a check against a host and returns a result hash similar to Ansible modules.

        Actually the direction ahead is to record state in the attributes and
        not bother building a result hash. Instead, return an empty hash and let
        the action plugin fill it in. Or raise an OpenShiftCheckException.
        Returning a hash may become deprecated if it does not prove necessary.
        """
        return {}

    @classmethod
    def subclasses(cls):
        """Returns a generator of subclasses of this class and its subclasses."""
        # AUDIT: no-member makes sense due to this having a metaclass
        for subclass in cls.__subclasses__():  # pylint: disable=no-member
            yield subclass
            for subclass in subclass.subclasses():
                yield subclass

    def register_failure(self, error):
        """Record in the check that a failure occurred.

        Recorded failures are merged into the result hash for now. They are also saved,
        if an output directory is provided, to <check>.failures.json, and registered as
        a log entry for context in <check>.log.json.
        """
        # It should be an exception; make it one if not
        if not isinstance(error, OpenShiftCheckException):
            error = OpenShiftCheckException(str(error))
        self.failures.append(error)
        # duplicate it in the logs so it can be seen in the context of any
        # information that led to the failure
        self.register_log("failure: " + error.name, str(error))

    def register_log(self, context, msg):
        """Record an entry for the check log.

        Entries are intended to serve as a record of the whole sequence of what the
        check observed. They are saved as an ordered list in a local check log file.
        They are not included in the result or in the ansible log; they are just for
        the record.
        """
        self.logs.append([context, msg])

    def register_file(self, filename, contents=None, remote_filename=""):
        """Record a file that a check makes available to be saved individually to output directory.

        Either file contents should be passed in, or a file to be copied from the remote host
        should be specified. Contents that are not a string are to be serialized as JSON.

        NOTE: When copying a file from the remote host, it is slurped into memory as base64,
        meaning you should avoid using this on huge files (more than say 10M).
        """
        if contents is None and not remote_filename:
            raise OpenShiftCheckException("File data/source not specified; this is a bug in the check.")
        self.files_to_save.append(FileToSave(filename, contents, remote_filename))
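
    # Usage sketch (illustrative, from within a check's run method; file names
    # here are hypothetical):
    #
    #     self.register_file("etcd_metrics.json", dict(leader_changes=0))   # inline contents
    #     self.register_file("origin-master.log",
    #                        remote_filename="/var/log/origin-master.log")  # copy from host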

    def execute_module(self, module_name=None, module_args=None, save_as_name=None, register=True):
        """Invoke an Ansible module from a check.

        Invoke stored _execute_module, normally copied from the action
        plugin, with its params and the task_vars and tmp given at
        check initialization. No positional parameters beyond these
        are specified. If it's necessary to specify any of the other
        parameters to _execute_module, then _execute_module should just
        be invoked directly (with awareness of changes in the method
        signature per Ansible version).

        So e.g. check.execute_module("foo", dict(arg1=...))

        save_as_name specifies a file name for saving the result to an output directory,
        if needed, and is intended to uniquely identify the result of invoking execute_module.
        If not provided, the module name will be used.
        If register is False, then the result won't be registered in logs or files to save.

        Return: result hash from module execution.
        """
        if self._execute_module is None:
            raise NotImplementedError(
                self.__class__.__name__ +
                " invoked execute_module without providing the method at initialization."
            )
        result = self._execute_module(module_name, module_args, self.tmp, self.task_vars)
        if result.get("changed"):
            self.changed = True
        for output in ["result", "stdout"]:
            # output is often JSON; attempt to decode
            try:
                result[output + "_json"] = json.loads(result[output])
            except (KeyError, ValueError):
                pass

        if register:
            self.register_log("execute_module: " + module_name, result)
            self.register_file(save_as_name or module_name + ".json", result)
        return result
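
    # Usage sketch (illustrative): run the standard Ansible "ping" module from a
    # check and inspect the result hash; save_as_name only affects the file name
    # the registered result is saved under:
    #
    #     result = self.execute_module("ping", dict(data="pong"), save_as_name="ping_test.json")
    #     if result.get("failed"):
    #         self.register_failure(OpenShiftCheckException("ping_failed", str(result)))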

    def execute_module_with_retries(self, module_name, module_args):
        """Run execute_module and retry on failure."""
        result = {}
        tries = 0
        while True:
            res = self.execute_module(module_name, module_args)
            if tries > self._module_retries or not res.get("failed"):
                result.update(res)
                return result
            result["last_failed"] = res
            tries += 1
            time.sleep(self._module_retry_interval)

    def get_var(self, *keys, **kwargs):
        """Get deeply nested values from task_vars.

        Ansible task_vars structures are Python dicts, often mapping strings to
        other dicts. This helper makes it easier to get a nested value, raising
        OpenShiftCheckException when a key is not found.

        Keyword args:
          default:
            On missing key, return this as default value instead of raising exception.
          convert:
            Supply a function to apply to normalize the value before returning it.
            None is the default (return as-is).
            This function should raise ValueError if the user has provided a value
            that cannot be converted, or OpenShiftCheckException if some other
            problem needs to be described to the user.
        """
        if len(keys) == 1:
            keys = keys[0].split(".")

        try:
            value = reduce(operator.getitem, keys, self.task_vars)
        except (KeyError, TypeError):
            if "default" not in kwargs:
                raise OpenShiftCheckException(
                    "This check expects the '{}' inventory variable to be defined\n"
                    "in order to proceed, but it is undefined. There may be a bug\n"
                    "in Ansible, the checks, or their dependencies."
                    "".format(".".join(map(str, keys)))
                )
            value = kwargs["default"]

        convert = kwargs.get("convert", None)
        try:
            if convert is None:
                return value
            elif convert is bool:  # interpret bool as Ansible does, instead of python truthiness
                return ansible_to_bool(value)
            else:
                return convert(value)

        except ValueError as error:  # user error in specifying value
            raise OpenShiftCheckException(
                'Cannot convert inventory variable to expected type:\n'
                '  "{var}={value}"\n'
                '{error}'.format(var=".".join(keys), value=value, error=error)
            )

        except OpenShiftCheckException:  # some other check-specific problem
            raise

        except Exception as error:  # probably a bug in the function
            raise OpenShiftCheckException(
                'There is a bug in this check. While trying to convert variable\n'
                '  "{var}={value}"\n'
                'the given converter cannot be used or failed unexpectedly:\n'
                '{type}: {error}'.format(
                    var=".".join(keys),
                    value=value,
                    type=error.__class__.__name__,
                    error=error
                ))
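
    # Usage sketch (illustrative; variable names are hypothetical): nested keys
    # may be given as separate arguments or as one dotted string. "default"
    # suppresses the missing-key exception, and convert=bool applies Ansible's
    # boolean semantics (so the string "no" becomes False):
    #
    #     self.get_var("openshift", "common", "service_type")
    #     self.get_var("openshift.common.service_type")
    #     self.get_var("openshift_check_enabled", default=True, convert=bool)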

    @staticmethod
    def normalize(name_list):
        """Return a clean list of names.

        The input may be a comma-separated string or a sequence. Leading and
        trailing whitespace characters are removed. Empty items are discarded.
        """
        if isinstance(name_list, string_types):
            name_list = name_list.split(',')
        return [name.strip() for name in name_list if name.strip()]
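
    # Example (illustrative):
    #
    #     OpenShiftCheck.normalize("disk_availability, memory_availability,, ")
    #     # -> ["disk_availability", "memory_availability"]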

    @staticmethod
    def get_major_minor_version(openshift_image_tag):
        """Parse and return the deployed version of OpenShift as a tuple."""
        if openshift_image_tag and openshift_image_tag[0] == 'v':
            openshift_image_tag = openshift_image_tag[1:]

        # map major release versions across releases
        # to a common major version
        openshift_major_release_version = {
            "1": "3",
        }

        components = openshift_image_tag.split(".")
        if not components or len(components) < 2:
            msg = "An invalid version of OpenShift was found for this host: {}"
            raise OpenShiftCheckException(msg.format(openshift_image_tag))

        if components[0] in openshift_major_release_version:
            components[0] = openshift_major_release_version[components[0]]

        components = tuple(int(x) for x in components[:2])
        return components
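
    # Example (illustrative): a leading "v" is stripped, and origin 1.x tags are
    # mapped onto the corresponding OpenShift 3.x major version:
    #
    #     OpenShiftCheck.get_major_minor_version("v3.9.0")  # -> (3, 9)
    #     OpenShiftCheck.get_major_minor_version("1.5.1")   # -> (3, 5)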

    def find_ansible_mount(self, path):
        """Return the mount point for path from ansible_mounts."""

        # reorganize list of mounts into dict by path
        mount_for_path = {
            mount['mount']: mount
            for mount
            in self.get_var('ansible_mounts')
        }

        # NOTE: including base cases '/' and '' to ensure the loop ends
        mount_targets = set(mount_for_path.keys()) | {'/', ''}
        mount_point = path
        while mount_point not in mount_targets:
            mount_point = os.path.dirname(mount_point)

        try:
            mount = mount_for_path[mount_point]
            self.register_log("mount point for " + path, mount)
            return mount
        except KeyError:
            known_mounts = ', '.join('"{}"'.format(mount) for mount in sorted(mount_for_path))
            raise OpenShiftCheckException(
                'Unable to determine mount point for path "{}".\n'
                'Known mount points: {}.'.format(path, known_mounts or 'none')
            )
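
    # Usage sketch (illustrative): given ansible_mounts fact entries such as
    # [{"mount": "/", "size_available": 10000000},
    #  {"mount": "/var", "size_available": 20000000}],
    # asking for "/var/lib/docker" walks up the path and returns the "/var" entry:
    #
    #     mount = self.find_ansible_mount("/var/lib/docker")
    #     free_bytes = mount.get("size_available")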


LOADER_EXCLUDES = (
    "__init__.py",
    "mixins.py",
    "logging.py",
)


def load_checks(path=None, subpkg=""):
    """Dynamically import all check modules for the side effect of registering checks."""
    if path is None:
        path = os.path.dirname(__file__)

    modules = []

    for name in os.listdir(path):
        if os.path.isdir(os.path.join(path, name)):
            modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name)
            continue

        if name.endswith(".py") and name not in LOADER_EXCLUDES:
            modules.append(import_module(__package__ + subpkg + "." + name[:-3]))

    return modules
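

# Usage sketch (illustrative): a caller such as an action plugin would typically
# import every check module for its registration side effect, then discover the
# concrete checks through the subclass generator and keep only those that apply:
#
#     load_checks()
#     active = [cls for cls in OpenShiftCheck.subclasses() if cls.is_active()]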