curator.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. """
  2. Module for performing checks on a Curator logging deployment
  3. """
  4. from openshift_checks import get_var
  5. from openshift_checks.logging.logging import LoggingCheck
  6. class Curator(LoggingCheck):
  7. """Module that checks an integrated logging Curator deployment"""
  8. name = "curator"
  9. tags = ["health", "logging"]
  10. logging_namespace = None
  11. def run(self, tmp, task_vars):
  12. """Check various things and gather errors. Returns: result as hash"""
  13. self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
  14. curator_pods, error = super(Curator, self).get_pods_for_component(
  15. self.module_executor,
  16. self.logging_namespace,
  17. "curator",
  18. task_vars
  19. )
  20. if error:
  21. return {"failed": True, "changed": False, "msg": error}
  22. check_error = self.check_curator(curator_pods)
  23. if check_error:
  24. msg = ("The following Curator deployment issue was found:"
  25. "\n-------\n"
  26. "{}".format(check_error))
  27. return {"failed": True, "changed": False, "msg": msg}
  28. # TODO(lmeyer): run it all again for the ops cluster
  29. return {"failed": False, "changed": False, "msg": 'No problems found with Curator deployment.'}
  30. def check_curator(self, pods):
  31. """Check to see if curator is up and working. Returns: error string"""
  32. if not pods:
  33. return (
  34. "There are no Curator pods for the logging stack,\n"
  35. "so nothing will prune Elasticsearch indexes.\n"
  36. "Is Curator correctly deployed?"
  37. )
  38. not_running = super(Curator, self).not_running_pods(pods)
  39. if len(not_running) == len(pods):
  40. return (
  41. "The Curator pod is not currently in a running state,\n"
  42. "so Elasticsearch indexes may increase without bound."
  43. )
  44. if len(pods) - len(not_running) > 1:
  45. return (
  46. "There is more than one Curator pod running. This should not normally happen.\n"
  47. "Although this doesn't cause any problems, you may want to investigate."
  48. )
  49. return None