etcd_traffic.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. """Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
  2. from openshift_checks import OpenShiftCheck, get_var
  3. class EtcdTraffic(OpenShiftCheck):
  4. """Check if host is being affected by an increase in etcd traffic."""
  5. name = "etcd_traffic"
  6. tags = ["health", "etcd"]
  7. @classmethod
  8. def is_active(cls, task_vars):
  9. """Skip hosts that do not have etcd in their group names."""
  10. group_names = get_var(task_vars, "group_names", default=[])
  11. valid_group_names = "etcd" in group_names
  12. version = get_var(task_vars, "openshift", "common", "short_version")
  13. valid_version = version in ("3.4", "3.5", "1.4", "1.5")
  14. return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version
  15. def run(self, tmp, task_vars):
  16. is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
  17. unit = "etcd_container" if is_containerized else "etcd"
  18. log_matchers = [{
  19. "start_regexp": r"Starting Etcd Server",
  20. "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
  21. "unit": unit
  22. }]
  23. match = self.execute_module("search_journalctl", {
  24. "log_matchers": log_matchers,
  25. }, task_vars)
  26. if match.get("matched"):
  27. msg = ("Higher than normal etcd traffic detected.\n"
  28. "OpenShift 3.4 introduced an increase in etcd traffic.\n"
  29. "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
  30. "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
  31. return {"failed": True, "msg": msg}
  32. if match.get("failed"):
  33. return {"failed": True, "msg": "\n".join(match.get("errors"))}
  34. return {}