etcd_traffic.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. """Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
  2. from openshift_checks import OpenShiftCheck
  3. class EtcdTraffic(OpenShiftCheck):
  4. """Check if host is being affected by an increase in etcd traffic."""
  5. name = "etcd_traffic"
  6. tags = ["health", "etcd"]
  7. def is_active(self):
  8. """Skip hosts that do not have etcd in their group names."""
  9. group_names = self.get_var("group_names", default=[])
  10. valid_group_names = "oo_etcd_to_config" in group_names
  11. version = self.get_major_minor_version(self.get_var("openshift_image_tag"))
  12. valid_version = version in ((3, 4), (3, 5))
  13. return super(EtcdTraffic, self).is_active() and valid_group_names and valid_version
  14. def run(self):
  15. is_containerized = self.get_var("openshift", "common", "is_containerized")
  16. unit = "etcd_container" if is_containerized else "etcd"
  17. log_matchers = [{
  18. "start_regexp": r"Starting Etcd Server",
  19. "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
  20. "unit": unit
  21. }]
  22. match = self.execute_module("search_journalctl", {"log_matchers": log_matchers})
  23. if match.get("matched"):
  24. msg = ("Higher than normal etcd traffic detected.\n"
  25. "OpenShift 3.4 introduced an increase in etcd traffic.\n"
  26. "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
  27. "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
  28. return {"failed": True, "msg": msg}
  29. if match.get("failed"):
  30. return {"failed": True, "msg": "\n".join(match.get("errors"))}
  31. return {}