1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
- """Check that scans journalctl for messages caused as a symptom of increased etcd traffic."""
- from openshift_checks import OpenShiftCheck, get_var
class EtcdTraffic(OpenShiftCheck):
    """Check if host is being affected by an increase in etcd traffic."""

    name = "etcd_traffic"
    tags = ["health", "etcd"]

    @classmethod
    def is_active(cls, task_vars):
        """Decide whether this check applies to the current host.

        The check is active only when all of the following hold:
        the parent class reports the check as active, "etcd" is among the
        host's ``group_names``, and ``openshift.common.short_version`` is one
        of "3.4", "3.5", "1.4" or "1.5".
        """
        group_names = get_var(task_vars, "group_names", default=[])
        valid_group_names = "etcd" in group_names

        # No default is supplied here, so a missing short_version is handled
        # by whatever get_var does for absent keys.
        version = get_var(task_vars, "openshift", "common", "short_version")
        valid_version = version in ("3.4", "3.5", "1.4", "1.5")

        return super(EtcdTraffic, cls).is_active(task_vars) and valid_group_names and valid_version

    def run(self, tmp, task_vars):
        """Search the etcd unit's journal for slow-sync warnings.

        Invokes the ``search_journalctl`` module with a single log matcher
        for the etcd unit ("etcd_container" on containerized installs,
        "etcd" otherwise).

        ``tmp`` is accepted but not used by this method; ``task_vars`` is
        passed through to ``get_var`` and ``execute_module``.

        Returns a dict with ``failed`` and ``msg`` set when the warning
        pattern matched or when the module reported a failure, and an empty
        dict otherwise.
        """
        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
        unit = "etcd_container" if is_containerized else "etcd"

        log_matchers = [{
            "start_regexp": r"Starting Etcd Server",
            "regexp": r"etcd: sync duration of [^,]+, expected less than 1s",
            "unit": unit
        }]

        match = self.execute_module("search_journalctl", {
            "log_matchers": log_matchers,
        }, task_vars)

        # A positive match takes precedence over a reported module failure.
        if match.get("matched"):
            msg = ("Higher than normal etcd traffic detected.\n"
                   "OpenShift 3.4 introduced an increase in etcd traffic.\n"
                   "Upgrading to OpenShift 3.6 is recommended in order to fix this issue.\n"
                   "Please refer to https://access.redhat.com/solutions/2916381 for more information.")
            return {"failed": True, "msg": msg}

        if match.get("failed"):
            # A failed result is not guaranteed to carry an "errors" list
            # (e.g. a failure that only provides "msg"); joining None would
            # raise TypeError and mask the underlying problem.
            errors = match.get("errors")
            if not errors:
                errors = [match.get("msg") or "search_journalctl failed without reporting any errors."]
            return {"failed": True, "msg": "\n".join(errors)}

        return {}
|