etcd_volume.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. """A health check for OpenShift clusters."""
  2. from openshift_checks import OpenShiftCheck
  3. class EtcdVolume(OpenShiftCheck):
  4. """Ensures etcd storage usage does not exceed a given threshold."""
  5. name = "etcd_volume"
  6. tags = ["etcd", "health"]
  7. # Default device usage threshold. Value should be in the range [0, 100].
  8. default_threshold_percent = 90
  9. # Where to find etcd data
  10. etcd_mount_path = "/var/lib/etcd"
  11. def is_active(self):
  12. etcd_hosts = (
  13. self.get_var("groups", "oo_etcd_to_config", default=[]) or
  14. self.get_var("groups", "oo_masters_to_config", default=[]) or
  15. []
  16. )
  17. is_etcd_host = self.get_var("ansible_host") in etcd_hosts
  18. return super(EtcdVolume, self).is_active() and is_etcd_host
  19. def run(self):
  20. mount_info = self.find_ansible_mount(self.etcd_mount_path)
  21. available = mount_info["size_available"]
  22. total = mount_info["size_total"]
  23. used = total - available
  24. threshold = self.get_var(
  25. "etcd_device_usage_threshold_percent",
  26. default=self.default_threshold_percent
  27. )
  28. used_percent = 100.0 * used / total
  29. if used_percent > threshold:
  30. device = mount_info.get("device", "unknown")
  31. mount = mount_info.get("mount", "unknown")
  32. msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format(
  33. used_percent, threshold, device, mount
  34. )
  35. return {"failed": True, "msg": msg}
  36. return {}