瀏覽代碼

Merge pull request #3630 from juanvallejo/jvallejo/add-etcd-volume-check

Merged by openshift-bot
OpenShift Bot 8 年之前
父節點
當前提交
177beab324

+ 58 - 0
roles/openshift_health_checker/openshift_checks/etcd_volume.py

@@ -0,0 +1,58 @@
+"""A health check for OpenShift clusters."""
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+
+
+class EtcdVolume(OpenShiftCheck):
+    """Ensures etcd storage usage does not exceed a given threshold.
+
+    The mount holding etcd data is looked up in the ``ansible_mounts`` fact
+    and its used percentage is compared against the
+    ``etcd_device_usage_threshold_percent`` variable, or against
+    ``default_threshold_percent`` when that variable is not set.
+    """
+
+    name = "etcd_volume"
+    tags = ["etcd", "health"]
+
+    # Default device usage threshold. Value should be in the range [0, 100].
+    default_threshold_percent = 90
+    # Where to find etcd data, higher priority first.
+    supported_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
+
+    @classmethod
+    def is_active(cls, task_vars):
+        """Return whether this check should run on the current host.
+
+        The check is active when the parent class reports it as active and
+        ``ansible_ssh_host`` is listed in the ``etcd`` group, or in the
+        ``masters`` group when the ``etcd`` group is empty or missing.
+        """
+        etcd_hosts = (
+            get_var(task_vars, "groups", "etcd", default=[]) or
+            get_var(task_vars, "groups", "masters", default=[]) or
+            []
+        )
+        is_etcd_host = get_var(task_vars, "ansible_ssh_host") in etcd_hosts
+        return super(EtcdVolume, cls).is_active(task_vars) and is_etcd_host
+
+    def run(self, tmp, task_vars):
+        """Compare etcd storage usage against the configured threshold.
+
+        Returns ``{"failed": True, "msg": ...}`` when the used percentage of
+        the etcd mount is above the threshold, ``{"changed": False}``
+        otherwise.
+
+        Raises OpenShiftCheckException when no supported mount is found, when
+        the mount lacks usable size information, or when the threshold is not
+        a number.
+        """
+        mount_info = self._etcd_mount_info(task_vars)
+        device = mount_info.get("device", "unknown")
+        mount = mount_info.get("mount", "unknown")
+
+        available = mount_info.get("size_available")
+        total = mount_info.get("size_total")
+        if available is None or total is None:
+            msg = "Unable to determine etcd storage usage: size information is missing. Device: {}, mount: {}.".format(
+                device, mount
+            )
+            raise OpenShiftCheckException(msg)
+        if not total:
+            # A zero-sized mount would otherwise surface as a bare
+            # ZeroDivisionError in the percentage computation below.
+            msg = "Unable to determine etcd storage usage: reported total size is 0. Device: {}, mount: {}.".format(
+                device, mount
+            )
+            raise OpenShiftCheckException(msg)
+
+        threshold = get_var(
+            task_vars,
+            "etcd_device_usage_threshold_percent",
+            default=self.default_threshold_percent
+        )
+        try:
+            # The variable may arrive as a string; compare and format it as a
+            # number.
+            threshold = float(threshold)
+        except (TypeError, ValueError):
+            msg = "Invalid value for etcd_device_usage_threshold_percent: {!r}. Expected a number.".format(threshold)
+            raise OpenShiftCheckException(msg)
+
+        used = total - available
+        used_percent = 100.0 * used / total
+
+        if used_percent > threshold:
+            msg = "etcd storage usage ({:.1f}%) is above threshold ({:.1f}%). Device: {}, mount: {}.".format(
+                used_percent, threshold, device, mount
+            )
+            return {"failed": True, "msg": msg}
+
+        return {"changed": False}
+
+    def _etcd_mount_info(self, task_vars):
+        """Return the ``ansible_mounts`` entry that holds etcd data.
+
+        The first path of ``supported_mount_paths`` that is mounted wins.
+        Raises OpenShiftCheckException listing the mounted paths when none of
+        the supported paths is mounted.
+        """
+        ansible_mounts = get_var(task_vars, "ansible_mounts")
+        # Entries without a mount path can never match a supported path, and a
+        # None key would break the join in the error message below.
+        mounts = {mnt["mount"]: mnt for mnt in ansible_mounts if mnt.get("mount")}
+
+        for path in self.supported_mount_paths:
+            if path in mounts:
+                return mounts[path]
+
+        paths = ', '.join(sorted(mounts)) or 'none'
+        msg = "Unable to find etcd storage mount point. Paths mounted: {}.".format(paths)
+        raise OpenShiftCheckException(msg)

+ 149 - 0
roles/openshift_health_checker/test/etcd_volume_test.py

@@ -0,0 +1,149 @@
+import pytest
+
+from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException
+
+
+@pytest.mark.parametrize('ansible_mounts,extra_words', [
+    # no mounts at all: the message reports 'none'
+    ([], ['none']),
+    # only an unrelated path is mounted: the message lists it
+    ([{'mount': '/mnt'}], ['/mnt']),
+])
+def test_cannot_determine_available_disk(ansible_mounts, extra_words):
+    """The check raises when none of the supported etcd paths is mounted."""
+    check = EtcdVolume(execute_module=fake_execute_module)
+
+    with pytest.raises(OpenShiftCheckException) as excinfo:
+        check.run(tmp=None, task_vars={'ansible_mounts': ansible_mounts})
+
+    message = str(excinfo.value)
+    expected_words = 'Unable to find etcd storage mount point'.split() + extra_words
+    missing_words = [word for word in expected_words if word not in message]
+    assert not missing_words
+
+
+@pytest.mark.parametrize('size_limit_percent,ansible_mounts', [
+    (
+        # no threshold given: the default of 90% applies, usage is 50%
+        None,
+        [{
+            'mount': '/',
+            'size_available': 40 * 10**9,
+            'size_total': 80 * 10**9
+        }],
+    ),
+    (
+        # nothing used at all: passes even with a very low threshold
+        1,
+        [{
+            'mount': '/',
+            'size_available': 30 * 10**9,
+            'size_total': 30 * 10**9,
+        }],
+    ),
+    (
+        # usage exactly at the threshold is not "above" it
+        50,
+        [{
+            'mount': '/',
+            'size_available': 20 * 10**9,
+            'size_total': 40 * 10**9,
+        }],
+    ),
+    (
+        20,
+        [{
+            # zero-sized / must not be looked at ...
+            'mount': '/',
+            'size_available': 0,
+            'size_total': 0,
+        }, {
+            # ... nor /var/lib, whose usage (~90.5%) is above the threshold ...
+            'mount': '/var/lib',
+            'size_available': 2 * 10**9,
+            'size_total': 21 * 10**9,
+        }, {
+            # ... because /var/lib/etcd has priority and is only 10% used
+            'mount': '/var/lib/etcd',
+            'size_available': 36 * 10**9,
+            'size_total': 40 * 10**9
+        }],
+    )
+])
+def test_succeeds_with_recommended_disk_space(size_limit_percent, ansible_mounts):
+    """The check passes when usage of the etcd mount is within the threshold."""
+    task_vars = dict(ansible_mounts=ansible_mounts)
+    # Leave the variable unset when no threshold is given so that the
+    # check's default threshold is exercised.
+    if size_limit_percent is not None:
+        task_vars["etcd_device_usage_threshold_percent"] = size_limit_percent
+
+    check = EtcdVolume(execute_module=fake_execute_module)
+    result = check.run(tmp=None, task_vars=task_vars)
+
+    assert not result.get('failed', False)
+
+
+@pytest.mark.parametrize('size_limit_percent,ansible_mounts,extra_words', [
+    (
+        # no threshold given: the default of 90% applies, usage is 99%
+        None,
+        [{
+            'mount': '/',
+            'size_available': 1 * 10**9,
+            'size_total': 100 * 10**9,
+        }],
+        ['99.0%'],
+    ),
+    (
+        # almost nothing left: usage is reported rounded to 100.0%
+        70.0,
+        [{
+            'mount': '/',
+            'size_available': 1 * 10**6,
+            'size_total': 5 * 10**9,
+        }],
+        ['100.0%'],
+    ),
+    (
+        # two thirds used against a 40% threshold
+        40.0,
+        [{
+            'mount': '/',
+            'size_available': 2 * 10**9,
+            'size_total': 6 * 10**9,
+        }],
+        ['66.7%'],
+    ),
+    (
+        None,
+        [{
+            # /var is completely free ...
+            'mount': '/var',
+            'size_available': 20 * 10**9,
+            'size_total': 20 * 10**9,
+        }, {
+            # ... but /var/lib has priority and is 95% used
+            'mount': '/var/lib',
+            'size_available': 1 * 10**9,
+            'size_total': 20 * 10**9,
+        }],
+        ['95.0%'],
+    ),
+])
+def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts, extra_words):
+    """The check fails and reports the usage when it is above the threshold."""
+    task_vars = {'ansible_mounts': ansible_mounts}
+    # Leave the variable unset when no threshold is given so that the
+    # check's default threshold is exercised.
+    if size_limit_percent is not None:
+        task_vars['etcd_device_usage_threshold_percent'] = size_limit_percent
+
+    result = EtcdVolume(execute_module=fake_execute_module).run(tmp=None, task_vars=task_vars)
+
+    assert result['failed']
+    message = result['msg']
+    for expected in extra_words:
+        assert expected in message
+
+
+def fake_execute_module(*args):
+    """Stand-in module executor that fails the test if it is ever invoked."""
+    reason = 'this function should not be called'
+    raise AssertionError(reason)