Browse Source

add etcd volume check

juanvallejo 8 years ago
parent
commit
a62594a218

+ 58 - 0
roles/openshift_health_checker/openshift_checks/etcd_volume.py

@@ -0,0 +1,58 @@
+# vim: expandtab:tabstop=4:shiftwidth=4
+"""
+Health check that fails when disk usage on the etcd volume exceeds a defined threshold.
+"""
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+
+
+class EtcdVolume(OpenShiftCheck):
+    """Ensure disk usage on the volume holding etcd data stays below a limit.
+
+    The check reads the ``ansible_mounts`` variable, selects the most specific
+    mount point among ``/var/lib/etcd``, ``/var/lib``, ``/var`` and ``/``, and
+    fails when the used fraction of that volume is greater than
+    ``etcd_disk_size_limit_percent``.
+    """
+
+    name = "etcd_volume"
+    tags = ["etcd", "health"]
+
+    # Default upper bound for used/total space. Despite "percent" in the name
+    # this is a fraction (0.9 means 90%): it is compared directly against the
+    # used/total ratio computed in run().
+    etcd_default_size_limit_percent = 0.9
+
+    def run(self, tmp, task_vars):
+        """Compare the used space of the etcd volume with the configured limit.
+
+        Returns ``{"failed": True, "msg": ...}`` when the used fraction is
+        greater than the limit and ``{"changed": False}`` otherwise.
+
+        Raises OpenShiftCheckException when no supported mount is present,
+        when the selected mount lacks usable size information, or when the
+        configured limit is not a number.
+        """
+        ansible_mounts = get_var(task_vars, "ansible_mounts")
+        etcd_mount = self._get_etcd_mount_path(ansible_mounts)
+        volume = etcd_mount["mount"]
+
+        try:
+            size_available = int(etcd_mount["size_available"])
+            size_total = int(etcd_mount["size_total"])
+        except (KeyError, TypeError, ValueError):
+            msg = ("Unable to determine available disk space: the volume "
+                   "\"{}\" does not report usable size information.")
+            raise OpenShiftCheckException(msg.format(volume))
+
+        # Without this guard a zero-sized volume surfaces as an unhandled
+        # ZeroDivisionError in the ratio computed below.
+        if size_total <= 0:
+            msg = ("Unable to determine available disk space: the volume "
+                   "\"{}\" reports a total size of {} bytes.")
+            raise OpenShiftCheckException(msg.format(volume, size_total))
+
+        raw_limit = get_var(
+            task_vars,
+            "etcd_disk_size_limit_percent",
+            default=self.etcd_default_size_limit_percent
+        )
+        # NOTE(review): user-supplied variables may arrive as strings (for
+        # example when set in an inventory); coerce so the comparison below is
+        # always numeric.
+        try:
+            size_limit_percent = float(raw_limit)
+        except (TypeError, ValueError):
+            msg = "Invalid value for etcd_disk_size_limit_percent: {!r}."
+            raise OpenShiftCheckException(msg.format(raw_limit))
+
+        size_used = size_total - size_available
+
+        if float(size_used) / size_total > size_limit_percent:
+            msg = ("Current etcd volume usage ({actual:.2f} GB) for the volume \"{volume}\" "
+                   "is greater than the storage limit ({limit:.2f} GB).")
+            msg = msg.format(
+                actual=self._to_gigabytes(size_used),
+                volume=volume,
+                limit=self._to_gigabytes(size_limit_percent * size_total),
+            )
+            return {"failed": True, "msg": msg}
+
+        return {"changed": False}
+
+    @staticmethod
+    def _get_etcd_mount_path(ansible_mounts):
+        """Return the mount entry of the volume that holds etcd data.
+
+        Candidate paths are tried from most to least specific, and the first
+        one present in ``ansible_mounts`` wins.
+
+        Raises OpenShiftCheckException, listing the mounted paths, when none
+        of the candidates is mounted.
+        """
+        supported_mnt_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
+        available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
+
+        for path in supported_mnt_paths:
+            if path in available_mnts:
+                return available_mnts[path]
+
+        paths = ', '.join(sorted(available_mnts)) or 'none'
+        msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths)
+        raise OpenShiftCheckException(msg)
+
+    @staticmethod
+    def _to_gigabytes(byte_size):
+        """Convert a size in bytes to decimal gigabytes (10**9 bytes)."""
+        return float(byte_size) / 10.0**9

+ 149 - 0
roles/openshift_health_checker/test/etcd_volume_test.py

@@ -0,0 +1,149 @@
+import pytest
+
+from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException
+
+
+@pytest.mark.parametrize('ansible_mounts,extra_words', [
+    # no mounts reported at all
+    ([], ['none']),
+    # mounts reported, but none of the supported paths among them
+    ([{'mount': '/mnt'}], ['/mnt']),
+])
+def test_cannot_determine_available_disk(ansible_mounts, extra_words):
+    """The check raises when no supported mount path is available."""
+    check = EtcdVolume(execute_module=fake_execute_module)
+
+    with pytest.raises(OpenShiftCheckException) as excinfo:
+        check.run(tmp=None, task_vars={'ansible_mounts': ansible_mounts})
+
+    # The message must explain the problem and mention the mounted paths.
+    message = str(excinfo.value)
+    expected_words = ['determine', 'available', 'disk'] + extra_words
+    for expected in expected_words:
+        assert expected in message
+
+
+@pytest.mark.parametrize('size_limit,ansible_mounts', [
+    (
+        # no limit configured: the default of 0.9 applies and
+        # only 50% of the volume is in use
+        None,
+        [{
+            'mount': '/',
+            'size_available': 40 * 10**9,
+            'size_total': 80 * 10**9,
+        }],
+    ),
+    (
+        # nothing in use, limit of 100%
+        1,
+        [{
+            'mount': '/',
+            'size_available': 30 * 10**9,
+            'size_total': 30 * 10**9,
+        }],
+    ),
+    (
+        # usage exactly at the limit is still acceptable, because the
+        # check only fails when usage is greater than the limit
+        0.5,
+        [{
+            'mount': '/',
+            'size_available': 20 * 10**9,
+            'size_total': 40 * 10**9,
+        }],
+    ),
+    (
+        # the limit is a fraction of the total size, so this case only
+        # passes when the most specific mount is the one evaluated
+        0.2,
+        [{
+            # no usable size reported for / ...
+            'mount': '/',
+            'size_available': 0,
+            'size_total': 0,
+        }, {
+            # ... and /var/lib is about 90% full ...
+            'mount': '/var/lib',
+            'size_available': 2 * 10**9,
+            'size_total': 21 * 10**9,
+        }, {
+            # ... but /var/lib/etcd is only 10% full
+            'mount': '/var/lib/etcd',
+            'size_available': 36 * 10**9,
+            'size_total': 40 * 10**9,
+        }],
+    ),
+])
+def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts):
+    """The check passes while usage does not exceed the configured fraction."""
+    task_vars = dict(ansible_mounts=ansible_mounts)
+
+    # Leave the variable unset to exercise the built-in default limit.
+    if size_limit is not None:
+        task_vars["etcd_disk_size_limit_percent"] = size_limit
+
+    check = EtcdVolume(execute_module=fake_execute_module)
+    result = check.run(tmp=None, task_vars=task_vars)
+
+    assert not result.get('failed', False)
+
+
+@pytest.mark.parametrize('size_limit_percent,ansible_mounts,extra_words', [
+    (
+        # limit left unset: the default of 90% of size_total applies
+        None,
+        [{
+            'mount': '/',
+            'size_available': 1 * 10**9,
+            'size_total': 100 * 10**9,
+        }],
+        ['90.00 GB'],
+    ),
+    (
+        # explicit limit of 70% on an almost full volume
+        0.7,
+        [{
+            'mount': '/',
+            'size_available': 1 * 10**6,
+            'size_total': 5 * 10**9,
+        }],
+        ['3.50 GB'],
+    ),
+    (
+        # explicit limit of 40% with two thirds of the volume in use
+        0.4,
+        [{
+            'mount': '/',
+            'size_available': 2 * 10**9,
+            'size_total': 6 * 10**9,
+        }],
+        ['2.40 GB'],
+    ),
+    (
+        # default limit; the more specific mount decides the outcome
+        None,
+        [{
+            # /var is completely free ...
+            'mount': '/var',
+            'size_available': 20 * 10**9,
+            'size_total': 20 * 10**9,
+        }, {
+            # ... but /var/lib is nearly full
+            'mount': '/var/lib',
+            'size_available': 1 * 10**9,
+            'size_total': 20 * 10**9,
+        }],
+        ['18.00 GB'],
+    ),
+])
+def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts, extra_words):
+    """The check fails and reports the limit once usage exceeds it."""
+    task_vars = {'ansible_mounts': ansible_mounts}
+
+    # An unset variable makes the check fall back to its default limit.
+    if size_limit_percent is not None:
+        task_vars['etcd_disk_size_limit_percent'] = size_limit_percent
+
+    outcome = EtcdVolume(execute_module=fake_execute_module).run(tmp=None, task_vars=task_vars)
+
+    assert outcome['failed']
+    failure_message = outcome['msg']
+    for expected in extra_words:
+        assert expected in failure_message
+
+
+def fake_execute_module(*_unused):
+    """Stand-in for execute_module that fails loudly if it is ever invoked."""
+    raise AssertionError('this function should not be called')