浏览代码

openshift_checks: add retries in python

Luke Meyer 7 年之前
父节点
当前提交
a202f1647a

+ 17 - 0
roles/openshift_health_checker/openshift_checks/__init__.py

@@ -4,6 +4,7 @@ Health checks for OpenShift clusters.
 
 import operator
 import os
+import time
 
 from abc import ABCMeta, abstractmethod, abstractproperty
 from importlib import import_module
@@ -57,6 +58,9 @@ class OpenShiftCheck(object):
         self._execute_module = execute_module
         self.task_vars = task_vars or {}
         self.tmp = tmp
+        # mainly for testing purposes; see execute_module_with_retries
+        self._module_retries = 3
+        self._module_retry_interval = 5  # seconds
 
         # set to True when the check changes the host, for accurate total "changed" count
         self.changed = False
@@ -115,6 +119,19 @@ class OpenShiftCheck(object):
             )
         return self._execute_module(module_name, module_args, self.tmp, self.task_vars)
 
+    def execute_module_with_retries(self, module_name, module_args):
+        """Run execute_module, retrying while the module reports failure.
+
+        The module is executed once and then re-executed up to
+        ``self._module_retries`` more times, sleeping
+        ``self._module_retry_interval`` seconds before each retry, until a
+        result without a truthy "failed" entry is obtained or the retries
+        are used up.
+
+        Returns a new dict containing the final attempt's result. When at
+        least one retry happened, the most recent failed result that
+        triggered a retry is also stored under the "last_failed" key.
+        """
+        result = {}
+        retries_done = 0
+        while True:
+            res = self.execute_module(module_name, module_args)
+            # ">=" rather than ">" so _module_retries counts retries exactly:
+            # at most 1 + _module_retries executions in total.
+            if not res.get("failed") or retries_done >= self._module_retries:
+                result.update(res)
+                return result
+            result["last_failed"] = res
+            retries_done += 1
+            time.sleep(self._module_retry_interval)
+
     def get_var(self, *keys, **kwargs):
         """Get deeply nested values from task_vars.
 

+ 2 - 4
roles/openshift_health_checker/openshift_checks/docker_image_availability.py

@@ -171,10 +171,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck):
             registries = [registry]
 
         for registry in registries:
-            args = {
-                "_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image)
-            }
-            result = self.execute_module("command", args)
+            args = {"_raw_params": self.skopeo_img_check_command + " docker://{}/{}".format(registry, image)}
+            result = self.execute_module_with_retries("command", args)
             if result.get("rc", 0) == 0 and not result.get("failed"):
                 return True
 

+ 1 - 1
roles/openshift_health_checker/openshift_checks/mixins.py

@@ -36,7 +36,7 @@ class DockerHostMixin(object):
 
         # NOTE: we would use the "package" module but it's actually an action plugin
         # and it's not clear how to invoke one of those. This is about the same anyway:
-        result = self.execute_module(
+        result = self.execute_module_with_retries(
             self.get_var("ansible_pkg_mgr", default="yum"),
             {"name": self.dependencies, "state": "present"},
         )

+ 1 - 1
roles/openshift_health_checker/openshift_checks/package_availability.py

@@ -26,7 +26,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck):
             packages.update(self.node_packages(rpm_prefix))
 
         args = {"packages": sorted(set(packages))}
-        return self.execute_module("check_yum_update", args)
+        return self.execute_module_with_retries("check_yum_update", args)
 
     @staticmethod
     def master_packages(rpm_prefix):

+ 1 - 1
roles/openshift_health_checker/openshift_checks/package_update.py

@@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck):
 
     def run(self):
         args = {"packages": []}
-        return self.execute_module("check_yum_update", args)
+        return self.execute_module_with_retries("check_yum_update", args)

+ 1 - 1
roles/openshift_health_checker/openshift_checks/package_version.py

@@ -76,7 +76,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):
             ],
         }
 
-        return self.execute_module("aos_version", args)
+        return self.execute_module_with_retries("aos_version", args)
 
     def get_required_ovs_version(self):
         """Return the correct Open vSwitch version(s) for the current OpenShift version."""

+ 12 - 6
roles/openshift_health_checker/test/docker_image_availability_test.py

@@ -69,7 +69,7 @@ def test_all_images_available_remotely(available_locally):
             return {'images': [], 'failed': available_locally}
         return {'changed': False}
 
-    result = DockerImageAvailability(execute_module, task_vars=dict(
+    check = DockerImageAvailability(execute_module, task_vars=dict(
         openshift=dict(
             common=dict(
                 service_type='origin',
@@ -81,7 +81,9 @@ def test_all_images_available_remotely(available_locally):
         openshift_deployment_type='origin',
         openshift_image_tag='v3.4',
         group_names=['nodes', 'masters'],
-    )).run()
+    ))
+    check._module_retry_interval = 0
+    result = check.run()
 
     assert not result.get('failed', False)
 
@@ -97,7 +99,7 @@ def test_all_images_unavailable():
             'changed': False,
         }
 
-    actual = DockerImageAvailability(execute_module, task_vars=dict(
+    check = DockerImageAvailability(execute_module, task_vars=dict(
         openshift=dict(
             common=dict(
                 service_type='origin',
@@ -109,7 +111,9 @@ def test_all_images_unavailable():
         openshift_deployment_type="openshift-enterprise",
         openshift_image_tag='latest',
         group_names=['nodes', 'masters'],
-    )).run()
+    ))
+    check._module_retry_interval = 0
+    actual = check.run()
 
     assert actual['failed']
     assert "required Docker images are not available" in actual['msg']
@@ -136,7 +140,7 @@ def test_skopeo_update_failure(message, extra_words):
 
         return {'changed': False}
 
-    actual = DockerImageAvailability(execute_module, task_vars=dict(
+    check = DockerImageAvailability(execute_module, task_vars=dict(
         openshift=dict(
             common=dict(
                 service_type='origin',
@@ -148,7 +152,9 @@ def test_skopeo_update_failure(message, extra_words):
         openshift_deployment_type="openshift-enterprise",
         openshift_image_tag='',
         group_names=['nodes', 'masters'],
-    )).run()
+    ))
+    check._module_retry_interval = 0
+    actual = check.run()
 
     assert actual["failed"]
     for word in extra_words:

+ 2 - 2
roles/openshift_health_checker/test/package_availability_test.py

@@ -56,7 +56,7 @@ def test_package_availability(task_vars, must_have_packages, must_not_have_packa
         assert 'packages' in module_args
         assert set(module_args['packages']).issuperset(must_have_packages)
         assert not set(module_args['packages']).intersection(must_not_have_packages)
-        return return_value
+        return {'foo': return_value}
 
     result = PackageAvailability(execute_module, task_vars).run()
-    assert result is return_value
+    assert result['foo'] is return_value

+ 2 - 2
roles/openshift_health_checker/test/package_update_test.py

@@ -9,7 +9,7 @@ def test_package_update():
         assert 'packages' in module_args
         # empty list of packages means "generic check if 'yum update' will work"
         assert module_args['packages'] == []
-        return return_value
+        return {'foo': return_value}
 
     result = PackageUpdate(execute_module).run()
-    assert result is return_value
+    assert result['foo'] is return_value

+ 4 - 4
roles/openshift_health_checker/test/package_version_test.py

@@ -52,7 +52,7 @@ def test_invalid_openshift_release_format():
 ])
 def test_package_version(openshift_release):
 
-    return_value = object()
+    return_value = {"foo": object()}
 
     def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None, *_):
         assert module_name == 'aos_version'
@@ -66,7 +66,7 @@ def test_package_version(openshift_release):
 
     check = PackageVersion(execute_module, task_vars_for(openshift_release, 'origin'))
     result = check.run()
-    assert result is return_value
+    assert result == return_value
 
 
 @pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [
@@ -79,7 +79,7 @@ def test_package_version(openshift_release):
 ])
 def test_docker_package_version(deployment_type, openshift_release, expected_docker_version):
 
-    return_value = object()
+    return_value = {"foo": object()}
 
     def execute_module(module_name=None, module_args=None, *_):
         assert module_name == 'aos_version'
@@ -93,7 +93,7 @@ def test_docker_package_version(deployment_type, openshift_release, expected_doc
 
     check = PackageVersion(execute_module, task_vars_for(openshift_release, deployment_type))
     result = check.run()
-    assert result is return_value
+    assert result == return_value
 
 
 @pytest.mark.parametrize('group_names,is_containerized,is_active', [