Browse Source

Add support for kuryr-controller and kuryr-cni health checks

This commit modifies the previous support for the kuryr-controller
pools readiness check, using new functionality added in kuryr that
performs both readiness and liveness checks for the kuryr
controller as well as for the kuryr CNI (daemonized).

The checks do not cover just the pool checking, but also the watches and
handlers, as well as kuryr-controller connectivity with Neutron,
Keystone and the Kubernetes API. For the CNI, they cover the presence
of NET_ADMIN capabilities, IPDB being in working order, the connection to
the Kubernetes API, the quantity of CNI add failures, the health of CNI
components and the existence of memory leaks.
Luis Tomas Bolivar 7 years ago
parent
commit
10d9daf4d9

+ 3 - 0
roles/kuryr/defaults/main.yaml

@@ -14,6 +14,9 @@ kuryr_openstack_project_domain_name: default
 # Kuryr OpenShift namespace
 kuryr_namespace: openshift-infra
 
+# Kuryr health check server port
+kuryr_healthcheck_port: 8082
+
 # Whether to run the cni plugin in debug mode
 kuryr_cni_debug: "false"
 

+ 12 - 0
roles/kuryr/templates/cni-daemonset.yaml.j2

@@ -49,6 +49,18 @@ spec:
           mountPath: /host_proc
         - name: openvswitch
           mountPath: /var/run/openvswitch
+        readinessProbe:
+          httpGet:
+            path: /ready
+            port: {{ kuryr_healthcheck_port }}
+            scheme: HTTP
+          initialDelaySeconds: 15
+          timeoutSeconds: 5
+        livenessProbe:
+          httpGet:
+            path: /alive
+            port: {{ kuryr_healthcheck_port }}
+          initialDelaySeconds: 15
       volumes:
         - name: bin
           hostPath:

+ 9 - 0
roles/kuryr/templates/configmap.yaml.j2

@@ -332,6 +332,9 @@ data:
     # Pod VIF drivers vs Pool Drivers mapping allowed
     pools_vif_drivers = nested:nested-vlan,neutron:neutron-vif
 
+    [health_server]
+    port = {{ kuryr_healthcheck_port }}
+
   kuryr-cni.conf: |+
     [DEFAULT]
 
@@ -656,3 +659,9 @@ data:
     # From kuryr_kubernetes
     #
     lock_path = {{ kuryr_openstack_lock_path | default('/var/kuryr-lock') }}
+
+    [cni_health_server]
+    port = {{ kuryr_healthcheck_port }}
+
+    # Maximum memory usage (MiB) before kuryr-daemon is marked as unhealthy
+    max_memory_usage = -1

+ 10 - 6
roles/kuryr/templates/controller-deployment.yaml.j2

@@ -22,13 +22,17 @@ spec:
       - image: {{ openshift_openstack_kuryr_controller_image }}
         imagePullPolicy: IfNotPresent
         name: controller
-{% if kuryr_openstack_enable_pools | default(false) %}
         readinessProbe:
-          exec:
-            command:
-            - cat
-            - /tmp/pools_loaded
-{% endif %}
+          httpGet:
+            path: /ready
+            port: {{ kuryr_healthcheck_port }}
+            scheme: HTTP
+          timeoutSeconds: 5
+        livenessProbe:
+          httpGet:
+            path: /alive
+            port: {{ kuryr_healthcheck_port }}
+          initialDelaySeconds: 15
         terminationMessagePath: "/dev/termination-log"
         # FIXME(dulek): This shouldn't be required, but without it selinux is
         #               complaining about access to kuryr.conf.