Browse Source

Merge pull request #1361 from stenwt/sturpin-2016-02-09-registry-health

changed registry checks to alert based on number of registries with p…
Sten Turpin 9 years ago
parent
commit
346dc20c9f
1 changed file with 12 additions and 5 deletions
  1. 12 5
      roles/os_zabbix/vars/template_openshift_master.yml

+ 12 - 5
roles/os_zabbix/vars/template_openshift_master.yml

@@ -7,8 +7,8 @@ g_template_openshift_master:
     - Openshift Master
     key: openshift.master.app.create
 
-  - key: openshift.master.registry.healthz
-    description: "Shows the health status of the cluster's docker registry"
+  - key: openshift.master.registry.healthy_pct
+    description: "Shows the percentage of healthy registries in the cluster"
     type: int
     applications:
     - Openshift Master
@@ -333,9 +333,16 @@ g_template_openshift_master:
     - 'Openshift Master process not running on {HOST.NAME}'
     priority: avg
 
-  - name: 'Docker Registry check failed on {HOST.NAME}'
-    expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+  - name: 'One or more Docker Registries is unhealthy according to {HOST.NAME}'  # avg alert: some, but fewer than half, of the registries are unhealthy
+    expression: '{Template Openshift Master:openshift.master.registry.healthy_pct.max(#2)}<100 and {Template Openshift Master:openshift.master.registry.healthy_pct.max(#2)}>50'  # max(#2) spans the two newest samples; last(#2) would read only the second-newest one
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    priority: avg
+
+  - name: 'Multiple Docker Registries are unhealthy according to {HOST.NAME}'  # high alert: healthy percentage stayed below 51 for the two newest samples
+    expression: '{Template Openshift Master:openshift.master.registry.healthy_pct.max(#2)}<51'  # max(#2) spans the two newest samples; last(#2) would read only the second-newest one
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_registry.asciidoc'
     dependencies:
     - 'Openshift Master process not running on {HOST.NAME}'
     priority: high