Bläddra i källkod

Zabbix: added trigger dependencies to certain master checks

Matt Woodson 9 år sedan
förälder
incheckning
8c1a1a8ad0
1 ändrade filer med 37 tillägg och 26 borttagningar
  1. 37 26
      roles/os_zabbix/vars/template_openshift_master.yml

+ 37 - 26
roles/os_zabbix/vars/template_openshift_master.yml

@@ -231,26 +231,6 @@ g_template_openshift_master:
     - Openshift Master Metrics
 
   ztriggers:
-  - name: 'Application creation has failed on {HOST.NAME}'
-    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
-    priority: avg
-
-  - name: 'Openshift Master API health check is failing on {HOST.NAME}'
-    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
-    priority: high
-
-  - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
-    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
-    priority: high
-
-  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
-    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
-    priority: avg
-
   - name: 'Openshift Master process not running on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
@@ -261,6 +241,16 @@ g_template_openshift_master:
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
     priority: high
 
+  - name: 'Low number of etcd watchers on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+    priority: avg
+
+  - name: 'Etcd ping failed on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+    priority: high
+
   - name: 'Number of users for Openshift Master on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
@@ -271,19 +261,40 @@ g_template_openshift_master:
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
     priority: info
 
-  - name: 'Low number of etcd watchers on {HOST.NAME}'
-    expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+  # Triggers that depend on other triggers go below; the triggers they depend on must be defined above so they are created first
+  - name: 'Application creation has failed on {HOST.NAME}'
+    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
     priority: avg
 
-  - name: 'Etcd ping failed on {HOST.NAME}'
-    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
-    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
+  - name: 'Openshift Master API health check is failing on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
     priority: high
 
+  - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    priority: high
+
+  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    priority: avg
+
   - name: 'Docker Registry check failed on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
     priority: high
 
   zgraphs: