|
@@ -68,6 +68,36 @@ g_template_openshift_master:
|
|
|
applications:
|
|
|
- Openshift Master
|
|
|
|
|
|
+ - key: openshift.master.pv.total.count
|
|
|
+ description: Total number of Persistent Volumes in the Openshift Cluster
|
|
|
+ type: int
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
+ - key: openshift.master.pv.available.count
|
|
|
+ description: Total number of Available Persistent Volumes in the Openshift Cluster
|
|
|
+ type: int
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
+ - key: openshift.master.pv.released.count
|
|
|
+ description: Total number of Released Persistent Volumes in the Openshift Cluster
|
|
|
+ type: int
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
+ - key: openshift.master.pv.bound.count
|
|
|
+ description: Total number of Bound Persistent Volumes in the Openshift Cluster
|
|
|
+ type: int
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
+ - key: openshift.master.pv.failed.count
|
|
|
+ description: Total number of Failed Persistent Volumes in the Openshift Cluster
|
|
|
+ type: int
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
- key: openshift.master.etcd.create.success
|
|
|
description: Show number of successful create actions
|
|
|
type: int
|
|
@@ -201,26 +231,6 @@ g_template_openshift_master:
|
|
|
- Openshift Master Metrics
|
|
|
|
|
|
ztriggers:
|
|
|
- - name: 'Application creation has failed on {HOST.NAME}'
|
|
|
- expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
|
|
|
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
|
|
|
- priority: avg
|
|
|
-
|
|
|
- - name: 'Openshift Master API health check is failing on {HOST.NAME}'
|
|
|
- expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
|
|
|
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
- priority: high
|
|
|
-
|
|
|
- - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
|
|
|
- expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
|
|
|
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
- priority: high
|
|
|
-
|
|
|
- - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
|
|
|
- expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
|
|
|
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
- priority: avg
|
|
|
-
|
|
|
- name: 'Openshift Master process not running on {HOST.NAME}'
|
|
|
expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
@@ -231,6 +241,16 @@ g_template_openshift_master:
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
priority: high
|
|
|
|
|
|
+ - name: 'Low number of etcd watchers on {HOST.NAME}'
|
|
|
+ expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
|
|
|
+ priority: avg
|
|
|
+
|
|
|
+ - name: 'Etcd ping failed on {HOST.NAME}'
|
|
|
+ expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
|
|
|
+ priority: high
|
|
|
+
|
|
|
- name: 'Number of users for Openshift Master on {HOST.NAME}'
|
|
|
expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
@@ -241,19 +261,40 @@ g_template_openshift_master:
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
priority: info
|
|
|
|
|
|
- - name: 'Low number of etcd watchers on {HOST.NAME}'
|
|
|
- expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10'
|
|
|
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
|
|
|
+ # Triggers that depend on other triggers go below this point; every trigger they reference must already be defined above so that it is created first
|
|
|
+ - name: 'Application creation has failed on {HOST.NAME}'
|
|
|
+ expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master process not running on {HOST.NAME}'
|
|
|
priority: avg
|
|
|
|
|
|
- - name: 'Etcd ping failed on {HOST.NAME}'
|
|
|
- expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0'
|
|
|
- url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc'
|
|
|
+ - name: 'Openshift Master API health check is failing on {HOST.NAME}'
|
|
|
+ expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master process not running on {HOST.NAME}'
|
|
|
+ priority: high
|
|
|
+
|
|
|
+ - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
|
|
|
+ expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master process not running on {HOST.NAME}'
|
|
|
priority: high
|
|
|
|
|
|
+ - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
|
|
|
+ expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master process not running on {HOST.NAME}'
|
|
|
+ priority: avg
|
|
|
+
|
|
|
- name: 'Docker Registry check failed on {HOST.NAME}'
|
|
|
expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1'
|
|
|
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master process not running on {HOST.NAME}'
|
|
|
priority: high
|
|
|
|
|
|
zgraphs:
|