|
@@ -201,6 +201,18 @@ g_template_openshift_master:
|
|
|
applications:
|
|
|
- Openshift Master Metrics
|
|
|
|
|
|
+ - key: openshift.master.nodesnotready.count  # item key referenced by the 'Hosts not ready according to {HOST.NAME}' trigger expression
|
|
|
+ description: "This check shows how many nodes in a cluster are in NotReady state."
|
|
|
+ type: int  # the value is a node count
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
+ - key: openshift.master.nodesnotschedulable.count  # item key referenced by the 'Hosts not schedulable according to {HOST.NAME}' trigger expression
|
|
|
+ description: "This check shows how many nodes in a cluster are not schedulable."
|
|
|
+ type: int  # the value is a node count
|
|
|
+ applications:
|
|
|
+ - Openshift Master
|
|
|
+
|
|
|
- key: openshift.master.apiserver.latency.summary.pods.quantile.list.5
|
|
|
description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
|
|
|
type: int
|
|
@@ -361,6 +373,20 @@ g_template_openshift_master:
|
|
|
- 'Openshift Master API health check is failing on {HOST.NAME}'
|
|
|
priority: high
|
|
|
|
|
|
+ - name: 'Hosts not ready according to {HOST.NAME}'  # high-priority alert on the nodesnotready.count item
|
|
|
+ expression: '{Template Openshift Master:openshift.master.nodesnotready.count.last(#2)}>0'  # item reference must be fully braced: {template:key.function(param)}; NOTE(review): per Zabbix docs last(#2) is the 2nd most recent value only - confirm this is intended
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master procecss not running on {HOST.NAME}'  # NOTE(review): must match the referenced trigger's name exactly; 'procecss' looks like a typo - confirm against that trigger's definition
|
|
|
+ priority: high
|
|
|
+
|
|
|
+ - name: 'Hosts not schedulable according to {HOST.NAME}'  # informational alert on the nodesnotschedulable.count item
|
|
|
+ expression: '{Template Openshift Master:openshift.master.nodesnotschedulable.count.last(#2)}>0'  # item reference must be fully braced: {template:key.function(param)}; NOTE(review): per Zabbix docs last(#2) is the 2nd most recent value only - confirm this is intended
|
|
|
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
|
|
|
+ dependencies:
|
|
|
+ - 'Openshift Master procecss not running on {HOST.NAME}'  # NOTE(review): must match the referenced trigger's name exactly; 'procecss' looks like a typo - confirm against that trigger's definition
|
|
|
+ priority: info
|
|
|
+
|
|
|
zgraphs:
|
|
|
- name: Openshift Master API Server Latency Pods LIST Quantiles
|
|
|
width: 900
|