|
@@ -1,10 +1,5 @@
|
|
|
rule_files:
|
|
|
- - 'prometheus.rules'
|
|
|
-{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %}
|
|
|
- - 'prometheus.additional.rules'
|
|
|
-{% endif %}
|
|
|
-
|
|
|
-
|
|
|
+ - '*.rules'
|
|
|
|
|
|
# A scrape configuration for running Prometheus on a Kubernetes cluster.
|
|
|
# This uses separate scrape configs for cluster components (i.e. API server, node)
|
|
@@ -39,31 +34,11 @@ scrape_configs:
|
|
|
action: keep
|
|
|
regex: default;kubernetes;https
|
|
|
|
|
|
-# Scrape config for nodes.
|
|
|
-#
|
|
|
-# Each node exposes a /metrics endpoint that contains operational metrics for
|
|
|
-# the Kubelet and other components.
|
|
|
-- job_name: 'kubernetes-nodes'
|
|
|
-
|
|
|
- scheme: https
|
|
|
- tls_config:
|
|
|
- ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
|
- bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
|
-
|
|
|
- kubernetes_sd_configs:
|
|
|
- - role: node
|
|
|
-
|
|
|
- relabel_configs:
|
|
|
- - action: labelmap
|
|
|
- regex: __meta_kubernetes_node_label_(.+)
|
|
|
-
|
|
|
# Scrape config for controllers.
|
|
|
#
|
|
|
# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
|
|
|
# the controllers.
|
|
|
#
|
|
|
-# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
|
|
|
-# endpoints.
|
|
|
- job_name: 'kubernetes-controllers'
|
|
|
|
|
|
scheme: https
|
|
@@ -87,6 +62,27 @@ scrape_configs:
|
|
|
regex: (.+)(?::\d+)
|
|
|
replacement: $1:8444
|
|
|
|
|
|
+# Scrape config for nodes.
|
|
|
+#
|
|
|
+# Each node exposes a /metrics endpoint that contains operational metrics for
|
|
|
+# the Kubelet and other components.
|
|
|
+- job_name: 'kubernetes-nodes'
|
|
|
+ scheme: https
|
|
|
+ tls_config:
|
|
|
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
|
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
|
+ kubernetes_sd_configs:
|
|
|
+ - role: node
|
|
|
+ # Drop a very high cardinality metric that is incorrect in 3.7. It will be
|
|
|
+ # fixed in 3.9.
|
|
|
+ metric_relabel_configs:
|
|
|
+ - source_labels: [__name__]
|
|
|
+ action: drop
|
|
|
+ regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
|
|
|
+ relabel_configs:
|
|
|
+ - action: labelmap
|
|
|
+ regex: __meta_kubernetes_node_label_(.+)
|
|
|
+
|
|
|
# Scrape config for cAdvisor.
|
|
|
#
|
|
|
# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
|
|
@@ -107,6 +103,14 @@ scrape_configs:
|
|
|
kubernetes_sd_configs:
|
|
|
- role: node
|
|
|
|
|
|
+ # Exclude a set of high cardinality metrics that can contribute to significant
|
|
|
+ # memory use in large clusters. These can be selectively enabled as necessary
|
|
|
+ # for medium or small clusters.
|
|
|
+ metric_relabel_configs:
|
|
|
+ - source_labels: [__name__]
|
|
|
+ action: drop
|
|
|
+ regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
|
|
|
+
|
|
|
relabel_configs:
|
|
|
- action: labelmap
|
|
|
regex: __meta_kubernetes_node_label_(.+)
|
|
@@ -133,38 +137,101 @@ scrape_configs:
|
|
|
- role: endpoints
|
|
|
|
|
|
relabel_configs:
|
|
|
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
|
|
- action: keep
|
|
|
- regex: true
|
|
|
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
|
|
- action: replace
|
|
|
- target_label: __scheme__
|
|
|
- regex: (https?)
|
|
|
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
|
|
+ # only scrape infrastructure components
|
|
|
+ - source_labels: [__meta_kubernetes_namespace]
|
|
|
+ action: keep
|
|
|
+ regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
|
|
|
+ # drop infrastructure components managed by other scrape targets
|
|
|
+ - source_labels: [__meta_kubernetes_service_name]
|
|
|
+ action: drop
|
|
|
+ regex: 'prometheus-node-exporter'
|
|
|
+ # only those that have requested scraping
|
|
|
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
|
|
+ action: keep
|
|
|
+ regex: true
|
|
|
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
|
|
+ action: replace
|
|
|
+ target_label: __scheme__
|
|
|
+ regex: (https?)
|
|
|
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
|
|
+ action: replace
|
|
|
+ target_label: __metrics_path__
|
|
|
+ regex: (.+)
|
|
|
+ - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
|
|
+ action: replace
|
|
|
+ target_label: __address__
|
|
|
+ regex: (.+)(?::\d+);(\d+)
|
|
|
+ replacement: $1:$2
|
|
|
+ - action: labelmap
|
|
|
+ regex: __meta_kubernetes_service_label_(.+)
|
|
|
+ - source_labels: [__meta_kubernetes_namespace]
|
|
|
+ action: replace
|
|
|
+ target_label: kubernetes_namespace
|
|
|
+ - source_labels: [__meta_kubernetes_service_name]
|
|
|
+ action: replace
|
|
|
+ target_label: kubernetes_name
|
|
|
+
|
|
|
+# Scrape config for node-exporter, which is expected to be running on port 9100.
|
|
|
+- job_name: 'kubernetes-nodes-exporter'
|
|
|
+
|
|
|
+ tls_config:
|
|
|
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
|
+
|
|
|
+ kubernetes_sd_configs:
|
|
|
+ - role: node
|
|
|
+
|
|
|
+ metric_relabel_configs:
|
|
|
+ - source_labels: [__name__]
|
|
|
+ action: drop
|
|
|
+ regex: 'node_cpu|node_(disk|scrape_collector)_.+'
|
|
|
+ # preserve a subset of the network, netstat, vmstat, and filesystem series
|
|
|
+ - source_labels: [__name__]
|
|
|
action: replace
|
|
|
- target_label: __metrics_path__
|
|
|
- regex: (.+)
|
|
|
- - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
|
|
+ regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
|
|
|
+ target_label: __name__
|
|
|
+ replacement: renamed_$1
|
|
|
+ - source_labels: [__name__]
|
|
|
+ action: drop
|
|
|
+ regex: 'node_(netstat|vmstat|filesystem|network)_.+'
|
|
|
+ - source_labels: [__name__]
|
|
|
action: replace
|
|
|
+ regex: 'renamed_(.+)'
|
|
|
+ target_label: __name__
|
|
|
+ replacement: $1
|
|
|
+ # drop any partial expensive series
|
|
|
+ - source_labels: [__name__, device]
|
|
|
+ action: drop
|
|
|
+ regex: 'node_network_.+;veth.+'
|
|
|
+ - source_labels: [__name__, mountpoint]
|
|
|
+ action: drop
|
|
|
+ regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
|
|
|
+
|
|
|
+ relabel_configs:
|
|
|
+ - source_labels: [__address__]
|
|
|
+ regex: '(.*):10250'
|
|
|
+ replacement: '${1}:9100'
|
|
|
target_label: __address__
|
|
|
- regex: (.+)(?::\d+);(\d+)
|
|
|
- replacement: $1:$2
|
|
|
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
|
|
|
- action: replace
|
|
|
- target_label: __basic_auth_username__
|
|
|
- regex: (.+)
|
|
|
- - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
|
|
|
- action: replace
|
|
|
- target_label: __basic_auth_password__
|
|
|
- regex: (.+)
|
|
|
+ - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
|
|
|
+ target_label: __instance__
|
|
|
- action: labelmap
|
|
|
- regex: __meta_kubernetes_service_label_(.+)
|
|
|
- - source_labels: [__meta_kubernetes_namespace]
|
|
|
- action: replace
|
|
|
- target_label: kubernetes_namespace
|
|
|
- - source_labels: [__meta_kubernetes_service_name]
|
|
|
- action: replace
|
|
|
- target_label: kubernetes_name
|
|
|
+ regex: __meta_kubernetes_node_label_(.+)
|
|
|
+
|
|
|
+# Scrape config for the template service broker
|
|
|
+- job_name: 'openshift-template-service-broker'
|
|
|
+ scheme: https
|
|
|
+ tls_config:
|
|
|
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
|
|
|
+ server_name: apiserver.openshift-template-service-broker.svc
|
|
|
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
|
+
|
|
|
+ kubernetes_sd_configs:
|
|
|
+ - role: endpoints
|
|
|
+
|
|
|
+ relabel_configs:
|
|
|
+ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
|
|
+ action: keep
|
|
|
+ regex: openshift-template-service-broker;apiserver;https
|
|
|
+
|
|
|
|
|
|
alerting:
|
|
|
alertmanagers:
|