# prometheus.yml.j2 (11 KB)


  # Rule files to load; the entry is quoted because a bare leading `*` would be
  # read by YAML as an alias.
  rule_files:
  - "*.rules"
  # A scrape configuration for running Prometheus on a Kubernetes cluster.
  # Cluster components (i.e. API server, node) and services get separate scrape
  # configs so that each can use a different authentication config.
  #
  # Kubernetes labels are added as Prometheus labels on metrics via the
  # `labelmap` relabeling action.
  scrape_configs:
  # Scrape config for API servers.
  #
  # Kubernetes exposes API servers as endpoints of the default/kubernetes
  # service, so this job uses the `endpoints` role and relabelling to keep only
  # the endpoints associated with that service on the default named port
  # `https`. This works for single API server deployments as well as HA API
  # server deployments.
  - job_name: 'kubernetes-apiservers'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - default
    # Keep only the default/kubernetes service endpoints for the https port.
    # This adds a target for each API server that Kubernetes registers as an
    # endpoint of the default/kubernetes service.
    relabel_configs:
    - source_labels:
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      action: keep
      regex: 'kubernetes;https'
  # Scrape config for controllers.
  #
  # Each master node exposes a /metrics endpoint on :8444 that contains
  # operational metrics for the controllers.
  - job_name: 'kubernetes-controllers'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - default
    # Keep only the default/kubernetes service endpoints for the https port,
    # then set the port to 8444. This is the default configuration for the
    # controllers on OpenShift masters.
    relabel_configs:
    - source_labels:
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      action: keep
      regex: 'kubernetes;https'
    # Swap whatever port was discovered for :8444, keeping the host part.
    - source_labels: [__address__]
      action: replace
      regex: '(.+)(?::\d+)'
      target_label: __address__
      replacement: '$1:8444'
  # Scrape config for nodes.
  #
  # Each node exposes a /metrics endpoint that contains operational metrics for
  # the Kubelet and other components.
  - job_name: 'kubernetes-nodes'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: node
    # Copy every Kubernetes node label onto the target as a Prometheus label.
    relabel_configs:
    - action: labelmap
      regex: '__meta_kubernetes_node_label_(.+)'
    # Drop a very high cardinality metric that is incorrect in 3.7. It will be
    # fixed in 3.9.
    metric_relabel_configs:
    - source_labels: [__name__]
      action: drop
      regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
  # Scrape config for cAdvisor.
  #
  # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
  # reports container metrics for each running pod. Scrape those by default.
  - job_name: 'kubernetes-cadvisor'
    scheme: https
    metrics_path: /metrics/cadvisor
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: node
    # Copy every Kubernetes node label onto the target as a Prometheus label.
    relabel_configs:
    - action: labelmap
      regex: '__meta_kubernetes_node_label_(.+)'
    # Exclude a set of high cardinality metrics that can contribute to
    # significant memory use in large clusters. These can be selectively
    # enabled as necessary for medium or small clusters.
    metric_relabel_configs:
    - source_labels: [__name__]
      action: drop
      regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
  # Scrape config for service endpoints.
  #
  # The relabeling allows the actual service scrape endpoint to be configured
  # via the following annotations:
  #
  # * `prometheus.io/scrape`: Only scrape services that have a value of `true`.
  # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will
  #   need to set this to `https` and most likely set the `tls_config` of the
  #   scrape config.
  # * `prometheus.io/path`: If the metrics path is not `/metrics`, override
  #   this.
  # * `prometheus.io/port`: If the metrics are exposed on a different port to
  #   the service, then set this appropriately.
  - job_name: 'kubernetes-service-endpoints'
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # TODO: this should be per target
      insecure_skip_verify: true
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    # only scrape infrastructure components
    - source_labels: [__meta_kubernetes_namespace]
      action: keep
      regex: 'default|metrics|kube-.+|openshift|openshift-.+'
    # drop logging components managed by other scrape targets
    # (the Jinja default uses double quotes so the unrendered template is still
    # a valid single-quoted YAML scalar)
    - source_labels: [__meta_kubernetes_namespace]
      action: drop
      regex: '{{ openshift_logging_namespace | default("openshift-logging") }}'
    # drop infrastructure components managed by other scrape targets
    - source_labels: [__meta_kubernetes_service_name]
      action: drop
      regex: 'prometheus-node-exporter'
    # only those that have requested scraping
    # (quoted so YAML keeps the pattern a string rather than a boolean)
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
      action: keep
      regex: 'true'
    # take the scrape scheme from the annotation when it is http or https
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
      action: replace
      target_label: __scheme__
      regex: '(https?)'
    # take the metrics path from the annotation when it is non-empty
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: '(.+)'
    # replace the discovered port with the annotated port, keeping the host
    - source_labels:
      - __address__
      - __meta_kubernetes_service_annotation_prometheus_io_port
      action: replace
      target_label: __address__
      regex: '(.+)(?::\d+);(\d+)'
      replacement: '$1:$2'
    # copy Kubernetes service labels onto the target
    - action: labelmap
      regex: '__meta_kubernetes_service_label_(.+)'
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_service_name]
      action: replace
      target_label: kubernetes_name
  # Scrape logging endpoints.
  #
  # The relabeling allows the actual service scrape endpoint to be configured
  # via the following annotations:
  #
  # * `prometheus.io/scrape`: Only scrape services that have a value of `true`.
  # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will
  #   need to set this to `https` and most likely set the `tls_config` of the
  #   scrape config.
  # * `prometheus.io/path`: If the metrics path is not `/metrics`, override
  #   this.
  # * `prometheus.io/port`: If the metrics are exposed on a different port to
  #   the service, then set this appropriately.
  - job_name: 'kubernetes-logging-service-endpoints'
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecure_skip_verify: true
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        # The Jinja default uses double quotes so the unrendered template is
        # still a valid single-quoted YAML scalar.
        - '{{ openshift_logging_namespace | default("openshift-logging") }}'
    relabel_configs:
    # drop infrastructure components managed by other scrape targets
    - source_labels: [__meta_kubernetes_service_name]
      action: drop
      regex: 'prometheus-node-exporter'
    # only those that have requested scraping
    # (quoted so YAML keeps the pattern a string rather than a boolean)
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
      action: keep
      regex: 'true'
    # take the scrape scheme from the annotation when it is http or https
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
      action: replace
      target_label: __scheme__
      regex: '(https?)'
    # take the metrics path from the annotation when it is non-empty
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: '(.+)'
    # replace the discovered port with the annotated port, keeping the host
    - source_labels:
      - __address__
      - __meta_kubernetes_service_annotation_prometheus_io_port
      action: replace
      target_label: __address__
      regex: '(.+)(?::\d+);(\d+)'
      replacement: '$1:$2'
    # copy Kubernetes service labels onto the target
    - action: labelmap
      regex: '__meta_kubernetes_service_label_(.+)'
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_service_name]
      action: replace
      target_label: kubernetes_name
  # Scrape config for node-exporter, which is expected to be running on port
  # 9102.
  - job_name: 'kubernetes-nodes-exporter'
    # NOTE(review): no `scheme` is set on this job, so Prometheus' default
    # scheme applies; confirm whether this `tls_config` is actually used.
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    kubernetes_sd_configs:
    - role: node
    relabel_configs:
    # Point the target at port 9102 instead of the discovered :10250 address.
    - source_labels: [__address__]
      regex: '(.*):10250'
      target_label: __address__
      replacement: '${1}:9102'
    # NOTE(review): Prometheus documents that labels starting with `__` are
    # removed once target relabeling completes, so `__instance__` would not
    # reach the scraped series. Confirm whether `instance` was intended.
    - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
      target_label: __instance__
    - action: labelmap
      regex: '__meta_kubernetes_node_label_(.+)'
    # The order of the rules below matters: series to keep are renamed out of
    # the way, the remaining series are dropped, then the names are restored.
    metric_relabel_configs:
    - source_labels: [__name__]
      action: drop
      regex: 'node_cpu|node_(disk|scrape_collector)_.+'
    # preserve a subset of the network, netstat, vmstat, and filesystem series
    # by temporarily prefixing their names with `renamed_`
    - source_labels: [__name__]
      action: replace
      regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
      target_label: __name__
      replacement: 'renamed_$1'
    # drop everything in those families that was not renamed above
    - source_labels: [__name__]
      action: drop
      regex: 'node_(netstat|vmstat|filesystem|network)_.+'
    # strip the temporary `renamed_` prefix again
    - source_labels: [__name__]
      action: replace
      regex: 'renamed_(.+)'
      target_label: __name__
      replacement: '$1'
    # drop any partial expensive series
    - source_labels: [__name__, device]
      action: drop
      regex: 'node_network_.+;veth.+'
    - source_labels: [__name__, mountpoint]
      action: drop
      regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
  # Scrape config for the template service broker
  - job_name: 'openshift-template-service-broker'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      server_name: apiserver.openshift-template-service-broker.svc
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - openshift-template-service-broker
    # Keep only the `https` port of the `apiserver` service.
    relabel_configs:
    - source_labels:
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      action: keep
      regex: 'apiserver;https'
  # Scrape config for the router
  - job_name: 'openshift-router'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      server_name: router.default.svc
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - default
    # Keep only the `1936-tcp` port of the `router` service.
    relabel_configs:
    - source_labels:
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      action: keep
      regex: 'router;1936-tcp'
  # Alertmanager endpoint: a single static target on localhost:9093 over http.
  alerting:
    alertmanagers:
    - scheme: http
      static_configs:
      - targets:
        - 'localhost:9093'