# prometheus.yml.j2 (8.9 KB)


  # Rule files to load. Each entry is a glob pattern; this one picks up every
  # file whose name ends in `.rules`.
  rule_files:
  - '*.rules'
  # A scrape configuration for running Prometheus on a Kubernetes cluster.
  # This uses separate scrape configs for cluster components (i.e. API server, node)
  # and services to allow each to use different authentication configs.
  #
  # Kubernetes labels will be added as Prometheus labels on metrics via the
  # `labelmap` relabeling action.

  # Scrape config for API servers.
  #
  # Kubernetes exposes API servers as endpoints of the default/kubernetes
  # service, so this job uses the `endpoints` role and relabeling to keep only
  # the endpoints associated with the default/kubernetes service on the
  # default named port `https`. This works for single API server deployments as
  # well as HA API server deployments.
  scrape_configs:
  - job_name: 'kubernetes-apiservers'

    # Discover endpoints, restricted to the `default` namespace.
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - default

    # Scrape over TLS, authenticating with the pod's service account token and
    # verifying the server against the service account CA bundle.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Keep only the default/kubernetes service endpoints for the https port. This
    # will add targets for each API server which Kubernetes adds an endpoint to
    # the default/kubernetes service.
    relabel_configs:
    # The two source labels are joined with `;` before matching.
    - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      action: keep
      regex: kubernetes;https
  # Scrape config for controllers.
  #
  # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
  # the controllers.
  #
  - job_name: 'kubernetes-controllers'

    # Scrape over TLS using the pod's service account credentials.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Discover endpoints, restricted to the `default` namespace.
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - default

    # Keep only the default/kubernetes service endpoints for the https port, and then
    # set the port to 8444. This is the default configuration for the controllers on OpenShift
    # masters.
    relabel_configs:
    - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      action: keep
      regex: kubernetes;https
    # Rewrite `host:<port>` to `host:8444`; the original port sits in a
    # non-capturing group and is discarded.
    - source_labels: [__address__]
      action: replace
      target_label: __address__
      regex: (.+)(?::\d+)
      replacement: $1:8444
  # Scrape config for nodes.
  #
  # Each node exposes a /metrics endpoint that contains operational metrics for
  # the Kubelet and other components.
  - job_name: 'kubernetes-nodes'

    # Scrape over TLS using the pod's service account credentials.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # One target per cluster node.
    kubernetes_sd_configs:
    - role: node

    # Drop a very high cardinality metric that is incorrect in 3.7. It will be
    # fixed in 3.9.
    metric_relabel_configs:
    - source_labels: [__name__]
      action: drop
      regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'

    # Copy every Kubernetes node label onto the target, using the captured
    # suffix as the Prometheus label name.
    relabel_configs:
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
  # Scrape config for cAdvisor.
  #
  # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
  # reports container metrics for each running pod. Scrape those by default.
  - job_name: 'kubernetes-cadvisor'

    # Scrape over TLS using the pod's service account credentials.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Same node targets as the kubelet job, but a different metrics path.
    metrics_path: /metrics/cadvisor

    kubernetes_sd_configs:
    - role: node

    # Exclude a set of high cardinality metrics that can contribute to significant
    # memory use in large clusters. These can be selectively enabled as necessary
    # for medium or small clusters.
    metric_relabel_configs:
    - source_labels: [__name__]
      action: drop
      regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'

    # Copy every Kubernetes node label onto the target.
    relabel_configs:
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
  # Scrape config for service endpoints.
  #
  # The relabeling allows the actual service scrape endpoint to be configured
  # via the following annotations:
  #
  # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
  # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
  #   to set this to `https` & most likely set the `tls_config` of the scrape config.
  # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
  # * `prometheus.io/port`: If the metrics are exposed on a different port to the
  #   service then set this appropriately.
  - job_name: 'kubernetes-service-endpoints'

    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # TODO: this should be per target
      # Certificate verification is disabled for every target of this job.
      insecure_skip_verify: true

    # No namespace restriction here; filtering happens in relabel_configs.
    kubernetes_sd_configs:
    - role: endpoints

    relabel_configs:
    # only scrape infrastructure components
    - source_labels: [__meta_kubernetes_namespace]
      action: keep
      regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
    # drop infrastructure components managed by other scrape targets
    - source_labels: [__meta_kubernetes_service_name]
      action: drop
      regex: 'prometheus-node-exporter'
    # only those that have requested scraping
    # (quoted so the value stays a string regardless of YAML boolean typing)
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
      action: keep
      regex: 'true'
    # honor the scheme annotation when it is `http` or `https`
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
      action: replace
      target_label: __scheme__
      regex: (https?)
    # honor the path annotation when it is non-empty
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # when a port annotation is present, swap it in for the discovered port
    - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
      action: replace
      target_label: __address__
      regex: (.+)(?::\d+);(\d+)
      replacement: $1:$2
    # copy every Kubernetes service label onto the target
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    # expose the namespace and service name as regular labels
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_service_name]
      action: replace
      target_label: kubernetes_name
  # Scrape config for node-exporter, which is expected to be running on port 9100.
  - job_name: 'kubernetes-nodes-exporter'

    # No `scheme` is set for this job.
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    # One target per cluster node.
    kubernetes_sd_configs:
    - role: node

    # The rules below run in order against each scraped series.
    metric_relabel_configs:
    # drop the cpu, disk and scrape_collector series outright
    - source_labels: [__name__]
      action: drop
      regex: 'node_cpu|node_(disk|scrape_collector)_.+'
    # preserve a subset of the network, netstat, vmstat, and filesystem series:
    # step 1, give the series to keep a temporary `renamed_` prefix
    - source_labels: [__name__]
      action: replace
      regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
      target_label: __name__
      replacement: renamed_$1
    # step 2, drop everything left in those four families
    - source_labels: [__name__]
      action: drop
      regex: 'node_(netstat|vmstat|filesystem|network)_.+'
    # step 3, strip the temporary prefix from the preserved series
    - source_labels: [__name__]
      action: replace
      regex: 'renamed_(.+)'
      target_label: __name__
      replacement: $1
    # drop any partial expensive series
    # network series for devices whose name starts with `veth`
    - source_labels: [__name__, device]
      action: drop
      regex: 'node_network_.+;veth.+'
    # filesystem series for any non-empty mountpoint other than `/`
    - source_labels: [__name__, mountpoint]
      action: drop
      regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'

    relabel_configs:
    # point the target at port 9100 instead of the discovered 10250
    - source_labels: [__address__]
      regex: '(.*):10250'
      replacement: '${1}:9100'
      target_label: __address__
    # NOTE(review): labels starting with `__` are removed once target relabeling
    # finishes, so this rule presumably leaves no label on the scraped series.
    # If the intent was to set `instance` to the node hostname, the target
    # should be `instance` - confirm before changing, since that would alter
    # the identity of existing series.
    - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
      target_label: __instance__
    # copy every Kubernetes node label onto the target
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
  # Scrape config for the template service broker
  - job_name: 'openshift-template-service-broker'

    # Scrape over TLS. The certificate is verified against the service CA
    # bundle and must match the service DNS name given in `server_name`.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      server_name: apiserver.openshift-template-service-broker.svc
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Discover endpoints only in the broker's own namespace.
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - openshift-template-service-broker

    # Keep only the `apiserver` service endpoints on the port named `https`.
    relabel_configs:
    - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      action: keep
      regex: apiserver;https
  # Scrape config for the router
  - job_name: 'openshift-router'

    # Scrape over TLS. The certificate is verified against the service CA
    # bundle and must match the service DNS name given in `server_name`.
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      server_name: router.default.svc
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Discover endpoints, restricted to the `default` namespace.
    kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
        - default

    # Keep only the `router` service endpoints on the port named `1936-tcp`.
    relabel_configs:
    - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
      action: keep
      regex: router;1936-tcp
  # Alertmanager to send alerts to: a single static target reached over plain
  # HTTP at localhost:9093.
  alerting:
    alertmanagers:
    - scheme: http
      static_configs:
      - targets:
        - "localhost:9093"