# prometheus.yml.j2: Prometheus configuration template (Jinja2).
  # Rule files to load. The glob is quoted so that YAML does not read the
  # leading `*` as an alias indicator.
  rule_files:
  - "*.rules"
  # Scrape configuration for running Prometheus on a Kubernetes cluster.
  #
  # Cluster components (API server, nodes) and services each get their own
  # scrape config so that each can use a different authentication config.
  #
  # Kubernetes labels are copied onto metrics as Prometheus labels via the
  # `labelmap` relabeling action.
  scrape_configs:
  # Scrape config for API servers.
  #
  # Kubernetes exposes API servers as endpoints of the default/kubernetes
  # service, so this job uses the `endpoints` role and relabeling to keep only
  # the endpoints of that service on its default named port `https`. This
  # works for single API server deployments as well as HA deployments.
  - job_name: 'kubernetes-apiservers'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    # Keep only the https port of the default/kubernetes service. This yields
    # one target per API server that Kubernetes registers as an endpoint of
    # that service.
    - source_labels:
      - __meta_kubernetes_namespace
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      regex: 'default;kubernetes;https'
      action: keep
  # Scrape config for controllers.
  #
  # Each master node exposes a /metrics endpoint on :8444 that contains
  # operational metrics for the controllers.
  - job_name: 'kubernetes-controllers'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    # Step 1: keep only the https port of the default/kubernetes service
    # endpoints (the same selection the API server job uses).
    - source_labels:
      - __meta_kubernetes_namespace
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      regex: 'default;kubernetes;https'
      action: keep
    # Step 2: swap the discovered port for 8444, the default controllers port
    # on OpenShift masters. The non-capturing group matches the old `:port`.
    - source_labels: [__address__]
      regex: '(.+)(?::\d+)'
      action: replace
      target_label: __address__
      replacement: '$1:8444'
  # Scrape config for nodes.
  #
  # Each node exposes a /metrics endpoint that contains operational metrics
  # for the Kubelet and other components.
  - job_name: 'kubernetes-nodes'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: node
    relabel_configs:
    # Copy every Kubernetes node label onto the target as a Prometheus label.
    - regex: '__meta_kubernetes_node_label_(.+)'
      action: labelmap
    metric_relabel_configs:
    # Drop a very high cardinality metric that is incorrect in 3.7. It will be
    # fixed in 3.9.
    - source_labels: [__name__]
      regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
      action: drop
  # Scrape config for cAdvisor.
  #
  # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
  # reports container metrics for each running pod. Scrape those by default;
  # older releases fall back to /metrics.
  - job_name: 'kubernetes-cadvisor'
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    # The version is compared as a (major, minor) pair of integers. Comparing
    # it as a float is wrong for two-digit minors: "1.10" becomes 1.1, which
    # sorts below 1.7 and would select the pre-1.7 path on newer clusters.
    # A missing minor component is treated as 0.
    # The template tags stay at column 0 so that rendering with trim_blocks
    # does not prepend their indentation to the following line.
{% set kube_version_parts = (kubernetes_version | string).split('.') %}
{% if (kube_version_parts[0] | int, kube_version_parts[1] | default(0) | int) >= (1, 7) %}
    metrics_path: /metrics/cadvisor
{% else %}
    metrics_path: /metrics
{% endif %}
    kubernetes_sd_configs:
    - role: node
    # Exclude a set of high cardinality metrics that can contribute to
    # significant memory use in large clusters. These can be selectively
    # enabled as necessary for medium or small clusters.
    metric_relabel_configs:
    - source_labels: [__name__]
      action: drop
      regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
    relabel_configs:
    # Copy every Kubernetes node label onto the target as a Prometheus label.
    - action: labelmap
      regex: '__meta_kubernetes_node_label_(.+)'
  # Scrape config for service endpoints.
  #
  # Relabeling lets each service configure its own scrape endpoint through
  # these annotations:
  #
  # * `prometheus.io/scrape`: only services with a value of `true` are scraped.
  # * `prometheus.io/scheme`: set to `https` if the metrics endpoint is
  #   secured; you will most likely also need to set the `tls_config` of the
  #   scrape config.
  # * `prometheus.io/path`: override this if the metrics path is not
  #   `/metrics`.
  # * `prometheus.io/port`: set this if the metrics are exposed on a different
  #   port from the service.
  - job_name: 'kubernetes-service-endpoints'
    kubernetes_sd_configs:
    - role: endpoints
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # TODO: this should be per target
      insecure_skip_verify: true
    # The rules below are applied in order; do not reorder them.
    relabel_configs:
    # Only scrape infrastructure components.
    - source_labels: [__meta_kubernetes_namespace]
      regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
      action: keep
    # Drop infrastructure components managed by other scrape targets.
    - source_labels: [__meta_kubernetes_service_name]
      regex: 'prometheus-node-exporter'
      action: drop
    # Only keep services that have requested scraping.
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
      regex: 'true'
      action: keep
    # Honor the scheme annotation when it is `http` or `https`.
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
      regex: '(https?)'
      action: replace
      target_label: __scheme__
    # Honor the path annotation when it is non-empty.
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
      regex: '(.+)'
      action: replace
      target_label: __metrics_path__
    # Honor the port annotation: replace the discovered port with the
    # annotated one.
    - source_labels:
      - __address__
      - __meta_kubernetes_service_annotation_prometheus_io_port
      regex: '(.+)(?::\d+);(\d+)'
      action: replace
      target_label: __address__
      replacement: '$1:$2'
    # Copy every Kubernetes service label onto the target.
    - regex: '__meta_kubernetes_service_label_(.+)'
      action: labelmap
    # Record the namespace and service name as regular labels.
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_service_name]
      action: replace
      target_label: kubernetes_name
  # Scrape config for node-exporter, which is expected to be running on port
  # 9100.
  - job_name: 'kubernetes-nodes-exporter'
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    kubernetes_sd_configs:
    - role: node
    relabel_configs:
    # Point the target at node-exporter: rewrite port 10250 to 9100.
    - source_labels: [__address__]
      target_label: __address__
      regex: '(.*):10250'
      replacement: '${1}:9100'
    # NOTE(review): `__instance__` carries a double-underscore prefix, which
    # Prometheus appears to reserve for internal labels that are removed after
    # relabeling; this rule may therefore have no visible effect. Confirm
    # whether `instance` was intended before changing it, since that would
    # alter the identity of every series from this job.
    - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
      target_label: __instance__
    # Copy every Kubernetes node label onto the target.
    - regex: '__meta_kubernetes_node_label_(.+)'
      action: labelmap
    # The rules below are applied in order; the rename / drop / rename-back
    # sequence depends on it.
    metric_relabel_configs:
    - source_labels: [__name__]
      regex: 'node_cpu|node_(disk|scrape_collector)_.+'
      action: drop
    # Preserve a subset of the network, netstat, vmstat, and filesystem
    # series: first move the series to keep out of the way by prefixing their
    # names with `renamed_` ...
    - source_labels: [__name__]
      regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
      action: replace
      target_label: __name__
      replacement: 'renamed_$1'
    # ... then drop everything left in those four families ...
    - source_labels: [__name__]
      regex: 'node_(netstat|vmstat|filesystem|network)_.+'
      action: drop
    # ... and finally strip the temporary prefix again.
    - source_labels: [__name__]
      regex: 'renamed_(.+)'
      action: replace
      target_label: __name__
      replacement: '$1'
    # Drop any partial expensive series: network series for veth devices, and
    # the kept filesystem series for any mountpoint other than exactly `/`.
    - source_labels: [__name__, device]
      regex: 'node_network_.+;veth.+'
      action: drop
    - source_labels: [__name__, mountpoint]
      regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
      action: drop
  # Scrape config for the template service broker.
  - job_name: 'openshift-template-service-broker'
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      server_name: apiserver.openshift-template-service-broker.svc
    kubernetes_sd_configs:
    - role: endpoints
    relabel_configs:
    # Keep only the https port of the `apiserver` service in the
    # `openshift-template-service-broker` namespace.
    - source_labels:
      - __meta_kubernetes_namespace
      - __meta_kubernetes_service_name
      - __meta_kubernetes_endpoint_port_name
      regex: 'openshift-template-service-broker;apiserver;https'
      action: keep
  # Send alerts over plain HTTP to the Alertmanager at localhost:9093.
  alerting:
    alertmanagers:
    - static_configs:
      - targets:
        - 'localhost:9093'
      scheme: http