Browse Source

Rebase Prometheus example for new scrape endpoints and expose alert manager

Mangirdas 7 years ago
parent
commit
3de29f6d5a

+ 8 - 0
playbooks/openshift-prometheus/private/uninstall.yml

@@ -0,0 +1,8 @@
+---
+- name: Uninstall Prometheus
+  hosts: masters[0]
+  tasks:
+  - name: Run the Prometheus Uninstall Role Tasks
+    include_role:
+      name: openshift_prometheus
+      tasks_from: uninstall

+ 2 - 0
playbooks/openshift-prometheus/uninstall.yml

@@ -0,0 +1,2 @@
+---
+- import_playbook: private/uninstall.yml

+ 15 - 0
roles/openshift_prometheus/defaults/main.yaml

@@ -7,9 +7,24 @@ openshift_prometheus_namespace: openshift-metrics
 # defaults hosts for routes
 openshift_prometheus_hostname: prometheus-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}
 openshift_prometheus_alerts_hostname: alerts-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}
+openshift_prometheus_alertmanager_hostname: alertmanager-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}
+
 
 openshift_prometheus_node_selector: {"region":"infra"}
 
+openshift_prometheus_service_port: 443
+openshift_prometheus_service_targetport: 8443
+openshift_prometheus_service_name: prometheus
+openshift_prometheus_alerts_service_targetport: 9443
+openshift_prometheus_alerts_service_name: alerts
+openshift_prometheus_alertmanager_service_targetport: 10443
+openshift_prometheus_alertmanager_service_name: alertmanager
+openshift_prometheus_serviceaccount_annotations: []
+l_openshift_prometheus_serviceaccount_annotations:
+  - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
+  - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
+  - serviceaccounts.openshift.io/oauth-redirectreference.alertmanager='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alertmanager"}}'
+
 # additional prometheus rules file
 openshift_prometheus_additional_rules_file: null
 

+ 10 - 0
roles/openshift_prometheus/tasks/facts.yaml

@@ -0,0 +1,10 @@
+---
+# The kubernetes version impacts the prometheus scraping endpoint
+# so gathering it before constructing the configmap
+- name: get oc version
+  oc_version:
+  register: oc_version
+
+- set_fact:
+    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}"
+    openshift_prometheus_serviceaccount_annotations: "{{ l_openshift_prometheus_serviceaccount_annotations + openshift_prometheus_serviceaccount_annotations|list }}"

+ 53 - 66
roles/openshift_prometheus/tasks/install_prometheus.yaml

@@ -1,4 +1,6 @@
 ---
+# set facts
+- include_tasks: facts.yaml
 
 # namespace
 - name: Add prometheus project
@@ -9,7 +11,7 @@
     description: Prometheus
 
 # secrets
-- name: Set alert and prometheus secrets
+- name: Set alert, alertmanager and prometheus secrets
   oc_secret:
     state: present
     name: "{{ item }}-proxy"
@@ -20,30 +22,24 @@
   with_items:
     - prometheus
     - alerts
+    - alertmanager
 
 # serviceaccount
 - name: create prometheus serviceaccount
   oc_serviceaccount:
     state: present
-    name: prometheus
+    name: "{{ openshift_prometheus_service_name }}"
     namespace: "{{ openshift_prometheus_namespace }}"
-    #    TODO add annotations when supproted
-    #    annotations:
-    #      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
-    #      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
-
-    secrets:
-      - prometheus-secrets
   changed_when: no
 
+
 # TODO remove this when annotations are supported by oc_serviceaccount
 - name: annotate serviceaccount
   command: >
     {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    serviceaccount prometheus
-    serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
-    serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
-
+    serviceaccount {{ openshift_prometheus_service_name }} {{ item }}
+  with_items:
+    "{{ openshift_prometheus_serviceaccount_annotations }}"
 
 # create clusterrolebinding for prometheus serviceaccount
 - name: Set cluster-reader permissions for prometheus
@@ -52,63 +48,61 @@
     namespace: "{{ openshift_prometheus_namespace }}"
     resource_kind: cluster-role
     resource_name: cluster-reader
-    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus"
+    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:{{ openshift_prometheus_service_name }}"
+
 
-# create prometheus and alerts services
-# TODO join into 1 task with loop
-- name: Create prometheus service
+- name: create services for prometheus
   oc_service:
-    state: present
-    name: "{{ item.name }}"
+    name: "{{ openshift_prometheus_service_name }}"
     namespace: "{{ openshift_prometheus_namespace }}"
-    selector:
-      app: prometheus
     labels:
-      name: "{{ item.name }}"
-      #    TODO add annotations when supported
-      #    annotations:
-      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+      name: prometheus
+    annotations:
+      prometheus.io/scrape: 'true'
+      prometheus.io/scheme: https
+      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls
     ports:
-      - port: 443
-        targetPort: 8443
-  with_items:
-    - name: prometheus
+      - name: prometheus
+        port: "{{ openshift_prometheus_service_port }}"
+        targetPort: "{{ openshift_prometheus_service_targetport }}"
+        protocol: TCP
+    selector:
+      app: prometheus
 
-- name: Create alerts service
+- name: create services for alert buffer
   oc_service:
-    state: present
-    name: "{{ item.name }}"
+    name: "{{ openshift_prometheus_alerts_service_name }}"
     namespace: "{{ openshift_prometheus_namespace }}"
+    labels:
+      name: prometheus
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: alerts-tls
+    ports:
+      - name: prometheus
+        port: "{{ openshift_prometheus_service_port }}"
+        targetPort: "{{ openshift_prometheus_alerts_service_targetport }}"
+        protocol: TCP
     selector:
       app: prometheus
+
+- name: create services for alertmanager
+  oc_service:
+    name: "{{ openshift_prometheus_alertmanager_service_name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
     labels:
-      name: "{{ item.name }}"
-      #    TODO add annotations when supported
-      #    annotations:
-      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+      name: prometheus
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: alertmanager-tls
     ports:
-      - port: 443
-        targetPort: 9443
-  with_items:
-    - name: alerts
-
-
-# Annotate services with secret name
-# TODO remove this when annotations are supported by oc_service
-- name: annotate prometheus service
-  command: >
-    {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    service prometheus
-    prometheus.io/scrape='true'
-    prometheus.io/scheme=https
-    service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls
-
-- name: annotate alerts service
-  command: >
-    {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls'
+      - name: prometheus
+        port: "{{ openshift_prometheus_service_port }}"
+        targetPort: "{{ openshift_prometheus_alertmanager_service_targetport }}"
+        protocol: TCP
+    selector:
+      app: prometheus
 
 # create prometheus and alerts routes
+# TODO: oc_route module should support insecureEdgeTerminationPolicy: Redirect
 - name: create prometheus and alerts routes
   oc_route:
     state: present
@@ -122,6 +116,8 @@
       host: "{{ openshift_prometheus_hostname }}"
     - name: alerts
       host: "{{ openshift_prometheus_alerts_hostname }}"
+    - name: alertmanager
+      host: "{{ openshift_prometheus_alertmanager_hostname }}"
 
 # Storage
 - name: create prometheus pvc
@@ -169,15 +165,6 @@
     path: "{{ tempdir }}/prometheus.additional.rules"
   register: additional_rules_stat
 
-# The kubernetes version impacts the prometheus scraping endpoint
-# so gathering it before constructing the configmap
-- name: get oc version
-  oc_version:
-  register: oc_version
-
-- set_fact:
-    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}"
-
 - template:
     src: prometheus.yml.j2
     dest: "{{ tempdir }}/prometheus.yml"
@@ -219,7 +206,7 @@
 - name: Set alertmanager configmap
   oc_configmap:
     state: present
-    name: "prometheus-alerts"
+    name: "alertmanager"
     namespace: "{{ openshift_prometheus_namespace }}"
     from_file:
       alertmanager.yml: "{{ tempdir }}/alertmanager.yml"

+ 3 - 1
roles/openshift_prometheus/tasks/main.yaml

@@ -16,9 +16,11 @@
 - name: Create templates subdirectory
   file:
     state: directory
-    path: "{{ tempdir }}/templates"
+    path: "{{ tempdir }}/{{ item }}"
     mode: 0755
   changed_when: False
+  with_items:
+    - templates
 
 - include_tasks: install_prometheus.yaml
   when: openshift_prometheus_state == 'present'

roles/openshift_prometheus/tasks/uninstall_prometheus.yaml → roles/openshift_prometheus/tasks/uninstall.yaml


+ 71 - 21
roles/openshift_prometheus/templates/prometheus.j2

@@ -19,7 +19,7 @@ spec:
       labels:
         app: prometheus
     spec:
-      serviceAccountName: prometheus
+      serviceAccountName: "{{ openshift_prometheus_service_name }}"
 {% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %}
       nodeSelector:
 {% for key, value in openshift_prometheus_node_selector.items() %}
@@ -47,15 +47,15 @@ spec:
             cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
 {% endif %}
         ports:
-        - containerPort: 8443
+        - containerPort: {{ openshift_prometheus_service_targetport }}
           name: web
         args:
         - -provider=openshift
-        - -https-address=:8443
+        - -https-address=:{{ openshift_prometheus_service_targetport }}
         - -http-address=
         - -email-domain=*
         - -upstream=http://localhost:9090
-        - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}
         - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
         - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
         - -tls-cert=/etc/tls/private/tls.crt
@@ -67,9 +67,9 @@ spec:
         - -skip-auth-regex=^/metrics
         volumeMounts:
         - mountPath: /etc/tls/private
-          name: prometheus-tls
+          name: prometheus-tls-secret
         - mountPath: /etc/proxy/secrets
-          name: prometheus-secrets
+          name: prometheus-proxy-secret
         - mountPath: /prometheus
           name: prometheus-data
 
@@ -104,7 +104,7 @@ spec:
         - mountPath: /prometheus
           name: prometheus-data
 
-      # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
+      # Deploy alert-buffer behind oauth alerts-proxy
       - name: alerts-proxy
         image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}"
         imagePullPolicy: IfNotPresent
@@ -124,15 +124,15 @@ spec:
             cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
 {% endif %}
         ports:
-        - containerPort: 9443
+        - containerPort: {{ openshift_prometheus_alerts_service_targetport }}
           name: web
         args:
         - -provider=openshift
-        - -https-address=:9443
+        - -https-address=:{{ openshift_prometheus_alerts_service_targetport }}
         - -http-address=
         - -email-domain=*
         - -upstream=http://localhost:9099
-        - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}
         - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
         - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
         - -tls-cert=/etc/tls/private/tls.crt
@@ -143,9 +143,9 @@ spec:
         - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
         volumeMounts:
         - mountPath: /etc/tls/private
-          name: alerts-tls
+          name: alerts-tls-secret
         - mountPath: /etc/proxy/secrets
-          name: alerts-secrets
+          name: alerts-proxy-secret
 
       - name: alert-buffer
         args:
@@ -169,11 +169,54 @@ spec:
 {% endif %}
         volumeMounts:
         - mountPath: /alert-buffer
-          name: alert-buffer-data
+          name: alerts-data
         ports:
         - containerPort: 9099
           name: alert-buf
 
+      # Deploy alertmanager behind oauth alertmanager-proxy
+      - name: alertmanager-proxy
+        image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}"
+        imagePullPolicy: IfNotPresent
+        requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+          memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+          cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}"
+{% endif %}
+        limits:
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+          memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+          cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
+{% endif %}
+        ports:
+        - containerPort: {{ openshift_prometheus_alertmanager_service_targetport }}
+          name: web
+        args:
+        - -provider=openshift
+        - -https-address=:{{ openshift_prometheus_alertmanager_service_targetport }}
+        - -http-address=
+        - -email-domain=*
+        - -upstream=http://localhost:9093
+        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}
+        - -openshift-ca=/etc/pki/tls/cert.pem
+        - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+        - -tls-cert=/etc/tls/private/tls.crt
+        - -tls-key=/etc/tls/private/tls.key
+        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+        - -cookie-secret-file=/etc/proxy/secrets/session_secret
+        - -skip-auth-regex=^/metrics
+        volumeMounts:
+        - mountPath: /etc/tls/private
+          name: alertmanager-tls-secret
+        - mountPath: /etc/proxy/secrets
+          name: alertmanager-proxy-secret
+
       - name: alertmanager
         args:
         - -config.file=/etc/alertmanager/alertmanager.yml
@@ -205,14 +248,15 @@ spec:
 
       restartPolicy: Always
       volumes:
+
       - name: prometheus-config
         configMap:
           defaultMode: 420
           name: prometheus
-      - name: prometheus-secrets
+      - name: prometheus-proxy-secret
         secret:
           secretName: prometheus-proxy
-      - name: prometheus-tls
+      - name: prometheus-tls-secret
         secret:
           secretName: prometheus-tls
       - name: prometheus-data
@@ -225,13 +269,19 @@ spec:
       - name: alertmanager-config
         configMap:
           defaultMode: 420
-          name: prometheus-alerts
-      - name: alerts-secrets
+          name: alertmanager
+      - name: alertmanager-proxy-secret
         secret:
-          secretName: alerts-proxy
-      - name: alerts-tls
+          secretName: alertmanager-proxy
+      - name: alertmanager-tls-secret
+        secret:
+          secretName: alertmanager-tls
+      - name: alerts-tls-secret
         secret:
-          secretName: prometheus-alerts-tls
+          secretName: alerts-tls
+      - name: alerts-proxy-secret
+        secret:
+          secretName: alerts-proxy
       - name: alertmanager-data
 {% if openshift_prometheus_alertmanager_storage_type == 'pvc' %}
         persistentVolumeClaim:
@@ -239,7 +289,7 @@ spec:
 {% else %}
         emptydir: {}
 {% endif %}
-      - name: alert-buffer-data
+      - name: alerts-data
 {% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %}
         persistentVolumeClaim:
           claimName: {{ openshift_prometheus_alertbuffer_pvc_name }}

+ 121 - 54
roles/openshift_prometheus/templates/prometheus.yml.j2

@@ -1,10 +1,5 @@
 rule_files:
-  - 'prometheus.rules'
-{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %}
-  - 'prometheus.additional.rules'
-{% endif %}
-
-
+  - '*.rules'
 
 # A scrape configuration for running Prometheus on a Kubernetes cluster.
 # This uses separate scrape configs for cluster components (i.e. API server, node)
@@ -39,31 +34,11 @@ scrape_configs:
     action: keep
     regex: default;kubernetes;https
 
-# Scrape config for nodes.
-#
-# Each node exposes a /metrics endpoint that contains operational metrics for
-# the Kubelet and other components.
-- job_name: 'kubernetes-nodes'
-
-  scheme: https
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: node
-
-  relabel_configs:
-  - action: labelmap
-    regex: __meta_kubernetes_node_label_(.+)
-
 # Scrape config for controllers.
 #
 # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
 # the controllers.
 #
-# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
-#       endpoints.
 - job_name: 'kubernetes-controllers'
 
   scheme: https
@@ -87,6 +62,27 @@ scrape_configs:
     regex: (.+)(?::\d+)
     replacement: $1:8444
 
+# Scrape config for nodes.
+#
+# Each node exposes a /metrics endpoint that contains operational metrics for
+# the Kubelet and other components.
+- job_name: 'kubernetes-nodes'
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+  kubernetes_sd_configs:
+  - role: node
+  # Drop a very high cardinality metric that is incorrect in 3.7. It will be
+  # fixed in 3.9.
+  metric_relabel_configs:
+  - source_labels: [__name__]
+    action: drop
+    regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
+  relabel_configs:
+  - action: labelmap
+    regex: __meta_kubernetes_node_label_(.+)
+
 # Scrape config for cAdvisor.
 #
 # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
@@ -107,6 +103,14 @@ scrape_configs:
   kubernetes_sd_configs:
   - role: node
 
+  # Exclude a set of high cardinality metrics that can contribute to significant
+  # memory use in large clusters. These can be selectively enabled as necessary
+  # for medium or small clusters.
+  metric_relabel_configs:
+  - source_labels: [__name__]
+    action: drop
+    regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
+
   relabel_configs:
   - action: labelmap
     regex: __meta_kubernetes_node_label_(.+)
@@ -133,38 +137,101 @@ scrape_configs:
   - role: endpoints
 
   relabel_configs:
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
-    action: keep
-    regex: true
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
-    action: replace
-    target_label: __scheme__
-    regex: (https?)
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+    # only scrape infrastructure components
+    - source_labels: [__meta_kubernetes_namespace]
+      action: keep
+      regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
+    # drop infrastructure components managed by other scrape targets
+    - source_labels: [__meta_kubernetes_service_name]
+      action: drop
+      regex: 'prometheus-node-exporter'
+    # only those that have requested scraping
+    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+      action: keep
+      regex: true
+    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+      action: replace
+      target_label: __scheme__
+      regex: (https?)
+    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+      action: replace
+      target_label: __metrics_path__
+      regex: (.+)
+    - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+      action: replace
+      target_label: __address__
+      regex: (.+)(?::\d+);(\d+)
+      replacement: $1:$2
+    - action: labelmap
+      regex: __meta_kubernetes_service_label_(.+)
+    - source_labels: [__meta_kubernetes_namespace]
+      action: replace
+      target_label: kubernetes_namespace
+    - source_labels: [__meta_kubernetes_service_name]
+      action: replace
+      target_label: kubernetes_name
+
+# Scrape config for node-exporter, which is expected to be running on port 9100.
+- job_name: 'kubernetes-nodes-exporter'
+
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+
+  kubernetes_sd_configs:
+  - role: node
+
+  metric_relabel_configs:
+  - source_labels: [__name__]
+    action: drop
+    regex: 'node_cpu|node_(disk|scrape_collector)_.+'
+  # preserve a subset of the network, netstat, vmstat, and filesystem series
+  - source_labels: [__name__]
     action: replace
-    target_label: __metrics_path__
-    regex: (.+)
-  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+    regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
+    target_label: __name__
+    replacement: renamed_$1
+  - source_labels: [__name__]
+    action: drop
+    regex: 'node_(netstat|vmstat|filesystem|network)_.+'
+  - source_labels: [__name__]
     action: replace
+    regex: 'renamed_(.+)'
+    target_label: __name__
+    replacement: $1
+  # drop any partial expensive series
+  - source_labels: [__name__, device]
+    action: drop
+    regex: 'node_network_.+;veth.+'
+  - source_labels: [__name__, mountpoint]
+    action: drop
+    regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
+
+  relabel_configs:
+  - source_labels: [__address__]
+    regex: '(.*):10250'
+    replacement: '${1}:9100'
     target_label: __address__
-    regex: (.+)(?::\d+);(\d+)
-    replacement: $1:$2
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
-    action: replace
-    target_label: __basic_auth_username__
-    regex: (.+)
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
-    action: replace
-    target_label: __basic_auth_password__
-    regex: (.+)
+  - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
+    target_label: instance
   - action: labelmap
-    regex: __meta_kubernetes_service_label_(.+)
-  - source_labels: [__meta_kubernetes_namespace]
-    action: replace
-    target_label: kubernetes_namespace
-  - source_labels: [__meta_kubernetes_service_name]
-    action: replace
-    target_label: kubernetes_name
+    regex: __meta_kubernetes_node_label_(.+)
+
+# Scrape config for the template service broker
+- job_name: 'openshift-template-service-broker'
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
+    server_name: apiserver.openshift-template-service-broker.svc
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+  kubernetes_sd_configs:
+  - role: endpoints
+
+  relabel_configs:
+  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+    action: keep
+    regex: openshift-template-service-broker;apiserver;https
+
 
 alerting:
   alertmanagers: