Browse Source

Merge pull request #6811 from mjudeikis/prometheus-new-template

Automatic merge from submit-queue.

Prometheus new template rebase

Updating Prometheus for new templates/example.

1. New scraping rules, fixes
2. exposed alert manager
3. clean ansible
4. add a custom annotation for routes (in the example when AVI router in use we need to be able to add custom annotations)
5. Externalise some of the configs

Still work in progress...
FYI: @zgalor
OpenShift Merge Robot 7 years ago
parent
commit
2ec70a36f5

+ 8 - 0
playbooks/openshift-prometheus/private/uninstall.yml

@@ -0,0 +1,8 @@
+---
+- name: Uninstall Prometheus
+  hosts: masters[0]
+  tasks:
+  - name: Run the Prometheus Uninstall Role Tasks
+    include_role:
+      name: openshift_prometheus
+      tasks_from: uninstall

+ 2 - 0
playbooks/openshift-prometheus/uninstall.yml

@@ -0,0 +1,2 @@
+---
+- import_playbook: private/uninstall.yml

+ 15 - 0
roles/openshift_prometheus/defaults/main.yaml

@@ -7,9 +7,24 @@ openshift_prometheus_namespace: openshift-metrics
 # defaults hosts for routes
 openshift_prometheus_hostname: prometheus-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}
 openshift_prometheus_alerts_hostname: alerts-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}
+openshift_prometheus_alertmanager_hostname: alertmanager-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}}
+
 
 openshift_prometheus_node_selector: {"region":"infra"}
 
+openshift_prometheus_service_port: 443
+openshift_prometheus_service_targetport: 8443
+openshift_prometheus_service_name: prometheus
+openshift_prometheus_alerts_service_targetport: 9443
+openshift_prometheus_alerts_service_name: alerts
+openshift_prometheus_alertmanager_service_targetport: 10443
+openshift_prometheus_alertmanager_service_name: alertmanager
+openshift_prometheus_serviceaccount_annotations: []
+l_openshift_prometheus_serviceaccount_annotations:
+  - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
+  - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
+  - serviceaccounts.openshift.io/oauth-redirectreference.alertmanager='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alertmanager"}}'
+
 # additional prometheus rules file
 openshift_prometheus_additional_rules_file: null
 

+ 10 - 0
roles/openshift_prometheus/tasks/facts.yaml

@@ -0,0 +1,10 @@
+---
+# The kubernetes version impacts the prometheus scraping endpoint
+# so gathering it before constructing the configmap
+- name: get oc version
+  oc_version:
+  register: oc_version
+
+- set_fact:
+    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}"
+    openshift_prometheus_serviceaccount_annotations: "{{ l_openshift_prometheus_serviceaccount_annotations + openshift_prometheus_serviceaccount_annotations|list }}"

+ 53 - 66
roles/openshift_prometheus/tasks/install_prometheus.yaml

@@ -1,4 +1,6 @@
 ---
+# set facts
+- include_tasks: facts.yaml
 
 # namespace
 - name: Add prometheus project
@@ -9,7 +11,7 @@
     description: Prometheus
 
 # secrets
-- name: Set alert and prometheus secrets
+- name: Set alert, alertmanager and prometheus secrets
   oc_secret:
     state: present
     name: "{{ item }}-proxy"
@@ -20,30 +22,24 @@
   with_items:
     - prometheus
     - alerts
+    - alertmanager
 
 # serviceaccount
 - name: create prometheus serviceaccount
   oc_serviceaccount:
     state: present
-    name: prometheus
+    name: "{{ openshift_prometheus_service_name }}"
     namespace: "{{ openshift_prometheus_namespace }}"
-    #    TODO add annotations when supproted
-    #    annotations:
-    #      serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
-    #      serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
-
-    secrets:
-      - prometheus-secrets
   changed_when: no
 
+
 # TODO remove this when annotations are supported by oc_serviceaccount
 - name: annotate serviceaccount
   command: >
     {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    serviceaccount prometheus
-    serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}'
-    serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}'
-
+    serviceaccount {{ openshift_prometheus_service_name }} {{ item }}
+  with_items:
+    "{{ openshift_prometheus_serviceaccount_annotations }}"
 
 # create clusterrolebinding for prometheus serviceaccount
 - name: Set cluster-reader permissions for prometheus
@@ -52,63 +48,61 @@
     namespace: "{{ openshift_prometheus_namespace }}"
     resource_kind: cluster-role
     resource_name: cluster-reader
-    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus"
+    user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:{{ openshift_prometheus_service_name }}"
+
 
-# create prometheus and alerts services
-# TODO join into 1 task with loop
-- name: Create prometheus service
+- name: create services for prometheus
   oc_service:
-    state: present
-    name: "{{ item.name }}"
+    name: "{{ openshift_prometheus_service_name }}"
     namespace: "{{ openshift_prometheus_namespace }}"
-    selector:
-      app: prometheus
     labels:
-      name: "{{ item.name }}"
-      #    TODO add annotations when supported
-      #    annotations:
-      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+      name: prometheus
+    annotations:
+      prometheus.io/scrape: 'true'
+      prometheus.io/scheme: https
+      service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls
     ports:
-      - port: 443
-        targetPort: 8443
-  with_items:
-    - name: prometheus
+      - name: prometheus
+        port: "{{ openshift_prometheus_service_port }}"
+        targetPort: "{{ openshift_prometheus_service_targetport }}"
+        protocol: TCP
+    selector:
+      app: prometheus
 
-- name: Create alerts service
+- name: create services for alert buffer
   oc_service:
-    state: present
-    name: "{{ item.name }}"
+    name: "{{ openshift_prometheus_alerts_service_name }}"
     namespace: "{{ openshift_prometheus_namespace }}"
+    labels:
+      name: prometheus
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: alerts-tls
+    ports:
+      - name: prometheus
+        port: "{{ openshift_prometheus_service_port }}"
+        targetPort: "{{ openshift_prometheus_alerts_service_targetport }}"
+        protocol: TCP
     selector:
       app: prometheus
+
+- name: create services for alertmanager
+  oc_service:
+    name: "{{ openshift_prometheus_alertmanager_service_name }}"
+    namespace: "{{ openshift_prometheus_namespace }}"
     labels:
-      name: "{{ item.name }}"
-      #    TODO add annotations when supported
-      #    annotations:
-      #      service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls"
+      name: prometheus
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: alertmanager-tls
     ports:
-      - port: 443
-        targetPort: 9443
-  with_items:
-    - name: alerts
-
-
-# Annotate services with secret name
-# TODO remove this when annotations are supported by oc_service
-- name: annotate prometheus service
-  command: >
-    {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    service prometheus
-    prometheus.io/scrape='true'
-    prometheus.io/scheme=https
-    service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls
-
-- name: annotate alerts service
-  command: >
-    {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }}
-    service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls'
+      - name: prometheus
+        port: "{{ openshift_prometheus_service_port }}"
+        targetPort: "{{ openshift_prometheus_alertmanager_service_targetport }}"
+        protocol: TCP
+    selector:
+      app: prometheus
 
 # create prometheus and alerts routes
+# TODO: oc_route module should support insecureEdgeTerminationPolicy: Redirect
 - name: create prometheus and alerts routes
   oc_route:
     state: present
@@ -122,6 +116,8 @@
       host: "{{ openshift_prometheus_hostname }}"
     - name: alerts
       host: "{{ openshift_prometheus_alerts_hostname }}"
+    - name: alertmanager
+      host: "{{ openshift_prometheus_alertmanager_hostname }}"
 
 # Storage
 - name: create prometheus pvc
@@ -169,15 +165,6 @@
     path: "{{ tempdir }}/prometheus.additional.rules"
   register: additional_rules_stat
 
-# The kubernetes version impacts the prometheus scraping endpoint
-# so gathering it before constructing the configmap
-- name: get oc version
-  oc_version:
-  register: oc_version
-
-- set_fact:
-    kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}"
-
 - template:
     src: prometheus.yml.j2
     dest: "{{ tempdir }}/prometheus.yml"
@@ -219,7 +206,7 @@
 - name: Set alertmanager configmap
   oc_configmap:
     state: present
-    name: "prometheus-alerts"
+    name: "alertmanager"
     namespace: "{{ openshift_prometheus_namespace }}"
     from_file:
       alertmanager.yml: "{{ tempdir }}/alertmanager.yml"

+ 3 - 1
roles/openshift_prometheus/tasks/main.yaml

@@ -16,9 +16,11 @@
 - name: Create templates subdirectory
   file:
     state: directory
-    path: "{{ tempdir }}/templates"
+    path: "{{ tempdir }}/{{ item }}"
     mode: 0755
   changed_when: False
+  with_items:
+    - templates
 
 - include_tasks: install_prometheus.yaml
   when: openshift_prometheus_state == 'present'

roles/openshift_prometheus/tasks/uninstall_prometheus.yaml → roles/openshift_prometheus/tasks/uninstall.yaml


+ 71 - 21
roles/openshift_prometheus/templates/prometheus.j2

@@ -19,7 +19,7 @@ spec:
       labels:
         app: prometheus
     spec:
-      serviceAccountName: prometheus
+      serviceAccountName: "{{ openshift_prometheus_service_name }}"
 {% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %}
       nodeSelector:
 {% for key, value in openshift_prometheus_node_selector.items() %}
@@ -47,15 +47,15 @@ spec:
             cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
 {% endif %}
         ports:
-        - containerPort: 8443
+        - containerPort: {{ openshift_prometheus_service_targetport }}
           name: web
         args:
         - -provider=openshift
-        - -https-address=:8443
+        - -https-address=:{{ openshift_prometheus_service_targetport }}
         - -http-address=
         - -email-domain=*
         - -upstream=http://localhost:9090
-        - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}
         - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
         - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
         - -tls-cert=/etc/tls/private/tls.crt
@@ -67,9 +67,9 @@ spec:
         - -skip-auth-regex=^/metrics
         volumeMounts:
         - mountPath: /etc/tls/private
-          name: prometheus-tls
+          name: prometheus-tls-secret
         - mountPath: /etc/proxy/secrets
-          name: prometheus-secrets
+          name: prometheus-proxy-secret
         - mountPath: /prometheus
           name: prometheus-data
 
@@ -104,7 +104,7 @@ spec:
         - mountPath: /prometheus
           name: prometheus-data
 
-      # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
+      # Deploy alert-buffer behind oauth alerts-proxy
       - name: alerts-proxy
         image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}"
         imagePullPolicy: IfNotPresent
@@ -124,15 +124,15 @@ spec:
             cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
 {% endif %}
         ports:
-        - containerPort: 9443
+        - containerPort: {{ openshift_prometheus_alerts_service_targetport }}
           name: web
         args:
         - -provider=openshift
-        - -https-address=:9443
+        - -https-address=:{{ openshift_prometheus_alerts_service_targetport }}
         - -http-address=
         - -email-domain=*
         - -upstream=http://localhost:9099
-        - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}
         - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
         - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
         - -tls-cert=/etc/tls/private/tls.crt
@@ -143,9 +143,9 @@ spec:
         - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
         volumeMounts:
         - mountPath: /etc/tls/private
-          name: alerts-tls
+          name: alerts-tls-secret
         - mountPath: /etc/proxy/secrets
-          name: alerts-secrets
+          name: alerts-proxy-secret
 
       - name: alert-buffer
         args:
@@ -169,11 +169,54 @@ spec:
 {% endif %}
         volumeMounts:
         - mountPath: /alert-buffer
-          name: alert-buffer-data
+          name: alerts-data
         ports:
         - containerPort: 9099
           name: alert-buf
 
+      # Deploy alertmanager behind oauth alertmanager-proxy
+      - name: alertmanager-proxy
+        image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}"
+        imagePullPolicy: IfNotPresent
+        requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+          memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+          cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}"
+{% endif %}
+        limits:
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+          memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+          cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}"
+{% endif %}
+        ports:
+        - containerPort: {{ openshift_prometheus_alertmanager_service_targetport }}
+          name: web
+        args:
+        - -provider=openshift
+        - -https-address=:{{ openshift_prometheus_alertmanager_service_targetport }}
+        - -http-address=
+        - -email-domain=*
+        - -upstream=http://localhost:9093
+        - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }}
+        - -openshift-ca=/etc/pki/tls/cert.pem
+        - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+        - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+        - -tls-cert=/etc/tls/private/tls.crt
+        - -tls-key=/etc/tls/private/tls.key
+        - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+        - -cookie-secret-file=/etc/proxy/secrets/session_secret
+        - -skip-auth-regex=^/metrics
+        volumeMounts:
+        - mountPath: /etc/tls/private
+          name: alertmanager-tls-secret
+        - mountPath: /etc/proxy/secrets
+          name: alertmanager-proxy-secret
+
       - name: alertmanager
         args:
         - -config.file=/etc/alertmanager/alertmanager.yml
@@ -205,14 +248,15 @@ spec:
 
       restartPolicy: Always
       volumes:
+
       - name: prometheus-config
         configMap:
           defaultMode: 420
           name: prometheus
-      - name: prometheus-secrets
+      - name: prometheus-proxy-secret
         secret:
           secretName: prometheus-proxy
-      - name: prometheus-tls
+      - name: prometheus-tls-secret
         secret:
           secretName: prometheus-tls
       - name: prometheus-data
@@ -225,13 +269,19 @@ spec:
       - name: alertmanager-config
         configMap:
           defaultMode: 420
-          name: prometheus-alerts
-      - name: alerts-secrets
+          name: alertmanager
+      - name: alertmanager-proxy-secret
         secret:
-          secretName: alerts-proxy
-      - name: alerts-tls
+          secretName: alertmanager-proxy
+      - name: alertmanager-tls-secret
+        secret:
+          secretName: alertmanager-tls
+      - name: alerts-tls-secret
         secret:
-          secretName: prometheus-alerts-tls
+          secretName: alerts-tls
+      - name: alerts-proxy-secret
+        secret:
+          secretName: alerts-proxy
       - name: alertmanager-data
 {% if openshift_prometheus_alertmanager_storage_type == 'pvc' %}
         persistentVolumeClaim:
@@ -239,7 +289,7 @@ spec:
 {% else %}
         emptydir: {}
 {% endif %}
-      - name: alert-buffer-data
+      - name: alerts-data
 {% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %}
         persistentVolumeClaim:
           claimName: {{ openshift_prometheus_alertbuffer_pvc_name }}

+ 121 - 54
roles/openshift_prometheus/templates/prometheus.yml.j2

@@ -1,10 +1,5 @@
 rule_files:
-  - 'prometheus.rules'
-{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %}
-  - 'prometheus.additional.rules'
-{% endif %}
-
-
+  - '*.rules'
 
 # A scrape configuration for running Prometheus on a Kubernetes cluster.
 # This uses separate scrape configs for cluster components (i.e. API server, node)
@@ -39,31 +34,11 @@ scrape_configs:
     action: keep
     regex: default;kubernetes;https
 
-# Scrape config for nodes.
-#
-# Each node exposes a /metrics endpoint that contains operational metrics for
-# the Kubelet and other components.
-- job_name: 'kubernetes-nodes'
-
-  scheme: https
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: node
-
-  relabel_configs:
-  - action: labelmap
-    regex: __meta_kubernetes_node_label_(.+)
-
 # Scrape config for controllers.
 #
 # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
 # the controllers.
 #
-# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
-#       endpoints.
 - job_name: 'kubernetes-controllers'
 
   scheme: https
@@ -87,6 +62,27 @@ scrape_configs:
     regex: (.+)(?::\d+)
     replacement: $1:8444
 
+# Scrape config for nodes.
+#
+# Each node exposes a /metrics endpoint that contains operational metrics for
+# the Kubelet and other components.
+- job_name: 'kubernetes-nodes'
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+  kubernetes_sd_configs:
+  - role: node
+  # Drop a very high cardinality metric that is incorrect in 3.7. It will be
+  # fixed in 3.9.
+  metric_relabel_configs:
+  - source_labels: [__name__]
+    action: drop
+    regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'
+  relabel_configs:
+  - action: labelmap
+    regex: __meta_kubernetes_node_label_(.+)
+
 # Scrape config for cAdvisor.
 #
 # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
@@ -107,6 +103,14 @@ scrape_configs:
   kubernetes_sd_configs:
   - role: node
 
+  # Exclude a set of high cardinality metrics that can contribute to significant
+  # memory use in large clusters. These can be selectively enabled as necessary
+  # for medium or small clusters.
+  metric_relabel_configs:
+  - source_labels: [__name__]
+    action: drop
+    regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'
+
   relabel_configs:
   - action: labelmap
     regex: __meta_kubernetes_node_label_(.+)
@@ -133,38 +137,101 @@ scrape_configs:
   - role: endpoints
 
   relabel_configs:
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
-    action: keep
-    regex: true
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
-    action: replace
-    target_label: __scheme__
-    regex: (https?)
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+    # only scrape infrastructure components
+    - source_labels: [__meta_kubernetes_namespace]
+      action: keep
+      regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
+    # drop infrastructure components managed by other scrape targets
+    - source_labels: [__meta_kubernetes_service_name]
+      action: drop
+      regex: 'prometheus-node-exporter'
+    # only those that have requested scraping
+    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+      action: keep
+      regex: true
+    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+      action: replace
+      target_label: __scheme__
+      regex: (https?)
+    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+      action: replace
+      target_label: __metrics_path__
+      regex: (.+)
+    - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+      action: replace
+      target_label: __address__
+      regex: (.+)(?::\d+);(\d+)
+      replacement: $1:$2
+    - action: labelmap
+      regex: __meta_kubernetes_service_label_(.+)
+    - source_labels: [__meta_kubernetes_namespace]
+      action: replace
+      target_label: kubernetes_namespace
+    - source_labels: [__meta_kubernetes_service_name]
+      action: replace
+      target_label: kubernetes_name
+
+# Scrape config for node-exporter, which is expected to be running on port 9100.
+- job_name: 'kubernetes-nodes-exporter'
+
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+
+  kubernetes_sd_configs:
+  - role: node
+
+  metric_relabel_configs:
+  - source_labels: [__name__]
+    action: drop
+    regex: 'node_cpu|node_(disk|scrape_collector)_.+'
+  # preserve a subset of the network, netstat, vmstat, and filesystem series
+  - source_labels: [__name__]
     action: replace
-    target_label: __metrics_path__
-    regex: (.+)
-  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+    regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
+    target_label: __name__
+    replacement: renamed_$1
+  - source_labels: [__name__]
+    action: drop
+    regex: 'node_(netstat|vmstat|filesystem|network)_.+'
+  - source_labels: [__name__]
     action: replace
+    regex: 'renamed_(.+)'
+    target_label: __name__
+    replacement: $1
+  # drop any partial expensive series
+  - source_labels: [__name__, device]
+    action: drop
+    regex: 'node_network_.+;veth.+'
+  - source_labels: [__name__, mountpoint]
+    action: drop
+    regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'
+
+  relabel_configs:
+  - source_labels: [__address__]
+    regex: '(.*):10250'
+    replacement: '${1}:9100'
     target_label: __address__
-    regex: (.+)(?::\d+);(\d+)
-    replacement: $1:$2
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
-    action: replace
-    target_label: __basic_auth_username__
-    regex: (.+)
-  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
-    action: replace
-    target_label: __basic_auth_password__
-    regex: (.+)
+  - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
+    target_label: __instance__
   - action: labelmap
-    regex: __meta_kubernetes_service_label_(.+)
-  - source_labels: [__meta_kubernetes_namespace]
-    action: replace
-    target_label: kubernetes_namespace
-  - source_labels: [__meta_kubernetes_service_name]
-    action: replace
-    target_label: kubernetes_name
+    regex: __meta_kubernetes_node_label_(.+)
+
+# Scrape config for the template service broker
+- job_name: 'openshift-template-service-broker'
+  scheme: https
+  tls_config:
+    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
+    server_name: apiserver.openshift-template-service-broker.svc
+  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+  kubernetes_sd_configs:
+  - role: endpoints
+
+  relabel_configs:
+  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+    action: keep
+    regex: openshift-template-service-broker;apiserver;https
+
 
 alerting:
   alertmanagers: