Browse Source

Add prometheus node-exporter

Signed-off-by: Aaron Weitekamp <aweiteka@redhat.com>
Aaron Weitekamp 7 năm trước cách đây
mục cha
commit
34e52089b2

+ 10 - 8
roles/openshift_prometheus/README.md

@@ -14,14 +14,16 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml).
 
 - `openshift_prometheus_state`: present - install/update. absent - uninstall.
 
+- `openshift_prometheus_node_exporter_install`: true (default) or false
+
 - `openshift_prometheus_namespace`: project (i.e. namespace) where the components will be
   deployed.
 
 - `openshift_prometheus_node_selector`: Selector for the nodes prometheus will be deployed on.
 
-- `openshift_prometheus_<COMPONENT>_image_prefix`: specify image prefix for the component 
+- `openshift_prometheus_<COMPONENT>_image_prefix`: specify image prefix for the component
 
-- `openshift_prometheus_<COMPONENT>_image_version`: specify image version for the component 
+- `openshift_prometheus_<COMPONENT>_image_version`: specify image version for the component
 
 - `openshift_prometheus_args`: Modify or add arguments for prometheus application
 
@@ -74,9 +76,9 @@ NOTE: Setting `openshift_prometheus_<COMPONENT>_storage_labels` overrides `opens
 
 
 ## Additional Alert Rules file variable
-An external file with alert rules can be added by setting path to additional rules variable: 
+An external file with alert rules can be added by setting path to additional rules variable:
 ```
-openshift_prometheus_additional_rules_file: <PATH> 
+openshift_prometheus_additional_rules_file: <PATH>
 ```
 
 File content should be in prometheus alert rules format.
@@ -100,12 +102,13 @@ groups:
 Each prometheus component (prometheus, alertmanager, alert-buffer, oauth-proxy) can specify a cpu and memory limits and requests by setting
 the corresponding role variable:
 ```
-openshift_prometheus_<COMPONENT>_(limits|requests)_(memory|cpu): <VALUE>
+openshift_prometheus_<COMPONENT>_(memory|cpu)_(limit|requests): <VALUE>
 ```
 e.g
 ```
-openshift_prometheus_alertmanager_limits_memory: 1Gi
-openshift_prometheus_oath_proxy_requests_cpu: 100
+openshift_prometheus_alertmanager_memory_limit: 1Gi
+openshift_prometheus_oauth_proxy_cpu_requests: 100
+openshift_prometheus_node_exporter_cpu_limit: 200m
 ```
 
 Dependencies
@@ -128,4 +131,3 @@ License
 -------
 
 Apache License, Version 2.0
-

+ 5 - 0
roles/openshift_prometheus/defaults/main.yaml

@@ -1,6 +1,7 @@
 ---
 # defaults file for openshift_prometheus
 openshift_prometheus_state: present
+openshift_prometheus_node_exporter_install: true  # gates the install_node_exporter.yaml include in tasks/main.yaml
 
 openshift_prometheus_namespace: openshift-metrics
 
@@ -74,3 +75,7 @@ openshift_prometheus_oauth_proxy_cpu_limit: null
 openshift_prometheus_oauth_proxy_memory_limit: null
 openshift_prometheus_oauth_proxy_cpu_requests: null
 openshift_prometheus_oauth_proxy_memory_requests: null
+openshift_prometheus_node_exporter_cpu_limit: 200m  # passed to the node exporter template as CPU_LIMITS
+openshift_prometheus_node_exporter_memory_limit: 50Mi  # passed to the node exporter template as MEMORY_LIMITS
+openshift_prometheus_node_exporter_cpu_requests: 100m  # passed to the node exporter template as CPU_REQUESTS
+openshift_prometheus_node_exporter_memory_requests: 30Mi  # passed to the node exporter template as MEMORY_REQUESTS

+ 100 - 0
roles/openshift_prometheus/files/node-exporter-template.yaml

@@ -0,0 +1,100 @@
+# OpenShift template for the Prometheus node exporter. It defines a
+# ServiceAccount, a Service on port 9100 and a DaemonSet; the image and the
+# container resource requests/limits are supplied as template parameters.
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  name: prometheus-node-exporter
+  annotations:
+    openshift.io/display-name: Prometheus Node Exporter
+    description: Prometheus exporter for node host metrics
+    iconClass: fa fa-cogs
+    tags: monitoring,prometheus
+    openshift.io/support-url: https://access.redhat.com
+    openshift.io/provider-display-name: Red Hat, Inc.
+parameters:
+- name: IMAGE
+  value: openshift/prometheus-node-exporter:v0.15.2
+- name: MEMORY_REQUESTS
+  value: 30Mi
+- name: CPU_REQUESTS
+  value: 100m
+- name: MEMORY_LIMITS
+  value: 50Mi
+- name: CPU_LIMITS
+  value: 200m
+objects:
+- apiVersion: v1
+  kind: ServiceAccount
+  metadata:
+    name: prometheus-node-exporter
+# Headless service (clusterIP: None) carrying the prometheus.io/scrape annotation.
+- apiVersion: v1
+  kind: Service
+  metadata:
+    annotations:
+      prometheus.io/scrape: "true"
+    labels:
+      app: prometheus-node-exporter
+    name: prometheus-node-exporter
+  spec:
+    clusterIP: None
+    ports:
+    - name: scrape
+      port: 9100
+      protocol: TCP
+      targetPort: 9100
+    selector:
+      app: prometheus-node-exporter
+- apiVersion: extensions/v1beta1
+  kind: DaemonSet
+  metadata:
+    name: prometheus-node-exporter
+    labels:
+      app: prometheus-node-exporter
+      role: monitoring
+  spec:
+    updateStrategy:
+      type: RollingUpdate
+    template:
+      metadata:
+        labels:
+          app: prometheus-node-exporter
+          role: monitoring
+        name: prometheus-exporter
+      spec:
+        serviceAccountName: prometheus-node-exporter
+        hostNetwork: true
+        hostPID: true
+        containers:
+        - image: ${IMAGE}
+          name: node-exporter
+          # Point the exporter at the hostPath mounts declared below; it reads
+          # /proc and /sys by default, which would leave the mounts unused.
+          args:
+          - "--path.procfs=/host/proc"
+          - "--path.sysfs=/host/sys"
+          ports:
+          - containerPort: 9100
+            name: scrape
+          resources:
+            requests:
+              memory: ${MEMORY_REQUESTS}
+              cpu: ${CPU_REQUESTS}
+            limits:
+              memory: ${MEMORY_LIMITS}
+              cpu: ${CPU_LIMITS}
+          volumeMounts:
+          - name: proc
+            readOnly: true
+            mountPath: /host/proc
+          - name: sys
+            readOnly: true
+            mountPath: /host/sys
+        volumes:
+        - name: proc
+          hostPath:
+            path: /proc
+        - name: sys
+          hostPath:
+            path: /sys

+ 73 - 0
roles/openshift_prometheus/tasks/install_node_exporter.yaml

@@ -0,0 +1,73 @@
+---
+# Deploys the Prometheus node exporter: processes the bundled template with a
+# temporary copy of the admin kubeconfig and applies it to the prometheus namespace.
+
+# set facts
+- include_tasks: facts.yaml
+
+# namespace
+- name: Add prometheus project
+  oc_project:
+    state: present
+    name: "{{ openshift_prometheus_namespace }}"
+    node_selector: ""
+    description: Prometheus
+
+- name: Make temp directory for node exporter template
+  command: mktemp -d /tmp/prometheus-ansible-XXXXXX
+  register: mktemp
+  changed_when: False
+
+# The tasks that use the temp directory run in a block so that the copied
+# admin.kubeconfig is removed even when one of them fails.
+- block:
+  - name: Copy admin client config
+    command: >
+      cp {{ openshift.common.config_base }}/master/admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig
+    changed_when: false
+
+  # grant the hostaccess SCC to the prometheus-node-exporter serviceaccount
+  - name: Set hostaccess SCC for prometheus-node-exporter
+    oc_adm_policy_user:
+      state: present
+      namespace: "{{ openshift_prometheus_namespace }}"
+      resource_kind: scc
+      resource_name: hostaccess
+      user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus-node-exporter"
+
+  - name: Copy node exporter templates to temp directory
+    copy:
+      src: "{{ item }}"
+      dest: "{{ mktemp.stdout }}/{{ item }}"
+    with_items:
+      - "{{ __node_exporter_template_file }}"
+
+  - name: Apply the node exporter template file
+    shell: >
+      {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/{{ __node_exporter_template_file }}"
+      --param IMAGE="{{ l_openshift_prometheus_node_exporter_image_prefix }}prometheus-node-exporter:{{ l_openshift_prometheus_node_exporter_image_version }}"
+      --param MEMORY_REQUESTS="{{ openshift_prometheus_node_exporter_memory_requests }}"
+      --param CPU_REQUESTS="{{ openshift_prometheus_node_exporter_cpu_requests }}"
+      --param MEMORY_LIMITS="{{ openshift_prometheus_node_exporter_memory_limit }}"
+      --param CPU_LIMITS="{{ openshift_prometheus_node_exporter_cpu_limit }}"
+      --config={{ mktemp.stdout }}/admin.kubeconfig
+      -n "{{ openshift_prometheus_namespace }}"
+      | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f - -n "{{ openshift_prometheus_namespace }}"
+
+  # NOTE(review): this probes port 9100 only on the host running this task, so it
+  # presumably assumes that host is a node the DaemonSet schedules onto - confirm.
+  - name: Verify that node exporter is running
+    uri:
+      url: http://localhost:9100/metrics
+    register: node_exporter_health
+    until: node_exporter_health.status == 200
+    retries: 120
+    delay: 1
+    changed_when: false
+
+  always:
+  - name: Remove temp directory
+    file:
+      state: absent
+      name: "{{ mktemp.stdout }}"
+    changed_when: False

+ 1 - 1
roles/openshift_prometheus/tasks/install_prometheus.yaml

@@ -15,7 +15,7 @@
   oc_project:
     state: present
     name: "{{ openshift_prometheus_namespace }}"
-    node_selector: "{{ openshift_prometheus_node_selector | lib_utils_oo_selector_to_string_list() }}"
+    node_selector: ""
     description: Prometheus
 
 # secrets

+ 7 - 0
roles/openshift_prometheus/tasks/main.yaml

@@ -28,6 +28,13 @@
 - include_tasks: uninstall_prometheus.yaml
   when: openshift_prometheus_state == 'absent'
 
+# Node exporter is only deployed on install/update; without the state check an
+# uninstall run would re-create the project and deploy the exporter again.
+- include_tasks: install_node_exporter.yaml
+  when:
+  - openshift_prometheus_state == 'present'
+  - openshift_prometheus_node_exporter_install | default(true) | bool
+
 - name: Delete temp directory
   file:
     name: "{{ tempdir }}"

+ 5 - 1
roles/openshift_prometheus/vars/default_images.yml

@@ -4,9 +4,13 @@ l_openshift_prometheus_image_prefix: "{{ openshift_prometheus_image_prefix | def
 l_openshift_prometheus_proxy_image_prefix: "{{ openshift_prometheus_proxy_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
-l_openshift_prometheus_alertmanager_image_prefix: "{{ openshift_prometheus_altermanager_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
+# Read the correctly spelled alertmanager override first; the historical
+# "altermanager" spelling is still honoured so existing inventories keep working.
+l_openshift_prometheus_alertmanager_image_prefix: "{{ openshift_prometheus_alertmanager_image_prefix | default(openshift_prometheus_altermanager_image_prefix | default(l_openshift_prometheus_image_prefix)) }}"
 l_openshift_prometheus_alertbuffer_image_prefix: "{{ openshift_prometheus_alertbuffer_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
+l_openshift_prometheus_node_exporter_image_prefix: "{{ openshift_prometheus_node_exporter_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
 
 # image version defaults
 l_openshift_prometheus_image_version: "{{ openshift_prometheus_image_version | default('v2.0.0') }}"
 l_openshift_prometheus_proxy_image_version: "{{ openshift_prometheus_proxy_image_version | default('v1.0.0') }}"
 l_openshift_prometheus_alertmanager_image_version: "{{ openshift_prometheus_alertmanager_image_version | default('v0.13.0') }}"
 l_openshift_prometheus_alertbuffer_image_version: "{{ openshift_prometheus_alertbuffer_image_version | default('v0.0.2') }}"
+l_openshift_prometheus_node_exporter_image_version: "{{ openshift_prometheus_node_exporter_image_version | default('v0.15.2') }}"

+ 2 - 0
roles/openshift_prometheus/vars/main.yml

@@ -0,0 +1,2 @@
+---
+__node_exporter_template_file: "node-exporter-template.yaml"  # template under files/, copied and processed by tasks/install_node_exporter.yaml

+ 5 - 1
roles/openshift_prometheus/vars/openshift-enterprise.yml

@@ -4,9 +4,13 @@ l_openshift_prometheus_image_prefix: "{{ openshift_prometheus_image_prefix | def
 l_openshift_prometheus_proxy_image_prefix: "{{ openshift_prometheus_proxy_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
-l_openshift_prometheus_alertmanager_image_prefix: "{{ openshift_prometheus_altermanager_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
+# Read the correctly spelled alertmanager override first; the historical
+# "altermanager" spelling is still honoured so existing inventories keep working.
+l_openshift_prometheus_alertmanager_image_prefix: "{{ openshift_prometheus_alertmanager_image_prefix | default(openshift_prometheus_altermanager_image_prefix | default(l_openshift_prometheus_image_prefix)) }}"
 l_openshift_prometheus_alertbuffer_image_prefix: "{{ openshift_prometheus_alertbuffer_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
+l_openshift_prometheus_node_exporter_image_prefix: "{{ openshift_prometheus_node_exporter_image_prefix | default(l_openshift_prometheus_image_prefix) }}"
 
 # image version defaults
 l_openshift_prometheus_image_version: "{{ openshift_prometheus_image_version | default(openshift_image_tag) }}"
 l_openshift_prometheus_proxy_image_version: "{{ openshift_prometheus_proxy_image_version | default(openshift_image_tag) }}"
 l_openshift_prometheus_alertmanager_image_version: "{{ openshift_prometheus_alertmanager_image_version | default(openshift_image_tag) }}"
 l_openshift_prometheus_alertbuffer_image_version: "{{ openshift_prometheus_alertbuffer_image_version | default(openshift_image_tag) }}"
+l_openshift_prometheus_node_exporter_image_version: "{{ openshift_prometheus_node_exporter_image_version | default('v0.15.2') }}"