Browse Source

Make Prometheus use persistent storage by default

Make Prometheus (and Alertmanager) use persistent storage by default, and expose
their storage capacities in the installer configuration. The previous emptyDir
default is likely to suffer from disk exhaustion in any realistic environment,
and offered no sane way to adjust the available space.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1584386
Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1584415
Dan Mace 7 years ago
parent
commit
4c9336cbfe

+ 4 - 2
roles/openshift_cluster_monitoring_operator/defaults/main.yml

@@ -1,10 +1,12 @@
 ---
 ---
-openshift_cluster_monitoring_operator_image: quay.io/coreos/cluster-monitoring-operator:v0.0.4
+openshift_cluster_monitoring_operator_image: quay.io/coreos/cluster-monitoring-operator:v0.0.6
 openshift_cluster_monitoring_operator_prometheus_operator_repo: quay.io/coreos/prometheus-operator
 openshift_cluster_monitoring_operator_prometheus_operator_repo: quay.io/coreos/prometheus-operator
 openshift_cluster_monitoring_operator_prometheus_repo: quay.io/prometheus/prometheus
 openshift_cluster_monitoring_operator_prometheus_repo: quay.io/prometheus/prometheus
 openshift_cluster_monitoring_operator_alertmanager_repo: quay.io/prometheus/alertmanager
 openshift_cluster_monitoring_operator_alertmanager_repo: quay.io/prometheus/alertmanager
 openshift_cluster_monitoring_operator_prometheus_reloader_repo: quay.io/coreos/prometheus-config-reloader
 openshift_cluster_monitoring_operator_prometheus_reloader_repo: quay.io/coreos/prometheus-config-reloader
-openshift_cluster_monitoring_oeprator_configmap_reloader_repo: quay.io/coreos/configmap-reload
+openshift_cluster_monitoring_operator_configmap_reloader_repo: quay.io/coreos/configmap-reload
+openshift_cluster_monitoring_operator_prometheus_storage_capacity: "50Gi"
+openshift_cluster_monitoring_operator_alertmanager_storage_capacity: "2Gi"
 
 
 openshift_cluster_monitoring_operator_cluster_id: "{{ openshift_clusterid | default(openshift_master_cluster_public_hostname, true) | default(openshift_master_cluster_hostname, true) | default('openshift', true) }}"
 openshift_cluster_monitoring_operator_cluster_id: "{{ openshift_clusterid | default(openshift_master_cluster_public_hostname, true) | default(openshift_master_cluster_hostname, true) | default('openshift', true) }}"
 
 

+ 16 - 2
roles/openshift_cluster_monitoring_operator/files/cluster-monitoring-operator.yaml

@@ -13,13 +13,17 @@ metadata:
 openshift.io/provider-display-name: Red Hat, Inc.
 openshift.io/provider-display-name: Red Hat, Inc.
 parameters:
 parameters:
 - name: OPERATOR_IMAGE
 - name: OPERATOR_IMAGE
-  value: quay.io/coreos/cluster-monitoring-operator:v0.0.1
+  value: quay.io/coreos/cluster-monitoring-operator:v0.0.6
 - name: PROMETHEUS_OPERATOR_IMAGE
 - name: PROMETHEUS_OPERATOR_IMAGE
   value: quay.io/coreos/prometheus-operator
   value: quay.io/coreos/prometheus-operator
 - name: ALERTMANAGER_IMAGE
 - name: ALERTMANAGER_IMAGE
   value: quay.io/prometheus/alertmanager
   value: quay.io/prometheus/alertmanager
+- name: ALERTMANAGER_STORAGE_CAPACITY
+  required: true
 - name: PROMETHEUS_IMAGE
 - name: PROMETHEUS_IMAGE
   value: quay.io/prometheus/prometheus
   value: quay.io/prometheus/prometheus
+- name: PROMETHEUS_STORAGE_CAPACITY
+  required: true
 - name: PROMETHEUS_CONFIG_RELOADER_IMAGE
 - name: PROMETHEUS_CONFIG_RELOADER_IMAGE
   value: quay.io/coreos/prometheus-config-reloader
   value: quay.io/coreos/prometheus-config-reloader
 - name: CONFIG_RELOADER_IMAGE
 - name: CONFIG_RELOADER_IMAGE
@@ -51,8 +55,18 @@ objects:
         baseImage: ${PROMETHEUS_IMAGE}
         baseImage: ${PROMETHEUS_IMAGE}
         externalLabels:
         externalLabels:
           cluster: ${CLUSTER_ID}
           cluster: ${CLUSTER_ID}
+        volumeClaimTemplate:
+          spec:
+            resources:
+              requests:
+                storage: "${PROMETHEUS_STORAGE_CAPACITY}"
       alertmanagerMain:
       alertmanagerMain:
         baseImage: ${ALERTMANAGER_IMAGE}
         baseImage: ${ALERTMANAGER_IMAGE}
+        volumeClaimTemplate:
+          spec:
+            resources:
+              requests:
+                storage: "${ALERTMANAGER_STORAGE_CAPACITY}"
 
 
 # Configures Alertmanager.
 # Configures Alertmanager.
 - apiVersion: v1
 - apiVersion: v1
@@ -148,7 +162,7 @@ objects:
     resources: [customresourcedefinitions]
     resources: [customresourcedefinitions]
     verbs: ['*']
     verbs: ['*']
   - apiGroups: [monitoring.coreos.com]
   - apiGroups: [monitoring.coreos.com]
-    resources: [alertmanagers, prometheuses, prometheuses/finalizers, alertmanagers/finalizers, servicemonitors]
+    resources: [alertmanagers, prometheuses, prometheuses/finalizers, alertmanagers/finalizers, servicemonitors, prometheusrules]
     verbs: ['*']
     verbs: ['*']
   - apiGroups: [apps]
   - apiGroups: [apps]
     resources: [statefulsets]
     resources: [statefulsets]

+ 3 - 1
roles/openshift_cluster_monitoring_operator/tasks/install.yaml

@@ -40,9 +40,11 @@
     --param ALERTMANAGER_IMAGE="{{ openshift_cluster_monitoring_operator_alertmanager_repo }}"
     --param ALERTMANAGER_IMAGE="{{ openshift_cluster_monitoring_operator_alertmanager_repo }}"
     --param PROMETHEUS_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_repo }}"
     --param PROMETHEUS_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_repo }}"
     --param PROMETHEUS_CONFIG_RELOADER_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_reloader_repo }}"
     --param PROMETHEUS_CONFIG_RELOADER_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_reloader_repo }}"
-    --param CONFIG_RELOADER_IMAGE="{{ openshift_cluster_monitoring_oeprator_configmap_reloader_repo }}"
+    --param CONFIG_RELOADER_IMAGE="{{ openshift_cluster_monitoring_operator_configmap_reloader_repo }}"
     --param ALERTMANAGER_CONFIG="{{ openshift_cluster_monitoring_operator_alertmanager_config | b64encode }}"
     --param ALERTMANAGER_CONFIG="{{ openshift_cluster_monitoring_operator_alertmanager_config | b64encode }}"
     --param CLUSTER_ID="{{ openshift_cluster_monitoring_operator_cluster_id }}"
     --param CLUSTER_ID="{{ openshift_cluster_monitoring_operator_cluster_id }}"
+    --param PROMETHEUS_STORAGE_CAPACITY="{{ openshift_cluster_monitoring_operator_prometheus_storage_capacity }}"
+    --param ALERTMANAGER_STORAGE_CAPACITY="{{ openshift_cluster_monitoring_operator_alertmanager_storage_capacity }}"
     --config={{ mktemp.stdout }}/admin.kubeconfig
     --config={{ mktemp.stdout }}/admin.kubeconfig
     | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f -
     | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f -
   with_items:
   with_items: