Merge pull request #7693 from elad661/tpo_upstreaming

Add a role to install the Cluster Monitoring Operator
OpenShift Merge Robot · 7 years ago
commit b29bfceaf1

+ 3 - 0
playbooks/common/private/components.yml

@@ -31,6 +31,9 @@
 - import_playbook: ../../openshift-prometheus/private/config.yml
   when: openshift_hosted_prometheus_deploy | default(false) | bool
 
+- import_playbook: ../../openshift-monitoring/private/config.yml
+  when: openshift_monitoring_deploy | default(false) | bool
+
 - import_playbook: ../../openshift-service-catalog/private/config.yml
   when: openshift_enable_service_catalog | default(true) | bool
 

+ 26 - 0
playbooks/openshift-monitoring/README.md

@@ -0,0 +1,26 @@
+# OpenShift Monitoring
+
+This playbook installs the OpenShift Monitoring stack.
+
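+The stack is only deployed when `openshift_monitoring_deploy` evaluates to
+true; it defaults to false (wired up in
+`playbooks/common/private/components.yml`). A minimal sketch of enabling it,
+assuming you keep cluster variables in a YAML vars file:
+
+```yaml
+# hypothetical vars file applied to the cluster hosts
+openshift_monitoring_deploy: true
+```
+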
+## GCP Development
+
+1. [Launch a GCP cluster](https://github.com/openshift/release/tree/master/cluster/test-deploy).
+
+2. Hack on the installer locally.
+
+3. Make your changes, then build a new openshift-ansible image:
+
+```shell
+# in openshift-ansible
+docker build -f images/installer/Dockerfile -t openshift-ansible .
+```
+
+4. Run the openshift-monitoring GCP installer against the cluster:
+
+```shell
+# in test-deploy
+make WHAT=dmacedev OPENSHIFT_ANSIBLE_IMAGE=openshift-ansible sh
+
+# in the resulting container shell
+ansible-playbook playbooks/openshift-monitoring/install-gcp.yml
+```
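+
+The playbook can also be run directly against an existing inventory via
+`playbooks/openshift-monitoring/config.yml`.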

+ 9 - 0
playbooks/openshift-monitoring/config.yml

@@ -0,0 +1,9 @@
+---
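+# Scope init work to the masters; "all:!all" matches no hosts, which skips
+# the version check entirely.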
+- import_playbook: ../init/main.yml
+  vars:
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- import_playbook: private/config.yml

+ 17 - 0
playbooks/openshift-monitoring/install-gcp.yml

@@ -0,0 +1,17 @@
+---
+- hosts: localhost
+  connection: local
+  tasks:
+  - name: place all scale groups into Ansible groups
+    include_role:
+      name: openshift_gcp
+      tasks_from: setup_scale_group_facts.yml
+
+- import_playbook: ../init/main.yml
+  vars:
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- import_playbook: private/config.yml

+ 30 - 0
playbooks/openshift-monitoring/private/config.yml

@@ -0,0 +1,30 @@
+---
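+# Checkpoint plays record installer phase status via set_stats so the
+# installer can report progress for this phase.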
+- name: Cluster Monitoring Operator Checkpoint Start
+  hosts: all
+  gather_facts: false
+  tasks:
+  - name: Set Cluster Monitoring Operator 'In Progress'
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_cluster_monitoring_operator:
+          status: "In Progress"
+          start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
+
+- name: Configure Cluster Monitoring Operator
+  hosts: oo_first_master
+  roles:
+  - role: openshift_cluster_monitoring_operator
+
+- name: Cluster Monitoring Operator Checkpoint End
+  hosts: all
+  gather_facts: false
+  tasks:
+  - name: Set Cluster Monitoring Operator 'Complete'
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_cluster_monitoring_operator:
+          status: "Complete"
+          end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"

+ 1 - 0
playbooks/openshift-monitoring/private/roles

@@ -0,0 +1 @@
+../../../roles/

+ 76 - 0
roles/openshift_cluster_monitoring_operator/README.md

@@ -0,0 +1,76 @@
+# OpenShift Cluster Monitoring Operator
+
+The OpenShift Cluster Monitoring Operator role manages the [Tectonic Prometheus Operator](https://github.com/coreos-inc/tectonic-prometheus-operator) deployment.
+TPO is an operator that deploys our monitoring stack (Prometheus, Alertmanager) with out-of-the-box alerts and metrics.
+
+# Component integration
+
+The following sections guide component owners through shipping new integrations with the monitoring stack.
+
+## Developing
+
+To develop a new component integration, follow these steps.
+
+1. [Create a GCE cluster](https://github.com/openshift/release/tree/master/cluster/test-deploy) with the monitoring stack enabled by editing your profile prior to launch (e.g. `gcp-dev/vars.yaml`):
+
+    ```yaml
+    openshift_monitoring_deploy: true
+    ```
+
+1. Clone the [Tectonic Prometheus Operator repository](https://github.com/coreos-inc/tectonic-prometheus-operator).
+
+1. To register a new component for metrics scraping:
+
+    1. Follow the Tectonic Prometheus Operator [instructions](https://github.com/coreos-inc/tectonic-prometheus-operator) to register a new builtin component (*Note: the Go code portions can be skipped while prototyping but must be completed before a PR is submitted*).
+    1. Create the new `ServiceMonitor` manually with:
+    
+        ```shell
+        oc apply -n openshift-monitoring -f assets/prometheus-k8s/prometheus-k8s-service-monitor-$COMPONENT.yaml
+        ```
+
+1. To add a new alerting rule:
+  
+    1. Follow the Tectonic Prometheus Operator [instructions](https://github.com/coreos-inc/tectonic-prometheus-operator) to add a new alerting rule.
+    1. Rebuild the rules `ConfigMap` manually with:
+    
+        ```shell
+        hack/generate-rules-configmap.sh k8s | oc apply -n openshift-monitoring -f -
+        ```
+
+## Shipping
+
+To ship a new component integration, the following things must happen:
+
+1. The component must be accepted into [Tectonic Prometheus Operator](https://github.com/coreos-inc/tectonic-prometheus-operator) and available in a new Tectonic Prometheus Operator image.
+
+1. The `openshift_cluster_monitoring_operator` role must be updated to use the new Tectonic Prometheus Operator image containing the new component integration.
+
+# Installation
+
+See the [openshift-monitoring playbook](../../playbooks/openshift-monitoring) for installation options.
+
+## Role Variables
+
+For default values, see [`defaults/main.yaml`](defaults/main.yaml). An example override sketch follows the list.
+
+- `openshift_cluster_monitoring_operator_install`: `true` to install/update, `false` to uninstall. Defaults to `true`.
+- `openshift_cluster_monitoring_operator_image`: TPO image to use
+- `openshift_cluster_monitoring_operator_prometheus_operator_repo`: Prometheus Operator repo to pull the image from 
+- `openshift_cluster_monitoring_operator_prometheus_repo`: Prometheus repo to pull the image from
+- `openshift_cluster_monitoring_operator_alertmanager_repo`: Alertmanager repo to pull the image from
+- `openshift_cluster_monitoring_operator_prometheus_reloader_repo`: Prometheus Reloader repo to pull the image from
+- `openshift_cluster_monitoring_operator_configmap_reloader_repo`: ConfigMap reloader repo to pull the image from
+
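+A sketch of overriding these in a vars file (the values shown are
+illustrative, taken from the defaults):
+
+```yaml
+# uninstall instead of installing
+openshift_cluster_monitoring_operator_install: false
+# or pin a specific operator image
+openshift_cluster_monitoring_operator_image: quay.io/coreos/cluster-monitoring-operator:198d25d6
+```
+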
+# Requirements
+
+Ansible 2.4
+
+# Dependencies
+
+- lib_openshift
+- lib_utils
+- openshift_facts
+
+# License
+
+Apache License, Version 2.0

+ 7 - 0
roles/openshift_cluster_monitoring_operator/defaults/main.yml

@@ -0,0 +1,7 @@
+---
+openshift_cluster_monitoring_operator_image: quay.io/coreos/cluster-monitoring-operator:198d25d6
+openshift_cluster_monitoring_operator_prometheus_operator_repo: quay.io/coreos/prometheus-operator-dev
+openshift_cluster_monitoring_operator_prometheus_repo: quay.io/prometheus/prometheus
+openshift_cluster_monitoring_operator_alertmanager_repo: quay.io/prometheus/alertmanager
+openshift_cluster_monitoring_operator_prometheus_reloader_repo: quay.io/coreos/prometheus-config-reloader
+openshift_cluster_monitoring_operator_configmap_reloader_repo: quay.io/coreos/configmap-reload

+ 81 - 0
roles/openshift_cluster_monitoring_operator/files/cluster-monitoring-operator.yaml

@@ -0,0 +1,81 @@
+---
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  name: openshift-cluster-monitoring-operator
+  annotations:
+    openshift.io/display-name: OpenShift Cluster Monitoring Operator
+    description: The OpenShift Cluster Monitoring Operator manages the deployment of the OpenShift monitoring platform.
+    iconClass: icon-openshift
+    tags: openshift,infra,monitoring
+    openshift.io/documentation-url: https://github.com/coreos-inc/tectonic-prometheus-operator/
+    openshift.io/support-url: https://access.redhat.com
+    openshift.io/provider-display-name: Red Hat, Inc.
+parameters:
+- name: OPERATOR_IMAGE
+  value: quay.io/coreos/cluster-monitoring-operator:198d25d6
+- name: PROMETHEUS_OPERATOR_IMAGE
+  value: quay.io/coreos/prometheus-operator-dev
+- name: ALERTMANAGER_IMAGE
+  value: quay.io/prometheus/alertmanager
+- name: PROMETHEUS_IMAGE
+  value: quay.io/prometheus/prometheus
+- name: PROMETHEUS_CONFIG_RELOADER_IMAGE
+  value: quay.io/coreos/prometheus-config-reloader
+- name: CONFIG_RELOADER_IMAGE
+  value: quay.io/coreos/configmap-reload
+- name: NAMESPACE
+  # This namespace cannot be changed. Only `openshift-monitoring` is supported.
+  value: openshift-monitoring
+objects:
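+# The operator reads its configuration from this ConfigMap (passed via the
+# -configmap argument on the Deployment below).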
+- apiVersion: v1
+  kind: ConfigMap
+  metadata:
+    name: cluster-monitoring-config
+    namespace: ${NAMESPACE}
+  data:
+    config.yaml: |+
+      prometheusOperator:
+        baseImage: ${PROMETHEUS_OPERATOR_IMAGE}
+        prometheusConfigReloaderBaseImage: ${PROMETHEUS_CONFIG_RELOADER_IMAGE}
+        configReloaderBaseImage: ${CONFIG_RELOADER_IMAGE}
+      prometheusK8s:
+        baseImage: ${PROMETHEUS_IMAGE}
+      alertmanagerMain:
+        baseImage: ${ALERTMANAGER_IMAGE}
+- apiVersion: extensions/v1beta1
+  kind: Deployment
+  metadata:
+    name: cluster-monitoring-operator
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: cluster-monitoring-operator
+      managed-by-channel-operator: "true"
+  spec:
+    replicas: 1
+    selector:
+      matchLabels:
+        k8s-app: cluster-monitoring-operator
+    template:
+      metadata:
+        labels:
+          k8s-app: cluster-monitoring-operator
+          tectonic-app-version-name: tectonic-monitoring
+      spec:
+        containers:
+        - image: ${OPERATOR_IMAGE}
+          name: cluster-monitoring-operator
+          args:
+          - "-namespace=${NAMESPACE}"
+          - "-configmap=cluster-monitoring-config"
+          - "-logtostderr=true"
+          - "-v=4"
+          resources:
+            limits:
+              cpu: 20m
+              memory: 50Mi
+            requests:
+              cpu: 20m
+              memory: 50Mi
+        restartPolicy: Always
+        terminationGracePeriodSeconds: 30

+ 23 - 0
roles/openshift_cluster_monitoring_operator/meta/main.yaml

@@ -0,0 +1,23 @@
+---
+galaxy_info:
+  author: OpenShift Development <dev@lists.openshift.redhat.com>
+  description: Deploy OpenShift cluster monitoring operator
+  company: Red Hat, Inc.
+  license: Apache License, Version 2.0
+  min_ansible_version: 2.4
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  - name: Fedora
+    versions:
+    - all
+  categories:
+  - openshift
+  - monitoring
+  - prometheus
+  - operator
+dependencies:
+- role: lib_openshift
+- role: lib_utils
+- role: openshift_facts

+ 51 - 0
roles/openshift_cluster_monitoring_operator/tasks/install.yaml

@@ -0,0 +1,51 @@
+---
+- name: Create temp directory for doing work on the target
+  command: mktemp -td openshift-cluster-monitoring-ansible-XXXXXX
+  register: mktemp
+  changed_when: False
+
+- name: Copy files to temp directory
+  copy:
+    src: "{{ item }}"
+    dest: "{{ mktemp.stdout }}/{{ item }}"
+  with_items:
+  - cluster-monitoring-operator.yaml
+
+- name: Copy admin client config
+  command: >
+    cp {{ openshift.common.config_base }}/master/admin.kubeconfig {{ mktemp.stdout }}/admin.kubeconfig
+  changed_when: false
+
+- name: Add monitoring project
+  oc_project:
+    state: present
+    name: openshift-monitoring
+    description: OpenShift Monitoring
+    node_selector: ""
+
+# TODO: remove the need for cluster-admin permission
+- name: Add cluster admin permission
+  command: >
+    {{ openshift_client_binary }} adm policy add-cluster-role-to-user cluster-admin -z default
+    --config={{ mktemp.stdout }}/admin.kubeconfig
+    -n openshift-monitoring
+
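+# 'oc process' renders the template with the image parameters; the rendered
+# object list is piped to 'oc apply' to create or update the resources.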
+- name: Apply the cluster monitoring operator template
+  shell: >
+    {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/{{ item }}"
+    --param OPERATOR_IMAGE="{{ openshift_cluster_monitoring_operator_image }}"
+    --param PROMETHEUS_OPERATOR_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_operator_repo }}"
+    --param ALERTMANAGER_IMAGE="{{ openshift_cluster_monitoring_operator_alertmanager_repo }}"
+    --param PROMETHEUS_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_repo }}"
+    --param PROMETHEUS_CONFIG_RELOADER_IMAGE="{{ openshift_cluster_monitoring_operator_prometheus_reloader_repo }}"
+    --param CONFIG_RELOADER_IMAGE="{{ openshift_cluster_monitoring_operator_configmap_reloader_repo }}"
+    --config={{ mktemp.stdout }}/admin.kubeconfig
+    | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f -
+  with_items:
+  - cluster-monitoring-operator.yaml
+
+- name: Delete temp directory
+  file:
+    name: "{{ mktemp.stdout }}"
+    state: absent
+  changed_when: False

+ 6 - 0
roles/openshift_cluster_monitoring_operator/tasks/main.yaml

@@ -0,0 +1,6 @@
+---
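+# A single install flag toggles between install/update and removal.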
+- include_tasks: install.yaml
+  when: openshift_cluster_monitoring_operator_install | default(true) | bool
+
+- include_tasks: remove.yaml
+  when: not openshift_cluster_monitoring_operator_install | default(true) | bool

+ 5 - 0
roles/openshift_cluster_monitoring_operator/tasks/remove.yaml

@@ -0,0 +1,5 @@
+---
+- name: Remove openshift-monitoring project
+  oc_project:
+    name: openshift-monitoring
+    state: absent