Browse Source

Add a new monitoring availability component

Introduce a monitoring availability component which installs all builtin
availability tests into an openshift-monitor-availability namespace. These are
black box tests of several components which capture SLI metrics to verify our
SLAs. All of these tests installed here should be used to measure availability.
Dan Mace 7 years ago
parent
commit
dcbe4b2fac

+ 3 - 0
playbooks/common/private/components.yml

@@ -34,6 +34,9 @@
 - import_playbook: ../../openshift-monitoring/private/config.yml
   when: openshift_monitoring_deploy | default(false) | bool
 
+# Availability monitoring (black-box SLI tests); opt-in, disabled by default.
+- import_playbook: ../../openshift-monitor-availability/private/config.yml
+  when: openshift_monitor_availability_install | default(false) | bool
+
 - import_playbook: ../../openshift-service-catalog/private/config.yml
   when: openshift_enable_service_catalog | default(true) | bool
 

+ 12 - 0
playbooks/openshift-monitor-availability/OWNERS

@@ -0,0 +1,12 @@
+# approval == this is a good idea /approve
+approvers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz
+# review == this code is good /lgtm
+reviewers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz

+ 8 - 0
playbooks/openshift-monitor-availability/README.md

@@ -0,0 +1,8 @@
+# OpenShift Availability Monitoring
+
+This playbook runs the [OpenShift Availability Monitoring role](../../roles/openshift_monitor_availability). See the role
+for more information.
+
+## GCP Development
+
+The `install-gcp.yml` playbook is useful for ad-hoc installation in an existing GCE cluster.

+ 10 - 0
playbooks/openshift-monitor-availability/config.yml

@@ -0,0 +1,10 @@
+---
+# Standalone entry point: initialize facts on the masters, then run the
+# private config playbook to install/remove the availability monitors.
+- import_playbook: ../init/main.yml
+  vars:
+    # Restrict fact gathering to masters; "all:!all" matches no hosts, so the
+    # version check is effectively skipped for this playbook.
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- name: setup OpenShift availability monitoring
+  import_playbook: ./private/config.yml

+ 17 - 0
playbooks/openshift-monitor-availability/install-gcp.yml

@@ -0,0 +1,17 @@
+---
+# Ad-hoc install path for an existing GCE cluster: build inventory groups from
+# GCP scale groups first, then init facts and run the standard config playbook.
+- hosts: localhost
+  connection: local
+  tasks:
+  - name: place all scale groups into Ansible groups
+    include_role:
+      name: openshift_gcp
+      tasks_from: setup_scale_group_facts.yml
+
+- import_playbook: ../init/main.yml
+  vars:
+    # Same init vars as config.yml: facts from masters only; "all:!all"
+    # matches no hosts, skipping the version check.
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- import_playbook: private/config.yml

+ 30 - 0
playbooks/openshift-monitor-availability/private/config.yml

@@ -0,0 +1,30 @@
+---
+# Installer-phase bookkeeping: record "In Progress" via set_stats, run the
+# role on the first master, then record "Complete" with an end timestamp.
+- name: Monitor Availability Start
+  hosts: all
+  gather_facts: false
+  tasks:
+  - name: Set Monitor Availability 'In Progress'
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_monitor_availability:
+          status: "In Progress"
+          start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
+
+
+# The role itself decides install vs. remove via
+# openshift_monitor_availability_install (see roles/openshift_monitor_availability).
+- name: Configure Monitor Availability
+  hosts: oo_first_master
+  roles:
+  - role: openshift_monitor_availability
+
+- name: Monitor Availability End
+  hosts: all
+  gather_facts: false
+  tasks:
+  - name: Set Monitor Availability 'Complete'
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_monitor_availability:
+          status: "Complete"
+          end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"

+ 1 - 0
playbooks/openshift-monitor-availability/private/roles

@@ -0,0 +1 @@
+../../../roles/

+ 7 - 0
playbooks/openshift-monitor-availability/private/upgrade.yml

@@ -0,0 +1,7 @@
+---
+# Upgrade = re-run the role's upgrade tasks on the first master; those tasks
+# install or remove based on the openshift_monitor_availability_install toggle.
+- name: Upgrade OpenShift Availability Monitoring
+  hosts: oo_first_master
+  tasks:
+  - import_role:
+      name: openshift_monitor_availability
+      tasks_from: upgrade.yaml

+ 9 - 0
playbooks/openshift-monitor-availability/upgrade.yml

@@ -0,0 +1,9 @@
+---
+# Standalone upgrade entry point: init facts on masters, then run the
+# private upgrade playbook.
+- import_playbook: ../init/main.yml
+  vars:
+    # Facts from masters only; "all:!all" matches no hosts (version check skipped).
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- import_playbook: ./private/upgrade.yml

+ 12 - 0
roles/openshift_monitor_availability/OWNERS

@@ -0,0 +1,12 @@
+# approval == this is a good idea /approve
+approvers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz
+# review == this code is good /lgtm
+reviewers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz

+ 34 - 0
roles/openshift_monitor_availability/README.md

@@ -0,0 +1,34 @@
+# OpenShift Availability Monitoring
+
+### **NOTE: This component is unsupported in OCP at this time.**
+
+These are black box tests of several components which enable SLA verification by
+capturing metrics for SLIs. All of the tests in this role should be used to
+measure availability. Each test should expose metrics endpoints for scraping by
+the monitoring platform (Prometheus).
+
+All of the test applications are installed into the `openshift-monitor-availability` namespace and are enabled/disabled using the `openshift_monitor_availability_install` variable.
+
+## Adding a new application
+
+To add a new application to the installer:
+
+1. Add an OpenShift Template to the `files` directory which can be used with `oc process | oc apply` to install the application.
+2. Create an Ansible task in the `tasks` directory, e.g. `install_{APP}.yaml`. The task should install the application into the `openshift-monitor-availability` namespace.
+3. Include the new task in `install.yaml`:
+
+        - import_tasks: install_{APP}.yaml
+
+
+## Guidelines
+
+Here are some guidelines for applications:
+
+* App metrics endpoints **must be secured**. Use the [oauth-proxy](https://github.com/openshift/oauth-proxy) or [kube-rbac-proxy](https://github.com/brancz/kube-rbac-proxy).
+* Templates should be usable outside the Ansible role (e.g. directly via `oc process`); avoid Jinja templates if possible.
+* As with all other Ansible roles in the installer, app tasks must be idempotent.
+* Minimize configuration, be opinionated.
+
+# License
+
+Apache License, Version 2.0

+ 16 - 0
roles/openshift_monitor_availability/defaults/main.yml

@@ -0,0 +1,16 @@
+---
+# Image coordinates keyed by deployment type (origin vs. enterprise); both
+# currently point at the same upstream image and tag.
+openshift_monitor_app_create_images:
+  origin:
+    prefix: "quay.io/redhat/openshift-monitor-project-lifecycle"
+    version: "0.0.1"
+  openshift-enterprise:
+    prefix: "quay.io/redhat/openshift-monitor-project-lifecycle"
+    version: "0.0.1"
+
+# Resolved image reference for the active openshift_deployment_type.
+openshift_monitor_app_create_image_prefix: "{{ openshift_monitor_app_create_images[openshift_deployment_type]['prefix'] }}"
+openshift_monitor_app_create_image_version: "{{ openshift_monitor_app_create_images[openshift_deployment_type]['version'] }}"
+openshift_monitor_app_create_image: "{{ openshift_monitor_app_create_image_prefix }}:{{ openshift_monitor_app_create_image_version }}"
+
+# Measurement-loop tuning (duration strings passed to the template) and
+# application log verbosity.
+openshift_monitor_app_create_run_interval: 5m
+openshift_monitor_app_create_timeout: 5m
+openshift_monitor_app_create_log_level: 0

+ 170 - 0
roles/openshift_monitor_availability/files/monitor-app-create.yaml

@@ -0,0 +1,170 @@
+---
+# OpenShift Template installing the app-create availability monitor plus the
+# RBAC, Service, and config it needs. Processed by
+# tasks/install_monitor_app_create.yaml via `oc process | oc apply`.
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  name: openshift-monitor-app-create
+  annotations:
+    openshift.io/display-name: OpenShift App Create Availability Monitor
+    description: Measures app create operation availability.
+    iconClass: icon-openshift
+    tags: openshift,infra,monitoring
+    openshift.io/documentation-url: https://github.com/openshift/monitor-project-lifecycle
+    openshift.io/support-url: https://access.redhat.com
+    # FIX: this annotation was previously at top level (column 0), making it a
+    # stray top-level key on the Template; it belongs under metadata.annotations.
+    openshift.io/provider-display-name: Red Hat, Inc.
+parameters:
+- name: IMAGE
+  description: The application container image to use.
+  required: true
+- name: LOG_LEVEL
+  value: "0"
+  description: Application logging level.
+- name: RUN_INTERVAL
+  value: "5m"
+  description: How often to run the measurement loop, as a duration string.
+- name: TIMEOUT
+  value: "5m"
+  description: How long to wait for the test app to become available before giving up.
+- name: NAMESPACE
+  # This namespace cannot be changed.
+  value: openshift-monitor-availability
+objects:
+# Cluster-wide permissions for the monitor and its auth sidecar.
+- apiVersion: rbac.authorization.k8s.io/v1beta1
+  kind: ClusterRole
+  metadata: {name: monitor-app-create}
+  rules:
+  # These are for the auth proxy.
+  - apiGroups: ["authentication.k8s.io"]
+    resources:
+    - tokenreviews
+    verbs: ["create"]
+  - apiGroups: ["authorization.k8s.io"]
+    resources:
+    - subjectaccessreviews
+    verbs: ["create"]
+  # These are for the app itself.
+  - apiGroups: [project.openshift.io]
+    resources: [projects, projectrequests]
+    verbs: ['*']
+
+- apiVersion: v1
+  kind: ServiceAccount
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+
+- apiVersion: rbac.authorization.k8s.io/v1beta1
+  kind: ClusterRoleBinding
+  metadata:
+    name: monitor-app-create
+  roleRef:
+    apiGroup: rbac.authorization.k8s.io
+    kind: ClusterRole
+    name: monitor-app-create
+  subjects:
+  - kind: ServiceAccount
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+
+# Metrics Service; the serving-cert annotation provisions the TLS secret
+# mounted by the kube-rbac-proxy sidecar below.
+- apiVersion: v1
+  kind: Service
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: monitor-app-create
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: monitor-app-create-tls
+  spec:
+    ports:
+    - name: http-metrics
+      port: 443
+      protocol: TCP
+      targetPort: https
+    selector:
+      k8s-app: monitor-app-create
+
+# Monitor configuration consumed via the "config" volume mount.
+- apiVersion: v1
+  kind: ConfigMap
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: monitor-app-create
+  data:
+    config.yaml: |
+      listenAddress: "127.0.0.1:8080"
+      runInterval: "${RUN_INTERVAL}"
+      availabilityTimeout: "${TIMEOUT}"
+      template:
+        namespace: openshift
+        name: django-psql-persistent
+        availabilityRoute: django-psql-persistent
+        parameters: # Empty, use template defaults
+
+# Monitor Deployment: the app serves metrics on localhost:8080; kube-rbac-proxy
+# fronts it with TLS + RBAC on :8081 (the Service's "https" target port).
+- apiVersion: extensions/v1beta1
+  kind: Deployment
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: monitor-app-create
+  spec:
+    replicas: 1
+    selector:
+      matchLabels:
+        k8s-app: monitor-app-create
+    template:
+      metadata:
+        labels:
+          k8s-app: monitor-app-create
+      spec:
+        serviceAccountName: monitor-app-create
+        volumes:
+        - name: config
+          configMap:
+            name: monitor-app-create
+        - name: tls
+          secret:
+            secretName: monitor-app-create-tls
+        containers:
+        - name: monitor-app-create
+          image: ${IMAGE}
+          command:
+          - "/usr/bin/monitor"
+          - "run"
+          - "--alsologtostderr"
+          - "--v"
+          - "${LOG_LEVEL}"
+          - "--config"
+          - "/etc/monitor-app-create/config.yaml"
+          volumeMounts:
+          - name: config
+            mountPath: /etc/monitor-app-create
+          resources:
+            limits:
+              cpu: 20m
+              memory: 50Mi
+            requests:
+              cpu: 20m
+              memory: 50Mi
+        - name: kube-rbac-proxy
+          image: quay.io/coreos/kube-rbac-proxy:v0.3.0
+          args:
+          - "--secure-listen-address=:8081"
+          - "--upstream=http://127.0.0.1:8080/"
+          - "--tls-cert-file=/etc/tls/private/tls.crt"
+          - "--tls-private-key-file=/etc/tls/private/tls.key"
+          ports:
+          - name: https
+            containerPort: 8081
+            protocol: TCP
+          resources:
+            requests:
+              memory: 20Mi
+              cpu: 10m
+            limits:
+              memory: 40Mi
+              cpu: 20m
+          volumeMounts:
+          - mountPath: /etc/tls/private
+            name: tls

+ 23 - 0
roles/openshift_monitor_availability/meta/main.yaml

@@ -0,0 +1,23 @@
+---
+# Ansible Galaxy role metadata.
+galaxy_info:
+  author: OpenShift Development <dev@lists.openshift.redhat.com>
+  description: Deploy OpenShift availability monitoring.
+  company: Red Hat, Inc.
+  license: Apache License, Version 2.0
+  # Quoted so YAML keeps the version as the string "2.4" rather than the
+  # float 2.4 (avoids the 2.10-style float-truncation trap).
+  min_ansible_version: "2.4"
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  - name: Fedora
+    versions:
+    - all
+  categories:
+  - openshift
+  - monitoring
+  - prometheus
+  - operator
+dependencies:
+- role: lib_openshift
+- role: lib_utils
+- role: openshift_facts

+ 8 - 0
roles/openshift_monitor_availability/tasks/install.yaml

@@ -0,0 +1,8 @@
+---
+# Ensure the target namespace exists, then install each monitor application
+# into it (one install_{APP}.yaml task file per app; see the role README).
+- name: Ensure openshift-monitor-availability project
+  oc_project:
+    state: present
+    name: openshift-monitor-availability
+    description: Openshift availability monitoring applications.
+
+- import_tasks: install_monitor_app_create.yaml

+ 31 - 0
roles/openshift_monitor_availability/tasks/install_monitor_app_create.yaml

@@ -0,0 +1,31 @@
+---
+# Install the app-create monitor: stage the template and the admin kubeconfig
+# in a temp dir on the target, `oc process | oc apply`, then clean up.
+- name: Create temp directory for doing work in on target
+  command: mktemp -td openshift-monitor-app-create-ansible-XXXXXX
+  register: mktemp
+  # Canonical lowercase boolean (was "False"); temp-dir setup is not a change.
+  changed_when: false
+
+- name: Copy files to temp directory
+  copy:
+    src: monitor-app-create.yaml
+    dest: "{{ mktemp.stdout }}/monitor-app-create.yaml"
+
+- name: Copy admin client config
+  copy:
+    src: "{{ openshift.common.config_base }}/master/admin.kubeconfig"
+    dest: "{{ mktemp.stdout }}/admin.kubeconfig"
+    # Canonical boolean (was "yes"): the kubeconfig already lives on the target.
+    remote_src: true
+
+# `oc apply` keeps this idempotent at the cluster level even though the shell
+# task itself always reports changed.
+- name: Apply the app template
+  shell: >
+    {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/monitor-app-create.yaml"
+    --param IMAGE="{{ openshift_monitor_app_create_image }}"
+    --param RUN_INTERVAL="{{ openshift_monitor_app_create_run_interval }}"
+    --param TIMEOUT="{{ openshift_monitor_app_create_timeout }}"
+    --param LOG_LEVEL="{{ openshift_monitor_app_create_log_level }}"
+    | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f -
+
+- name: Delete temp directory
+  file:
+    # Canonical parameter name (was the "name" alias of "path").
+    path: "{{ mktemp.stdout }}"
+    state: absent
+  changed_when: false

+ 6 - 0
roles/openshift_monitor_availability/tasks/main.yaml

@@ -0,0 +1,6 @@
+---
+# Single toggle drives install vs. removal (default: removed).
+- include_tasks: install.yaml
+  when: openshift_monitor_availability_install | default(false) | bool
+
+- include_tasks: remove.yaml
+  when: not openshift_monitor_availability_install | default(false) | bool

+ 5 - 0
roles/openshift_monitor_availability/tasks/remove.yaml

@@ -0,0 +1,5 @@
+---
+# Deleting the project removes every monitor app installed into it.
+- name: Remove availability monitoring
+  oc_project:
+    name: openshift-monitor-availability
+    state: absent

+ 6 - 0
roles/openshift_monitor_availability/tasks/upgrade.yaml

@@ -0,0 +1,6 @@
+---
+# Upgrade mirrors main.yaml: re-apply (install) or remove based on the same
+# openshift_monitor_availability_install toggle.
+- include_tasks: install.yaml
+  when: openshift_monitor_availability_install | default(false) | bool
+
+- include_tasks: remove.yaml
+  when: not openshift_monitor_availability_install | default(false) | bool