Browse Source

Merge pull request #7938 from ironcladlou/monitoring-instruments

Add a new monitoring availability component
OpenShift Merge Robot 7 years ago
parent
commit
1f0d279892

+ 3 - 0
playbooks/common/private/components.yml

@@ -34,6 +34,9 @@
 - import_playbook: ../../openshift-monitoring/private/config.yml
   when: openshift_monitoring_deploy | default(false) | bool
 
+- import_playbook: ../../openshift-monitor-availability/private/config.yml
+  when: openshift_monitor_availability_install | default(false) | bool
+
 - import_playbook: ../../openshift-service-catalog/private/config.yml
   when: openshift_enable_service_catalog | default(true) | bool
 

+ 12 - 0
playbooks/openshift-monitor-availability/OWNERS

@@ -0,0 +1,12 @@
+# approval == this is a good idea /approve
+approvers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz
+# review == this code is good /lgtm
+reviewers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz

+ 8 - 0
playbooks/openshift-monitor-availability/README.md

@@ -0,0 +1,8 @@
+# OpenShift Availability Monitoring
+
+This playbook runs the [OpenShift Availability Monitoring role](../../roles/openshift_monitor_availability). See the role
+for more information.
+
+## GCP Development
+
+The `install-gcp.yml` playbook is useful for ad-hoc installation in an existing GCE cluster.

+ 10 - 0
playbooks/openshift-monitor-availability/config.yml

@@ -0,0 +1,10 @@
+---
+- import_playbook: ../init/main.yml
+  vars:
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- name: setup OpenShift availability monitoring
+  import_playbook: ./private/config.yml

+ 17 - 0
playbooks/openshift-monitor-availability/install-gcp.yml

@@ -0,0 +1,17 @@
+---
+- hosts: localhost
+  connection: local
+  tasks:
+  - name: place all scale groups into Ansible groups
+    include_role:
+      name: openshift_gcp
+      tasks_from: setup_scale_group_facts.yml
+
+- import_playbook: ../init/main.yml
+  vars:
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- import_playbook: private/config.yml

+ 30 - 0
playbooks/openshift-monitor-availability/private/config.yml

@@ -0,0 +1,30 @@
+---
+- name: Monitor Availability Start
+  hosts: all
+  gather_facts: false
+  tasks:
+  - name: Set Monitor Availability 'In Progress'
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_monitor_availability:
+          status: "In Progress"
+          start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
+
+
+- name: Configure Monitor Availability
+  hosts: oo_first_master
+  roles:
+  - role: openshift_monitor_availability
+
+- name: Monitor Availability End
+  hosts: all
+  gather_facts: false
+  tasks:
+  - name: Set Monitor Availability 'Complete'
+    run_once: true
+    set_stats:
+      data:
+        installer_phase_monitor_availability:
+          status: "Complete"
+          end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"

+ 1 - 0
playbooks/openshift-monitor-availability/private/roles

@@ -0,0 +1 @@
+../../../roles/

+ 7 - 0
playbooks/openshift-monitor-availability/private/upgrade.yml

@@ -0,0 +1,7 @@
+---
+- name: Upgrade OpenShift Availability Monitoring
+  hosts: oo_first_master
+  tasks:
+  - import_role:
+      name: openshift_monitor_availability
+      tasks_from: upgrade.yaml

+ 9 - 0
playbooks/openshift-monitor-availability/upgrade.yml

@@ -0,0 +1,9 @@
+---
+- import_playbook: ../init/main.yml
+  vars:
+    l_init_fact_hosts: "oo_masters_to_config"
+    l_openshift_version_set_hosts: "oo_masters_to_config:!oo_first_master"
+    l_openshift_version_check_hosts: "all:!all"
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+
+- import_playbook: ./private/upgrade.yml

+ 12 - 0
roles/openshift_monitor_availability/OWNERS

@@ -0,0 +1,12 @@
+# approval == this is a good idea /approve
+approvers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz
+# review == this code is good /lgtm
+reviewers:
+  - ironcladlou
+  - elad661
+  - mxinden
+  - brancz

+ 34 - 0
roles/openshift_monitor_availability/README.md

@@ -0,0 +1,34 @@
+# OpenShift Availability Monitoring
+
+### **NOTE: This component is unsupported in OCP at this time.**
+
+These are black box tests of several components which enable SLA verification by
+capturing metrics for SLIs. All of the tests in this role should be used to
+measure availability. Each test should expose metrics endpoints for scraping by
+the monitoring platform (Prometheus).
+
+All of the test applications are installed into the `openshift-monitor-availability` namespace and are enabled/disabled using the `openshift_monitor_availability_install` variable.
+
+## Adding a new application
+
+To add a new application to the installer:
+
+1. Add an OpenShift Template to the `files` directory which can be used with `oc process | oc apply` to install the application.
+2. Create an Ansible task in the `tasks` directory, e.g. `install_{APP}.yaml`. The task should install the application into the `openshift-monitor-availability` namespace.
+3. Include the new task in `install.yaml`:
+
+        - import_tasks: install_{APP}.yaml
+
+
+## Guidelines
+
+Here are some guidelines for applications:
+
+* App metrics endpoints **must be secured**. Use the [oauth-proxy](https://github.com/openshift/oauth-proxy) or [kube-rbac-proxy](https://github.com/brancz/kube-rbac-proxy).
+* Templates should be usable outside the Ansible role (e.g. directly via `oc process`); avoid Jinja templates if possible.
+* As with all other Ansible roles in the installer, app tasks must be idempotent.
+* Minimize configuration, be opinionated.
+
+# License
+
+Apache License, Version 2.0

+ 16 - 0
roles/openshift_monitor_availability/defaults/main.yml

@@ -0,0 +1,16 @@
+---
+openshift_monitor_app_create_images:
+  origin:
+    prefix: "quay.io/redhat/openshift-monitor-project-lifecycle"
+    version: "0.0.1"
+  openshift-enterprise:
+    prefix: "quay.io/redhat/openshift-monitor-project-lifecycle"
+    version: "0.0.1"
+
+openshift_monitor_app_create_image_prefix: "{{ openshift_monitor_app_create_images[openshift_deployment_type]['prefix'] }}"
+openshift_monitor_app_create_image_version: "{{ openshift_monitor_app_create_images[openshift_deployment_type]['version'] }}"
+openshift_monitor_app_create_image: "{{ openshift_monitor_app_create_image_prefix }}:{{ openshift_monitor_app_create_image_version }}"
+
+openshift_monitor_app_create_run_interval: 5m
+openshift_monitor_app_create_timeout: 5m
+openshift_monitor_app_create_log_level: 0

+ 170 - 0
roles/openshift_monitor_availability/files/monitor-app-create.yaml

@@ -0,0 +1,170 @@
+---
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  name: openshift-monitor-app-create
+  annotations:
+    openshift.io/display-name: OpenShift App Create Availability Monitor
+    description: Measures app create operation availability.
+    iconClass: icon-openshift
+    tags: openshift,infra,monitoring
+    openshift.io/documentation-url: https://github.com/openshift/monitor-project-lifecycle
+    openshift.io/support-url: https://access.redhat.com
+    openshift.io/provider-display-name: Red Hat, Inc.
+parameters:
+- name: IMAGE
+  description: The application container image to use.
+  required: true
+- name: LOG_LEVEL
+  value: "0"
+  description: Application logging level.
+- name: RUN_INTERVAL
+  value: "5m"
+  description: How often to run the measurement loop, as a duration string.
+- name: TIMEOUT
+  value: "5m"
+  description: How long to wait for the test app to become available before giving up.
+- name: NAMESPACE
+  # This namespace cannot be changed.
+  value: openshift-monitor-availability
+objects:
+- apiVersion: rbac.authorization.k8s.io/v1beta1
+  kind: ClusterRole
+  metadata: {name: monitor-app-create}
+  rules:
+  # These are for the auth proxy.
+  - apiGroups: ["authentication.k8s.io"]
+    resources:
+    - tokenreviews
+    verbs: ["create"]
+  - apiGroups: ["authorization.k8s.io"]
+    resources:
+    - subjectaccessreviews
+    verbs: ["create"]
+  # These are for the app itself.
+  - apiGroups: [project.openshift.io]
+    resources: [projects, projectrequests]
+    verbs: ['*']
+
+- apiVersion: v1
+  kind: ServiceAccount
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+
+- apiVersion: rbac.authorization.k8s.io/v1beta1
+  kind: ClusterRoleBinding
+  metadata:
+    name: monitor-app-create
+  roleRef:
+    apiGroup: rbac.authorization.k8s.io
+    kind: ClusterRole
+    name: monitor-app-create
+  subjects:
+  - kind: ServiceAccount
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+
+- apiVersion: v1
+  kind: Service
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: monitor-app-create
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: monitor-app-create-tls
+  spec:
+    ports:
+    - name: http-metrics
+      port: 443
+      protocol: TCP
+      targetPort: https
+    selector:
+      k8s-app: monitor-app-create
+
+- apiVersion: v1
+  kind: ConfigMap
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: monitor-app-create
+  data:
+    config.yaml: |
+      listenAddress: "127.0.0.1:8080"
+      runInterval: "${RUN_INTERVAL}"
+      availabilityTimeout: "${TIMEOUT}"
+      template:
+        namespace: openshift
+        name: django-psql-persistent
+        availabilityRoute: django-psql-persistent
+        parameters: # Empty, use template defaults
+
+- apiVersion: extensions/v1beta1
+  kind: Deployment
+  metadata:
+    name: monitor-app-create
+    namespace: ${NAMESPACE}
+    labels:
+      k8s-app: monitor-app-create
+  spec:
+    replicas: 1
+    selector:
+      matchLabels:
+        k8s-app: monitor-app-create
+    template:
+      metadata:
+        labels:
+          k8s-app: monitor-app-create
+      spec:
+        serviceAccountName: monitor-app-create
+        volumes:
+        - name: config
+          configMap:
+            name: monitor-app-create
+        - name: tls
+          secret:
+            secretName: monitor-app-create-tls
+        containers:
+        - name: monitor-app-create
+          image: ${IMAGE}
+          command:
+          - "/usr/bin/monitor"
+          - "run"
+          - "--alsologtostderr"
+          - "--v"
+          - "${LOG_LEVEL}"
+          - "--config"
+          - "/etc/monitor-app-create/config.yaml"
+          volumeMounts:
+          - name: config
+            mountPath: /etc/monitor-app-create
+          resources:
+            limits:
+              cpu: 20m
+              memory: 50Mi
+            requests:
+              cpu: 20m
+              memory: 50Mi
+        - name: kube-rbac-proxy
+          image: quay.io/coreos/kube-rbac-proxy:v0.3.0
+          args:
+          - "--secure-listen-address=:8081"
+          - "--upstream=http://127.0.0.1:8080/"
+          - "--tls-cert-file=/etc/tls/private/tls.crt"
+          - "--tls-private-key-file=/etc/tls/private/tls.key"
+          ports:
+          - name: https
+            containerPort: 8081
+            protocol: TCP
+          resources:
+            requests:
+              memory: 20Mi
+              cpu: 10m
+            limits:
+              memory: 40Mi
+              cpu: 20m
+          volumeMounts:
+          - mountPath: /etc/tls/private
+            name: tls

+ 23 - 0
roles/openshift_monitor_availability/meta/main.yaml

@@ -0,0 +1,23 @@
+---
+galaxy_info:
+  author: OpenShift Development <dev@lists.openshift.redhat.com>
+  description: Deploy OpenShift availability monitoring.
+  company: Red Hat, Inc.
+  license: Apache License, Version 2.0
+  min_ansible_version: 2.4
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  - name: Fedora
+    versions:
+    - all
+  categories:
+  - openshift
+  - monitoring
+  - prometheus
+  - operator
+dependencies:
+- role: lib_openshift
+- role: lib_utils
+- role: openshift_facts

+ 8 - 0
roles/openshift_monitor_availability/tasks/install.yaml

@@ -0,0 +1,8 @@
+---
+- name: Ensure openshift-monitor-availability project
+  oc_project:
+    state: present
+    name: openshift-monitor-availability
+    description: Openshift availability monitoring applications.
+
+- import_tasks: install_monitor_app_create.yaml

+ 31 - 0
roles/openshift_monitor_availability/tasks/install_monitor_app_create.yaml

@@ -0,0 +1,31 @@
+---
+- name: Create temp directory for doing work in on target
+  command: mktemp -td openshift-monitor-app-create-ansible-XXXXXX
+  register: mktemp
+  changed_when: False
+
+- name: Copy files to temp directory
+  copy:
+    src: monitor-app-create.yaml
+    dest: "{{ mktemp.stdout }}/monitor-app-create.yaml"
+
+- name: Copy admin client config
+  copy:
+    src: "{{ openshift.common.config_base }}/master/admin.kubeconfig"
+    dest: "{{ mktemp.stdout }}/admin.kubeconfig"
+    remote_src: yes
+
+- name: Apply the app template
+  shell: >
+    {{ openshift_client_binary }} process -f "{{ mktemp.stdout }}/monitor-app-create.yaml"
+    --param IMAGE="{{ openshift_monitor_app_create_image }}"
+    --param RUN_INTERVAL="{{ openshift_monitor_app_create_run_interval }}"
+    --param TIMEOUT="{{ openshift_monitor_app_create_timeout }}"
+    --param LOG_LEVEL="{{ openshift_monitor_app_create_log_level }}"
+    | {{ openshift_client_binary }} apply --config={{ mktemp.stdout }}/admin.kubeconfig -f -
+
+- name: Delete temp directory
+  file:
+    name: "{{ mktemp.stdout }}"
+    state: absent
+  changed_when: False

+ 6 - 0
roles/openshift_monitor_availability/tasks/main.yaml

@@ -0,0 +1,6 @@
+---
+- include_tasks: install.yaml
+  when: openshift_monitor_availability_install | default(false) | bool
+
+- include_tasks: remove.yaml
+  when: not openshift_monitor_availability_install | default(false) | bool

+ 5 - 0
roles/openshift_monitor_availability/tasks/remove.yaml

@@ -0,0 +1,5 @@
+---
+- name: Remove availability monitoring
+  oc_project:
+    name: openshift-monitor-availability
+    state: absent

+ 6 - 0
roles/openshift_monitor_availability/tasks/upgrade.yaml

@@ -0,0 +1,6 @@
+---
+- include_tasks: install.yaml
+  when: openshift_monitor_availability_install | default(false) | bool
+
+- include_tasks: remove.yaml
+  when: not openshift_monitor_availability_install | default(false) | bool