Pārlūkot izejas kodu

Add auto-heal role and playbooks

This patch adds a new `openshift_autoheal` role that creates the objects
needed by the auto-heal service.

Signed-off-by: Juan Hernandez <jhernand@redhat.com>
Juan Hernandez 7 gadi atpakaļ
vecāks
revīzija
044789f84b

+ 3 - 0
playbooks/common/private/components.yml

@@ -48,3 +48,6 @@
 
 - import_playbook: ../../openshift-node-problem-detector/private/config.yml
   when: openshift_node_problem_detector_install | default(false) | bool
+
+# Import the auto-heal installation playbook only when
+# `openshift_autoheal_deploy` is set to a true value; an undefined variable is
+# treated as false.
+- import_playbook: ../../openshift-autoheal/private/config.yml
+  when: openshift_autoheal_deploy | default(false) | bool

+ 6 - 0
playbooks/openshift-autoheal/README.md

@@ -0,0 +1,6 @@
+# OpenShift Auto-heal Service
+
+This playbook installs the OpenShift Auto-heal service.
+
+For more details see the documentation of the
+[role](../roles/openshift_autoheal) that is used to install the service.

+ 9 - 0
playbooks/openshift-autoheal/config.yml

@@ -0,0 +1,9 @@
+---
+# Public entry point for installing the auto-heal service: imports the shared
+# init playbook, passing host patterns based on the master groups, and then
+# imports the private installation playbook.
+- import_playbook: ../init/main.yml
+  vars:
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+    l_openshift_version_check_hosts: 'all:!all'
+    l_openshift_version_set_hosts: 'oo_masters_to_config:!oo_first_master'
+    l_init_fact_hosts: 'oo_masters_to_config'
+
+- import_playbook: private/config.yml

+ 29 - 0
playbooks/openshift-autoheal/private/config.yml

@@ -0,0 +1,29 @@
+---
+# First play: record in the run statistics that the auto-heal phase started,
+# together with a timestamp taken with `date` through the `pipe` lookup.
+- name: Auto-heal Install Checkpoint Start
+  gather_facts: false
+  hosts: all
+  tasks:
+  - name: Set Auto-heal install 'In Progress'
+    set_stats:
+      data:
+        installer_phase_autoheal:
+          start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
+          status: 'In Progress'
+    run_once: true
+
+# Second play: apply the role on the first master.
+- name: Auto-heal
+  hosts: oo_first_master
+  roles:
+  - role: openshift_autoheal
+
+# Third play: record that the auto-heal phase finished, with the end timestamp.
+- name: Auto-heal Install Checkpoint End
+  gather_facts: false
+  hosts: all
+  tasks:
+  - name: Auto-heal install 'Complete'
+    set_stats:
+      data:
+        installer_phase_autoheal:
+          end: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
+          status: 'Complete'
+    run_once: true

+ 1 - 0
playbooks/openshift-autoheal/private/roles

@@ -0,0 +1 @@
+../../roles

+ 7 - 0
playbooks/openshift-autoheal/private/uninstall.yml

@@ -0,0 +1,7 @@
+---
+# Runs only the `uninstall.yml` task file of the `openshift_autoheal` role on
+# the first master, instead of the role's default entry point.
+- name: Uninstall Auto-heal
+  hosts: oo_first_master
+  tasks:
+  - include_role:
+      name: openshift_autoheal
+      tasks_from: uninstall.yml

+ 7 - 0
playbooks/openshift-autoheal/private/upgrade.yml

@@ -0,0 +1,7 @@
+---
+# Runs only the `upgrade.yml` task file of the `openshift_autoheal` role on
+# the first master, instead of the role's default entry point.
+- name: Upgrade Auto-heal
+  hosts: oo_first_master
+  tasks:
+  - import_role:
+      name: openshift_autoheal
+      tasks_from: upgrade.yml

+ 9 - 0
playbooks/openshift-autoheal/uninstall.yml

@@ -0,0 +1,9 @@
+---
+# Public entry point for removing the auto-heal service: imports the shared
+# init playbook, passing host patterns based on the master groups, and then
+# imports the private uninstall playbook.
+- import_playbook: ../init/main.yml
+  vars:
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+    l_openshift_version_check_hosts: 'all:!all'
+    l_openshift_version_set_hosts: 'oo_masters_to_config:!oo_first_master'
+    l_init_fact_hosts: 'oo_masters_to_config'
+
+- import_playbook: private/uninstall.yml

+ 9 - 0
playbooks/openshift-autoheal/upgrade.yml

@@ -0,0 +1,9 @@
+---
+# Public entry point for upgrading the auto-heal service: imports the shared
+# init playbook, passing host patterns based on the master groups, and then
+# imports the private upgrade playbook.
+- import_playbook: ../init/main.yml
+  vars:
+    l_sanity_check_hosts: "{{ groups['oo_masters_to_config'] }}"
+    l_openshift_version_check_hosts: 'all:!all'
+    l_openshift_version_set_hosts: 'oo_masters_to_config:!oo_first_master'
+    l_init_fact_hosts: 'oo_masters_to_config'
+
+- import_playbook: private/upgrade.yml

+ 27 - 0
roles/openshift_autoheal/README.md

@@ -0,0 +1,27 @@
+# OpenShift Auto-heal Service
+
+The OpenShift Auto-heal Service receives alert notifications from the
+[Prometheus alert manager](https://prometheus.io/docs/alerting/alertmanager) and
+tries to solve the root cause by executing Ansible
+[Tower](https://www.ansible.com/products/tower) or
+[AWX](https://github.com/ansible/awx) jobs.
+
+# Installation
+
+The [installation playbook](../../playbooks/openshift-autoheal) uses the
+following variables:
+
+- `openshift_autoheal_deploy`: `true` - install/update. `false` - uninstall.
+  Defaults to `false`.
+
+- `openshift_autoheal_config`: The content of the configuration of the
+  auto-heal service, as described in the [documentation](https://github.com/openshift/autoheal)
+  of the service and in this [example](https://github.com/openshift/autoheal/blob/master/autoheal.yml).
+
+# Requirements
+
+Ansible 2.4.
+
+## License
+
+Apache license, version 2.0.

+ 34 - 0
roles/openshift_autoheal/defaults/main.yml

@@ -0,0 +1,34 @@
+---
+
+#
+# Image name. The prefix and the version are looked up in the dictionary below
+# using the value of `openshift_deployment_type` as the key, and the full image
+# name is built as `<prefix>autoheal:<version>`. Each of the three derived
+# variables can be overridden individually.
+#
+openshift_autoheal_image_dict:
+  origin:
+    prefix: "docker.io/openshift/"
+    version: v0.0.1
+  openshift-enterprise:
+    prefix: "registry.access.redhat.com/openshift3/ose-"
+    version: "{{ openshift_image_tag }}"
+openshift_autoheal_image_prefix: "{{ openshift_autoheal_image_dict[openshift_deployment_type]['prefix'] }}"
+openshift_autoheal_image_version: "{{ openshift_autoheal_image_dict[openshift_deployment_type]['version'] }}"
+openshift_autoheal_image: "{{ openshift_autoheal_image_prefix }}autoheal:{{ openshift_autoheal_image_version }}"
+
+#
+# Content of the configuration file of the auto-heal service, given as a
+# literal block of YAML text. Note that this is a minimal example
+# configuration: the address and the credentials below are placeholders. For
+# more details and examples see the documentation of the auto-heal service:
+#
+#   https://github.com/openshift/autoheal
+#
+# In particular see the example configuration file:
+#
+#   https://github.com/openshift/autoheal/blob/master/autoheal.yml
+#
+openshift_autoheal_config: |+
+  awx:
+    address: "https://myawx.example.com/api"
+    credentials:
+      username: "autoheal"
+      password: "..."
+    project: "Auto-heal"

+ 198 - 0
roles/openshift_autoheal/files/template.yml

@@ -0,0 +1,198 @@
+---
+
+# OpenShift template that bundles the objects of the auto-heal service.
+apiVersion: v1
+kind: Template
+metadata:
+  name: autoheal-template
+  annotations:
+    description: "Auto-heal service"
+    tags: "autoheal"
+
+# Parameters referenced by the objects below as ${IMAGE}, ${CONFIG} and
+# ${SECRET}.
+parameters:
+- name: IMAGE
+  description: The name of the image.
+- name: CONFIG
+  description: The BASE64 encoded content of the configuration file.
+- name: SECRET
+  description: The BASE64 encoded secret used to encrypt OAuth session cookies.
+
+objects:
+
+# Service account referenced by the deployment (`serviceAccountName`) and by
+# the role bindings of this template.
+- apiVersion: v1
+  kind: ServiceAccount
+  metadata:
+    name: autoheal
+    labels:
+      app: autoheal
+
+# Role that only allows `get` on the secret named `autoheal-config`.
+- apiVersion: authorization.openshift.io/v1
+  kind: Role
+  metadata:
+    name: autoheal
+    labels:
+      app: autoheal
+  rules:
+  - apiGroups:
+    - ""
+    resources:
+    - secrets
+    resourceNames:
+    - autoheal-config
+    verbs:
+    - get
+
+# Cluster role that only allows `get` on secrets named `autoheal-access-key`.
+# The same resource, verb and name are used in the `-openshift-sar` and
+# `-openshift-delegate-urls` arguments of the proxy container.
+- apiVersion: authorization.openshift.io/v1
+  kind: ClusterRole
+  metadata:
+    name: autoheal-access
+    labels:
+      app: autoheal
+  rules:
+  - apiGroups:
+    - ""
+    resources:
+    - secrets
+    resourceNames:
+    - autoheal-access-key
+    verbs:
+    - get
+
+# Binds the `autoheal` role of the `openshift-autoheal` namespace to the
+# `autoheal` service account of the same namespace.
+- apiVersion: authorization.openshift.io/v1
+  kind: RoleBinding
+  metadata:
+    name: autoheal
+    labels:
+      app: autoheal
+  roleRef:
+    namespace: openshift-autoheal
+    name: autoheal
+  subjects:
+  - kind: ServiceAccount
+    namespace: openshift-autoheal
+    name: autoheal
+
+# Binds the `autoheal-access` cluster role to the `alertmanager-main` service
+# account of the `openshift-monitoring` namespace.
+- apiVersion: authorization.openshift.io/v1
+  kind: RoleBinding
+  metadata:
+    name: alertmanager-autoheal-access
+    labels:
+      app: autoheal
+  roleRef:
+    kind: ClusterRole
+    name: autoheal-access
+  subjects:
+  - kind: ServiceAccount
+    namespace: openshift-monitoring
+    name: alertmanager-main
+
+# Binds the `system:auth-delegator` cluster role to the `autoheal` service
+# account of the `openshift-autoheal` namespace.
+- apiVersion: authorization.openshift.io/v1
+  kind: ClusterRoleBinding
+  metadata:
+    name: autoheal-auth-delegator
+    labels:
+      app: autoheal
+  roleRef:
+    kind: ClusterRole
+    name: system:auth-delegator
+  subjects:
+  - kind: ServiceAccount
+    namespace: openshift-autoheal
+    name: autoheal
+
+# Secret containing the configuration file of the service, taken from the
+# BASE64 encoded `CONFIG` parameter. The deployment mounts it as the `config`
+# volume.
+- apiVersion: v1
+  kind: Secret
+  metadata:
+    name: autoheal-config
+    labels:
+      app: autoheal
+  data:
+    autoheal.yml: ${CONFIG}
+
+# Secret containing the value used by the OAuth proxy to encrypt session
+# cookies, taken from the BASE64 encoded `SECRET` parameter. The deployment
+# mounts it as the `proxy-cookie` volume. It carries the same `app: autoheal`
+# label as every other object of the template so that label selectors find it.
+- apiVersion: v1
+  kind: Secret
+  metadata:
+    name: autoheal-proxy-cookie
+    labels:
+      app: autoheal
+  data:
+    session_secret: ${SECRET}
+
+# Deployment with a single replica whose pod runs two containers: the OAuth
+# proxy, listening on port 8443, and the auto-heal receiver, which the proxy
+# uses as upstream on http://localhost:9099.
+- apiVersion: apps/v1beta1
+  kind: Deployment
+  metadata:
+    name: autoheal
+    labels:
+      app: autoheal
+  spec:
+    selector:
+      matchLabels:
+        app: autoheal
+    replicas: 1
+    template:
+      metadata:
+        labels:
+          app: autoheal
+      spec:
+        serviceAccountName: autoheal
+        # All the volumes are backed by secrets: `autoheal-config` and
+        # `autoheal-proxy-cookie` are defined in this template, and
+        # `autoheal-proxy-tls` is the name given in the serving certificate
+        # annotation of the `receiver` service.
+        volumes:
+        - name: config
+          secret:
+            secretName: autoheal-config
+        - name: proxy-tls
+          secret:
+            secretName: autoheal-proxy-tls
+        - name: proxy-cookie
+          secret:
+            secretName: autoheal-proxy-cookie
+        containers:
+        # OAuth proxy: serves HTTPS on port 8443 with the mounted certificate
+        # and key, and forwards to the receiver container.
+        - name: proxy
+          image: openshift/oauth-proxy:v1.1.0
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+          - mountPath: /etc/tls/private
+            name: proxy-tls
+          - mountPath: /etc/proxy/secrets
+            name: proxy-cookie
+          ports:
+          - containerPort: 8443
+            name: public
+          args:
+          - --https-address=:8443
+          - --provider=openshift
+          - --openshift-service-account=autoheal
+          - --upstream=http://localhost:9099
+          - --tls-cert=/etc/tls/private/tls.crt
+          - -email-domain=*
+          # Both checks below refer to `get` on the `autoheal-access-key`
+          # secret of the `openshift-autoheal` namespace, the permission
+          # granted by the `autoheal-access` cluster role.
+          - '-openshift-sar={ "resource": "secrets", "verb": "get", "name": "autoheal-access-key", "namespace": "openshift-autoheal" }'
+          - '-openshift-delegate-urls={ "/": { "resource": "secrets", "verb": "get", "name": "autoheal-access-key", "namespace": "openshift-autoheal" } }'
+          - -tls-key=/etc/tls/private/tls.key
+          - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+          - -cookie-secret-file=/etc/proxy/secrets/session_secret
+          - -openshift-ca=/etc/pki/tls/cert.pem
+          - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        # Auto-heal server: runs the image given in the `IMAGE` parameter and
+        # reads its configuration from the directory where the
+        # `autoheal-config` secret is mounted.
+        - name: receiver
+          image: ${IMAGE}
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+          - name: config
+            mountPath: /etc/autoheal/config.d
+          command:
+          - /usr/bin/autoheal
+          args:
+          - server
+          - --config-file=/etc/autoheal/config.d
+          - --logtostderr
+
+# Service that exposes port 443 and sends the traffic to port 8443 of the pods
+# labeled `app: autoheal`, the port where the proxy container listens. The
+# annotation names the `autoheal-proxy-tls` secret, which the deployment
+# mounts as the `proxy-tls` volume.
+- apiVersion: v1
+  kind: Service
+  metadata:
+    name: receiver
+    labels:
+      app: autoheal
+    annotations:
+      service.alpha.openshift.io/serving-cert-secret-name: autoheal-proxy-tls
+  spec:
+    selector:
+      app: autoheal
+    ports:
+    - name: autoheal
+      port: 443
+      targetPort: 8443

+ 21 - 0
roles/openshift_autoheal/meta/main.yml

@@ -0,0 +1,21 @@
+---
+# Galaxy metadata of the role. The role depends on `lib_utils`.
+galaxy_info:
+  author: OpenShift Development <dev@lists.openshift.redhat.com>
+  description: OpenShift Auto-heal
+  company: Red Hat, Inc.
+  # License identifier instead of the placeholder text of the role skeleton;
+  # matches the license stated in the README of the role.
+  license: Apache-2.0
+  # Quoted so that it is read as a version string and not as a float.
+  min_ansible_version: "2.4"
+  platforms:
+  - name: EL
+    versions:
+    - 7
+  - name: Fedora
+    versions:
+    - all
+  categories:
+  - autoheal
+  - monitoring
+  - openshift
+  - prometheus
+dependencies:
+- role: lib_utils

+ 51 - 0
roles/openshift_autoheal/tasks/install.yml

@@ -0,0 +1,51 @@
+---
+
+# Installs the auto-heal service: creates the `openshift-autoheal` namespace,
+# loads the template shipped with the role and processes it with the image,
+# the BASE64 encoded configuration and a freshly generated proxy secret.
+
+- name: Create a temporary directory for doing work in the target host
+  command: mktemp -d -p '' openshift-autoheal-XXXXXX
+  register: tmpdir
+  changed_when: false
+
+# Everything that uses the temporary directory runs inside a block, so that
+# the directory is removed even when one of the steps fails.
+- name: Install the auto-heal objects
+  block:
+  - name: Copy the template to the temporary directory
+    copy:
+      src: template.yml
+      dest: "{{ tmpdir.stdout }}/template.yml"
+
+  - name: Generate the proxy secret
+    command: openssl rand -base64 32
+    register: secret
+    changed_when: false
+    # The output of the command is the secret itself, keep it out of the logs.
+    no_log: true
+
+  - name: Create the namespace
+    oc_obj:
+      kind: Namespace
+      name: openshift-autoheal
+      content:
+        path: "{{ tmpdir.stdout }}/namespace.yml"
+        data:
+          apiVersion: v1
+          kind: Namespace
+          metadata:
+            name: openshift-autoheal
+
+  - name: Create the template
+    oc_obj:
+      namespace: openshift-autoheal
+      name: autoheal-template
+      kind: Template
+      files:
+      - "{{ tmpdir.stdout }}/template.yml"
+
+  - name: Apply the template
+    oc_process:
+      namespace: openshift-autoheal
+      template_name: autoheal-template
+      # Without `create` the module only renders the template and doesn't
+      # create the resulting objects.
+      create: true
+      params:
+        IMAGE: "{{ openshift_autoheal_image }}"
+        CONFIG: "{{ openshift_autoheal_config | b64encode }}"
+        SECRET: "{{ secret.stdout }}"
+
+  always:
+  - name: Delete the temporary directory
+    file:
+      name: "{{ tmpdir.stdout }}"
+      state: absent
+    changed_when: false

+ 6 - 0
roles/openshift_autoheal/tasks/main.yml

@@ -0,0 +1,6 @@
+---
+# Entry point of the role: installs the auto-heal service when
+# `openshift_autoheal_deploy` is true and uninstalls it otherwise. The variable
+# is optional: when it isn't defined it is treated as false, which is the
+# default documented in the README of the role, instead of failing with an
+# undefined variable error.
+- include_tasks: install.yml
+  when: openshift_autoheal_deploy | default(false) | bool
+
+- include_tasks: uninstall.yml
+  when: not (openshift_autoheal_deploy | default(false) | bool)

+ 5 - 0
roles/openshift_autoheal/tasks/uninstall.yml

@@ -0,0 +1,5 @@
+---
+# Uninstalls the service by removing the `openshift-autoheal` project.
+# NOTE(review): the cluster scoped objects created from the template (the
+# cluster role and the cluster role binding) are presumably not removed
+# together with the project - confirm.
+- name: Remove auto-heal namespace
+  oc_project:
+    name: openshift-autoheal
+    state: absent

+ 6 - 0
roles/openshift_autoheal/tasks/upgrade.yml

@@ -0,0 +1,6 @@
+---
+# Upgrade entry point of the role: runs the installation tasks again when
+# `openshift_autoheal_deploy` is true and the uninstall tasks otherwise. The
+# variable is optional: when it isn't defined it is treated as false, which is
+# the default documented in the README of the role, instead of failing with an
+# undefined variable error.
+- include_tasks: install.yml
+  when: openshift_autoheal_deploy | default(false) | bool
+
+- include_tasks: uninstall.yml
+  when: not (openshift_autoheal_deploy | default(false) | bool)