Procházet zdrojové kódy

Merge pull request #2052 from sdodson/metrics

Metrics improvements
Scott Dodson před 8 roky
rodič
revize
d08a872a03

+ 21 - 2
filter_plugins/oo_filters.py

@@ -17,6 +17,7 @@ import re
 import json
 import yaml
 from ansible.utils.unicode import to_unicode
+from urlparse import urlparse
 
 # Disabling too-many-public-methods, since filter methods are necessarily
 # public
@@ -709,7 +710,7 @@ class FilterModule(object):
                                         fsType=filesystem,
                                         volumeID=volume_id)))
                             persistent_volumes.append(persistent_volume)
-                        elif kind != 'object':
+                        elif not (kind == 'object' or kind == 'dynamic'):
                             msg = "|failed invalid storage kind '{0}' for component '{1}'".format(
                                 kind,
                                 component)
@@ -733,7 +734,8 @@ class FilterModule(object):
                 if 'storage' in hostvars['openshift']['hosted'][component]:
                     kind = hostvars['openshift']['hosted'][component]['storage']['kind']
                     create_pv = hostvars['openshift']['hosted'][component]['storage']['create_pv']
-                    if kind != None and create_pv:
+                    create_pvc = hostvars['openshift']['hosted'][component]['storage']['create_pvc']
+                    if kind != None and create_pv and create_pvc:
                         volume = hostvars['openshift']['hosted'][component]['storage']['volume']['name']
                         size = hostvars['openshift']['hosted'][component]['storage']['volume']['size']
                         access_modes = hostvars['openshift']['hosted'][component]['storage']['access_modes']
@@ -829,6 +831,22 @@ class FilterModule(object):
 
         return version
 
+    @staticmethod
+    def oo_hostname_from_url(url):
+        """ Returns the hostname contained in a URL
+
+            Ex: https://ose3-master.example.com/v1/api -> ose3-master.example.com
+        """
+        if not isinstance(url, basestring):
+            raise errors.AnsibleFilterError("|failed expects a string or unicode")
+        parse_result = urlparse(url)
+        if parse_result.netloc != '':
+            return parse_result.netloc
+        else:
+            # netloc wasn't parsed, assume url was missing scheme and path
+            return parse_result.path
+
+
     def filters(self):
         """ returns a mapping of filters to methods """
         return {
@@ -859,5 +877,6 @@ class FilterModule(object):
             "oo_get_hosts_from_hostvars": self.oo_get_hosts_from_hostvars,
             "oo_image_tag_to_rpm_version": self.oo_image_tag_to_rpm_version,
             "oo_merge_dicts": self.oo_merge_dicts,
+            "oo_hostname_from_url": self.oo_hostname_from_url,
             "oo_merge_hostvars": self.oo_merge_hostvars,
         }

+ 24 - 0
inventory/byo/hosts.ose.example

@@ -325,6 +325,30 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
 #openshift_hosted_registry_storage_s3_chunksize=26214400
 #openshift_hosted_registry_pullthrough=true
 
+# Metrics Storage Options
+#
+# NFS Host Group
+# An NFS volume will be created with path "nfs_directory/volume_name"
+# on the host within the [nfs] host group.  For example, the volume
+# path using these options would be "/exports/metrics"
+#openshift_hosted_metrics_storage_kind=nfs
+#openshift_hosted_metrics_storage_access_modes=['ReadWriteOnce']
+#openshift_hosted_metrics_storage_nfs_directory=/exports
+#openshift_hosted_metrics_storage_nfs_options='*(rw,root_squash)'
+#openshift_hosted_metrics_storage_volume_name=metrics
+#openshift_hosted_metrics_storage_volume_size=10Gi
+#
+# External NFS Host
+# NFS volume must already exist with path "nfs_directory/volume_name" on
+# the storage_host. For example, the remote volume path using these
+# options would be "nfs.example.com:/exports/metrics"
+#openshift_hosted_metrics_storage_kind=nfs
+#openshift_hosted_metrics_storage_access_modes=['ReadWriteOnce']
+#openshift_hosted_metrics_storage_host=nfs.example.com
+#openshift_hosted_metrics_storage_nfs_directory=/exports
+#openshift_hosted_metrics_storage_volume_name=metrics
+#openshift_hosted_metrics_storage_volume_size=10Gi
+
 # Configure the multi-tenant SDN plugin (default is 'redhat/openshift-ovs-subnet')
 # os_sdn_network_plugin_name='redhat/openshift-ovs-multitenant'
 

+ 4 - 0
playbooks/byo/openshift-cluster/config.yml

@@ -5,6 +5,8 @@
   connection: local
   become: no
   gather_facts: no
+  tags:
+  - always
   tasks:
   - include_vars: ../../byo/openshift-cluster/cluster_hosts.yml
   - add_host:
@@ -14,6 +16,8 @@
 
 - hosts: l_oo_all_hosts
   gather_facts: no
+  tags:
+  - always
   tasks:
   - include_vars: ../../byo/openshift-cluster/cluster_hosts.yml
 

+ 22 - 0
playbooks/common/openshift-cluster/config.yml

@@ -1,14 +1,22 @@
 ---
 - include: evaluate_groups.yml
+  tags:
+  - always
 
 - include: initialize_facts.yml
+  tags:
+  - always
 
 - include: validate_hostnames.yml
+  tags:
+  - node
 
 - include: initialize_openshift_version.yml
 
 - name: Set oo_options
   hosts: oo_all_hosts
+  tags:
+  - always
   tasks:
   - set_fact:
       openshift_docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') }}"
@@ -30,15 +38,29 @@
     when: openshift_docker_log_options is not defined
 
 - include: ../openshift-etcd/config.yml
+  tags:
+  - etcd
 
 - include: ../openshift-nfs/config.yml
+  tags:
+  - nfs
 
 - include: ../openshift-loadbalancer/config.yml
+  tags:
+  - loadbalancer
 
 - include: ../openshift-master/config.yml
+  tags:
+  - master
 
 - include: additional_config.yml
+  tags:
+  - master
 
 - include: ../openshift-node/config.yml
+  tags:
+  - node
 
 - include: openshift_hosted.yml
+  tags:
+  - hosted

+ 5 - 1
playbooks/common/openshift-cluster/openshift_hosted.yml

@@ -1,5 +1,7 @@
 - name: Create persistent volumes
   hosts: oo_first_master
+  tags:
+  - hosted
   vars:
     persistent_volumes: "{{ hostvars[groups.oo_first_master.0] | oo_persistent_volumes(groups) }}"
     persistent_volume_claims: "{{ hostvars[groups.oo_first_master.0] | oo_persistent_volume_claims }}"
@@ -9,6 +11,8 @@
 
 - name: Create Hosted Resources
   hosts: oo_first_master
+  tags:
+  - hosted
   pre_tasks:
   - set_fact:
       openshift_hosted_router_registryurl: "{{ hostvars[groups.oo_first_master.0].openshift.master.registry_url }}"
@@ -36,6 +40,6 @@
     openshift_serviceaccounts_sccs:
     - privileged
     when: not openshift.common.version_gte_3_2_or_1_2
+  - role: openshift_hosted
   - role: openshift_metrics
     when: openshift.hosted.metrics.deploy | bool
-  - role: openshift_hosted

+ 6 - 0
playbooks/common/openshift-master/config.yml

@@ -48,6 +48,12 @@
   - set_fact:
       openshift_hosted_metrics_resolution: "{{ lookup('oo_option', 'openshift_hosted_metrics_resolution') | default('10s', true) }}"
     when: openshift_hosted_metrics_resolution is not defined
+  - set_fact:
+      openshift_hosted_metrics_deployer_prefix: "{{ lookup('oo_option', 'openshift_hosted_metrics_deployer_prefix') | default('openshift') }}"
+    when: openshift_hosted_metrics_deployer_prefix is not defined
+  - set_fact:
+      openshift_hosted_metrics_deployer_version: "{{ lookup('oo_option', 'openshift_hosted_metrics_deployer_version') | default('latest') }}"
+    when: openshift_hosted_metrics_deployer_version is not defined
   roles:
   - openshift_facts
   post_tasks:

+ 34 - 4
roles/openshift_examples/files/examples/v1.3/infrastructure-templates/enterprise/metrics-deployer.yaml

@@ -34,9 +34,11 @@ objects:
   metadata:
     generateName: metrics-deployer-
   spec:
+    securityContext: {}
     containers:
     - image: ${IMAGE_PREFIX}metrics-deployer:${IMAGE_VERSION}
       name: deployer
+      securityContext: {}
       volumeMounts:
       - name: secret
         mountPath: /secret
@@ -48,6 +50,10 @@ objects:
           valueFrom:
             fieldRef:
               fieldPath: metadata.namespace
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
         - name: IMAGE_PREFIX
           value: ${IMAGE_PREFIX}
         - name: IMAGE_VERSION
@@ -58,8 +64,12 @@ objects:
           value: ${MODE}
         - name: REDEPLOY
           value: ${REDEPLOY}
+        - name: IGNORE_PREFLIGHT
+          value: ${IGNORE_PREFLIGHT}
         - name: USE_PERSISTENT_STORAGE
           value: ${USE_PERSISTENT_STORAGE}
+        - name: DYNAMICALLY_PROVISION_STORAGE
+          value: ${DYNAMICALLY_PROVISION_STORAGE}
         - name: HAWKULAR_METRICS_HOSTNAME
           value: ${HAWKULAR_METRICS_HOSTNAME}
         - name: CASSANDRA_NODES
@@ -68,6 +78,10 @@ objects:
           value: ${CASSANDRA_PV_SIZE}
         - name: METRIC_DURATION
           value: ${METRIC_DURATION}
+        - name: USER_WRITE_ACCESS
+          value: ${USER_WRITE_ACCESS}
+        - name: HEAPSTER_NODE_ID
+          value: ${HEAPSTER_NODE_ID}
         - name: METRIC_RESOLUTION
           value: ${METRIC_RESOLUTION}
     dnsPolicy: ClusterFirst
@@ -87,7 +101,7 @@ parameters:
 -
   description: 'Specify version for metrics components; e.g. for "openshift/origin-metrics-deployer:latest", set version "latest"'
   name: IMAGE_VERSION
-  value: "3.2.1"
+  value: "3.3.0"
 -
   description: "Internal URL for the master, for authentication retrieval"
   name: MASTER_URL
@@ -97,7 +111,7 @@ parameters:
   name: HAWKULAR_METRICS_HOSTNAME
   required: true
 -
-  description: "Can be set to: 'deploy' to perform an initial deployment; 'refresh' to delete and redeploy all components but to keep persisted data and routes; 'redeploy' to delete and redeploy everything (losing all data in the process)"
+  description: "Can be set to: 'preflight' to perform validation before a deployment; 'deploy' to perform an initial deployment; 'refresh' to delete and redeploy all components but to keep persisted data and routes; 'redeploy' to delete and redeploy everything (losing all data in the process); 'validate' to re-run validations after a deployment"
   name: MODE
   value: "deploy"
 -
@@ -105,10 +119,18 @@ parameters:
   name: REDEPLOY
   value: "false"
 -
+  description: "If preflight validation is blocking deployment and you're sure you don't care about it, this will ignore the results and proceed to deploy."
+  name: IGNORE_PREFLIGHT
+  value: "false"
+-
   description: "Set to true for persistent storage, set to false to use non persistent storage"
   name: USE_PERSISTENT_STORAGE
   value: "true"
 -
+  description: "Set to true to dynamically provision storage, set to false to use pre-created persistent volumes"
+  name: DYNAMICALLY_PROVISION_STORAGE
+  value: "false"
+-
   description: "The number of Cassandra Nodes to deploy for the initial cluster"
   name: CASSANDRA_NODES
   value: "1"
@@ -121,6 +143,14 @@ parameters:
   name: METRIC_DURATION
   value: "7"
 -
-  description: "How often metrics should be gathered. Defaults value of '10s' for 10 seconds"
+  description: "Whether user accounts should be allowed to write metrics."
+  name: USER_WRITE_ACCESS
+  value: "false"
+-
+  description: "The identifier used when generating metric ids in Hawkular"
+  name: HEAPSTER_NODE_ID
+  value: "nodename"
+-
+  description: "How often metrics should be gathered. Defaults value of '15s' for 15 seconds"
   name: METRIC_RESOLUTION
-  value: "10s"
+  value: "15s"

+ 2 - 2
roles/openshift_examples/files/examples/v1.3/infrastructure-templates/origin/metrics-deployer.yaml

@@ -151,6 +151,6 @@ parameters:
   name: HEAPSTER_NODE_ID
   value: "nodename"
 -
-  description: "How often metrics should be gathered. Defaults value of '10s' for 10 seconds"
+  description: "How often metrics should be gathered. Defaults value of '15s' for 15 seconds"
   name: METRIC_RESOLUTION
-  value: "10s"
+  value: "15s"

+ 13 - 3
roles/openshift_facts/library/openshift_facts.py

@@ -477,6 +477,14 @@ def set_selectors(facts):
         facts['hosted']['registry'] = {}
     if 'selector' not in facts['hosted']['registry'] or facts['hosted']['registry']['selector'] in [None, 'None']:
         facts['hosted']['registry']['selector'] = selector
+    if 'metrics' not in facts['hosted']:
+        facts['hosted']['metrics'] = {}
+    if 'selector' not in facts['hosted']['metrics'] or facts['hosted']['metrics']['selector'] in [None, 'None']:
+        facts['hosted']['metrics']['selector'] = None
+    if 'logging' not in facts['hosted']:
+        facts['hosted']['logging'] = {}
+    if 'selector' not in facts['hosted']['logging'] or facts['hosted']['logging']['selector'] in [None, 'None']:
+        facts['hosted']['logging']['selector'] = None
 
     return facts
 
@@ -1791,8 +1799,9 @@ class OpenShiftFacts(object):
                             filesystem='ext4',
                             volumeID='123'),
                         host=None,
-                        access_modes=['ReadWriteMany'],
-                        create_pv=True
+                        access_modes=['ReadWriteOnce'],
+                        create_pv=True,
+                        create_pvc=False
                     )
                 ),
                 registry=dict(
@@ -1807,7 +1816,8 @@ class OpenShiftFacts(object):
                             options='*(rw,root_squash)'),
                         host=None,
                         access_modes=['ReadWriteMany'],
-                        create_pv=True
+                        create_pv=True,
+                        create_pvc=True
                     )
                 ),
                 router=dict()

+ 2 - 1
roles/openshift_hosted/tasks/registry/storage/persistent_volume.yml

@@ -5,10 +5,11 @@
 - name: Determine if volume is already attached to dc/docker-registry
   command: "{{ openshift.common.client_binary }} get -o template dc/docker-registry --template=\\{\\{.spec.template.spec.volumes\\}\\} --output-version=v1"
   changed_when: false
+  failed_when: false
   register: registry_volumes_output
 
 - set_fact:
-    volume_attached: "{{ registry_volume_claim in registry_volumes_output.stdout }}"
+    volume_attached: "{{ registry_volume_claim in (registry_volumes_output).stdout | default(['']) }}"
 
 - name: Add volume to dc/docker-registry
   command: >

+ 31 - 0
roles/openshift_metrics/handlers/main.yml

@@ -0,0 +1,31 @@
+---
+- name: restart master
+  service: name={{ openshift.common.service_type }}-master state=restarted
+  when: (openshift.master.ha is not defined or not openshift.master.ha | bool) and (not (master_service_status_changed | default(false) | bool))
+  notify: Verify API Server
+
+- name: restart master api
+  service: name={{ openshift.common.service_type }}-master-api state=restarted
+  when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_api_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+  notify: Verify API Server
+
+- name: restart master controllers
+  service: name={{ openshift.common.service_type }}-master-controllers state=restarted
+  when: (openshift.master.ha is defined and openshift.master.ha | bool) and (not (master_controllers_service_status_changed | default(false) | bool)) and openshift.master.cluster_method == 'native'
+
+- name: Verify API Server
+  # Using curl here since the uri module requires python-httplib2 and
+  # wait_for port doesn't provide health information.
+  command: >
+    curl --silent
+    {% if openshift.common.version_gte_3_2_or_1_2 | bool %}
+    --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt
+    {% else %}
+    --cacert {{ openshift.common.config_base }}/master/ca.crt
+    {% endif %}
+    {{ openshift.master.api_url }}/healthz/ready
+  register: api_available_output
+  until: api_available_output.stdout == 'ok'
+  retries: 120
+  delay: 1
+  changed_when: false

+ 114 - 0
roles/openshift_metrics/tasks/install.yml

@@ -0,0 +1,114 @@
+---
+
+- name: Test if metrics-deployer service account exists
+  command: >
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace=openshift-infra
+    get serviceaccount metrics-deployer -o json
+  register: serviceaccount
+  changed_when: false
+  failed_when: false
+
+- name: Create metrics-deployer Service Account
+  shell: >
+    echo {{ metrics_deployer_sa | to_json | quote }} |
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    create -f -
+  when: serviceaccount.rc == 1
+
+- name: Test edit permissions
+  command: >
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    get rolebindings -o jsonpath='{.items[?(@.metadata.name == "edit")].userNames}'
+  register: edit_rolebindings
+  changed_when: false
+
+- name: Add edit permission to the openshift-infra project to metrics-deployer SA
+  command: >
+    {{ openshift.common.admin_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    policy add-role-to-user edit
+    system:serviceaccount:openshift-infra:metrics-deployer
+  when: "'system:serviceaccount:openshift-infra:metrics-deployer' not in edit_rolebindings.stdout"
+
+- name: Test cluster-reader permissions
+  command: >
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    get clusterrolebindings -o jsonpath='{.items[?(@.metadata.name == "cluster-reader")].userNames}'
+  register: cluster_reader_clusterrolebindings
+  changed_when: false
+
+- name: Add cluster-reader permission to the openshift-infra project to heapster SA
+  command: >
+    {{ openshift.common.admin_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    policy add-cluster-role-to-user cluster-reader
+    system:serviceaccount:openshift-infra:heapster
+  when: "'system:serviceaccount:openshift-infra:heapster' not in cluster_reader_clusterrolebindings.stdout"
+
+- name: Create metrics-deployer secret
+  command: >
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    secrets new metrics-deployer nothing=/dev/null
+  register: metrics_deployer_secret
+  changed_when: metrics_deployer_secret.rc == 0
+  failed_when: "metrics_deployer_secret.rc == 1 and 'already exists' not in metrics_deployer_secret.stderr"
+
+# TODO: extend this to allow user passed in certs or generating cert with
+# OpenShift CA
+- name: Build metrics deployer command
+  set_fact:
+    deployer_cmd: "{{ openshift.common.client_binary }} process -f \
+      {{ metrics_template_dir }}/metrics-deployer.yaml -v \
+      HAWKULAR_METRICS_HOSTNAME={{ metrics_hostname }},USE_PERSISTENT_STORAGE={{metrics_persistence | string | lower }},METRIC_DURATION={{ openshift.hosted.metrics.duration }},METRIC_RESOLUTION={{ openshift.hosted.metrics.resolution }}{{ image_prefix }}{{ image_version }},MODE={{ deployment_mode }} \
+        | {{ openshift.common.client_binary }} --namespace openshift-infra \
+        --config={{ openshift_metrics_kubeconfig }} \
+        create -f -"
+
+- name: Deploy Metrics
+  shell: "{{ deployer_cmd }}"
+  register: deploy_metrics
+  failed_when: "'already exists' not in deploy_metrics.stderr and deploy_metrics.rc != 0"
+  changed_when: deploy_metrics.rc == 0
+
+- set_fact:
+    deployer_pod: "{{ deploy_metrics.stdout[1:2] }}"
+
+# TODO: re-enable this once the metrics deployer validation issue is fixed
+# when using dynamically provisioned volumes
+- name: "Wait for image pull and deployer pod"
+  shell: >
+    {{ openshift.common.client_binary }}
+    --namespace openshift-infra
+    --config={{ openshift_metrics_kubeconfig }}
+    get {{ deploy_metrics.stdout }}
+  register: deploy_result
+  until: "{{ 'Completed' in deploy_result.stdout }}"
+  failed_when: "{{ 'Completed' not in deploy_result.stdout }}"
+  retries: 60
+  delay: 10
+
+- name: Configure master for metrics
+  modify_yaml:
+    dest: "{{ openshift.common.config_base }}/master/master-config.yaml"
+    yaml_key: assetConfig.metricsPublicURL
+    yaml_value: "https://{{ metrics_hostname }}/hawkular/metrics"
+  notify: restart master
+
+- name: Store metrics public_url
+  openshift_facts:
+    role: master
+    local_facts:
+      metrics_public_url: "https://{{ metrics_hostname }}/hawkular/metrics"
+  when: deploy_result | changed

+ 75 - 51
roles/openshift_metrics/tasks/main.yaml

@@ -1,64 +1,88 @@
 ---
-- name: Copy Configuration to temporary conf
-  command: >
-    cp {{ openshift.common.config_base }}/master/admin.kubeconfig {{hawkular_tmp_conf}}
-  changed_when: false
+- fail:
+    msg: This role requires openshift_master_default_subdomain or openshift_master_metrics_public_url to be set
+  when: openshift.master.metrics_public_url | default(openshift_master_metrics_public_url | default(openshift.master.default_subdomain | default(openshift_master_default_subdomain | default(none)))) is none
 
-- name: Create metrics-deployer Service Account
-  shell: >
-    echo {{ deployer_service_account | to_json | quote }} |
-    {{ openshift.common.client_binary }} create
-    -n openshift-infra
-    --config={{hawkular_tmp_conf}}
-    -f -
-  register: deployer_create_service_account
-  failed_when: "'already exists' not in deployer_create_service_account.stderr and deployer_create_service_account.rc != 0"
-  changed_when: deployer_create_service_account.rc == 0
+- name: Create temp directory for kubeconfig
+  command: mktemp -d /tmp/openshift-ansible-XXXXXX
+  register: mktemp
+  changed_when: False
 
-- name: Create metrics-deployer Secret
-  command: >
-    {{ openshift.common.client_binary }}
-    secrets new metrics-deployer
-    nothing=/dev/null
-    --config={{hawkular_tmp_conf}}
-    -n openshift-infra
-  register: deployer_create_secret
-  failed_when: "'already exists' not in deployer_create_secret.stderr and deployer_create_secret.rc !=0"
-  changed_when: deployer_create_secret.rc == 0
+- name: Record kubeconfig tmp dir
+  set_fact:
+    openshift_metrics_kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig"
 
-- name: Configure role/user permissions
+- name: Copy the admin client config(s)
   command: >
-    {{ openshift.common.admin_binary }} {{item}}
-    --config={{hawkular_tmp_conf}}
-  with_items: "{{hawkular_permission_oc_commands}}"
-  register: hawkular_perm_task
-  failed_when: "'already exists' not in hawkular_perm_task.stderr and hawkular_perm_task.rc != 0"
-  changed_when: hawkular_perm_task.rc == 0
+    cp {{ openshift_master_config_dir }}/admin.kubeconfig {{ openshift_metrics_kubeconfig }}
+  changed_when: False
+
+- name: Set hosted metrics facts
+  openshift_facts:
+    role: hosted
+    openshift_env: "{{ hostvars
+                       | oo_merge_hostvars(vars, inventory_hostname)
+                       | oo_openshift_env }}"
+    openshift_env_structures:
+    - 'openshift.hosted.metrics.*'
+
+- set_fact:
+    # Prefer the master facts over bare variables if present, prefer
+    # metrics_public_url over creating a default using default_subdomain
+    metrics_hostname: "{{ openshift.hosted.metrics.public_url
+                          | default('hawkular-metrics.' ~ (openshift.master.default_subdomain
+                          | default(openshift_master_default_subdomain )))
+                          | oo_hostname_from_url }}"
+    metrics_persistence: True
+    #"{{ openshift.hosted.metrics.storage_kind | default(none) is not none }}"
+    metrics_dynamic_vol: "{{ openshift.hosted.metrics.storage_kind | default(none) == 'dynamic' }}"
+    metrics_template_dir: "/usr/share/openshift/examples/infrastructure-templates/{{ 'origin' if deployment_type == 'origin' else 'enterprise' }}"
+    cassandra_nodes: "{{ ',CASSANDRA_NODES=' ~ openshift.hosted.metrics.cassandra_nodes if 'cassandra' in openshift.hosted.metrics else '' }}"
+    cassandra_pv_size: "{{ ',CASSANDRA_PV_SIZE=' ~ openshift.hosted.metrics.storage_volume_size if openshift.hosted.metrics.storage_volume_size | default(none) is not none else '' }}"
+    image_prefix: "{{ ',IMAGE_PREFIX=' ~ openshift.hosted.metrics.deployer_prefix if 'deployer_prefix' in openshift.hosted.metrics else '' }}"
+    image_version: "{{ ',IMAGE_VERSION=' ~ openshift.hosted.metrics.deployer_version if 'deployer_version' in openshift.hosted.metrics else '' }}"
 
-- name: Check openshift_master_default_subdomain
-  fail: 
-    msg: "Default subdomain should be defined"
-  when: openshift.master.default_subdomain is not defined
 
-- name: Create Heapster and Hawkular/Cassandra Services
+- name: Check for existing metrics pods
   shell: >
-   {{ openshift.common.client_binary }} process -f \
-   /usr/share/openshift/examples/infrastructure-templates/{{ hawkular_type }}/metrics-deployer.yaml -v \
-    HAWKULAR_METRICS_HOSTNAME=hawkular-metrics.{{ openshift.master.default_subdomain }},USE_PERSISTENT_STORAGE={{ hawkular_persistence }},METRIC_DURATION={{ openshift.hosted.metrics.duration }},METRIC_RESOLUTION={{ openshift.hosted.metrics.resolution }} \
-    | {{ openshift.common.client_binary }} create -n openshift-infra --config={{hawkular_tmp_conf}} -f -
-  register: oex_heapster_services
-  failed_when: "'already exists' not in oex_heapster_services.stderr and oex_heapster_services.rc != 0"
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    get pods -l {{ item }} | grep -q Running
+  register: metrics_pods_status
+  with_items:
+    - metrics-infra=hawkular-metrics
+    - metrics-infra=heapster
+    - metrics-infra=hawkular-cassandra
+  failed_when: false
   changed_when: false
 
-- name: Clean temporary config file
-  command: >
-    rm -rf {{hawkular_tmp_conf}}
+- name: Check for previous deployer
+  shell: >
+    {{ openshift.common.client_binary }}
+    --config={{ openshift_metrics_kubeconfig }}
+    --namespace openshift-infra
+    get pods -l metrics-infra=deployer --sort-by='{.metadata.creationTimestamp}' | tail -1 | grep metrics-deployer-
+  register: metrics_deployer_status
+  failed_when: false
   changed_when: false
 
-- name: "Wait for image pull and deployer pod"
-  shell:  "{{ openshift.common.client_binary }} get pods -n openshift-infra | grep metrics-deployer.*Completed"
-  register: result
-  until: result.rc == 0
-  retries: 60
-  delay: 10
+- name: Record current deployment status
+  set_fact:
+    greenfield: "{{ not metrics_deployer_status.rc == 0 }}"
+    failed_error: "{{ True if 'Error' in metrics_deployer_status.stdout else False }}"
+    metrics_running: "{{ metrics_pods_status.results | oo_collect(attribute='rc') == [0,0,0] }}"
+
+- name: Set deployment mode
+  set_fact:
+    deployment_mode: "{{ 'refresh' if (failed_error | bool or metrics_upgrade | bool) else 'deploy' }}"
+
+# TODO: handle non greenfield deployments in the future
+- include: install.yml
+  when: greenfield
 
+- name: Delete temp directory
+  file:
+    name: "{{ mktemp.stdout }}"
+    state: absent
+  changed_when: False

+ 8 - 7
roles/openshift_metrics/vars/main.yaml

@@ -2,13 +2,13 @@ hawkular_permission_oc_commands:
     - policy add-role-to-user edit system:serviceaccount:openshift-infra:metrics-deployer -n openshift-infra
     - policy add-cluster-role-to-user cluster-admin system:serviceaccount:openshift-infra:heapster
 
-deployer_service_account:
-    apiVersion: v1
-    kind: ServiceAccount
-    metadata:
-      name: metrics-deployer
-    secrets:
-    - name: metrics-deployer
+metrics_deployer_sa:
+  apiVersion: v1
+  kind: ServiceAccount
+  metadata:
+    name: metrics-deployer
+  secrets:
+  - name: metrics-deployer
 
 
 hawkular_tmp_conf: /tmp/hawkular_admin.kubeconfig
@@ -17,3 +17,4 @@ hawkular_persistence: "{% if openshift.hosted.metrics.storage.kind != None %}tru
 
 hawkular_type: "{{ 'origin' if deployment_type == 'origin' else 'enterprise' }}"
 
+metrics_upgrade: openshift.hosted.metrics.upgrade | default(False)

+ 19 - 3
roles/openshift_storage_nfs/tasks/main.yml

@@ -20,21 +20,37 @@
 
 - name: Ensure export directories exist
   file:
-    path: "{{ openshift.hosted.registry.storage.nfs.directory }}/{{ item }}"
+    path: "{{ item.storage.nfs.directory }}/{{ item.storage.volume.name }}"
     state: directory
     mode: 0777
     owner: nfsnobody
     group: nfsnobody
   with_items:
-  - "{{ openshift.hosted.registry.storage.volume.name }}"
+  - "{{ openshift.hosted.registry }}"
+  - "{{ openshift.hosted.metrics }}"
 
 - name: Configure exports
   template:
-    dest: /etc/exports
+    dest: /etc/exports.d/openshift-ansible.exports
     src: exports.j2
   notify:
   - restart nfs-server
 
+# Now that we're putting our exports in our own file clean up the old ones
+- name: register exports
+  command: cat /etc/exports.d/openshift-ansible.exports
+  register: exports_out
+
+- name: remove exports from /etc/exports
+  lineinfile:
+    dest: /etc/exports
+    line: "{{ item }}"
+    state: absent
+  with_items: "{{ exports_out.stdout_lines | default([]) }}"
+  when: exports_out.rc == 0
+  notify:
+  - restart nfs-server
+
 - name: Enable and start services
   service:
     name: "{{ item }}"

+ 1 - 0
roles/openshift_storage_nfs/templates/exports.j2

@@ -1 +1,2 @@
 {{ openshift.hosted.registry.storage.nfs.directory }}/{{ openshift.hosted.registry.storage.volume.name }} {{ openshift.hosted.registry.storage.nfs.options }}
+{{ openshift.hosted.metrics.storage.nfs.directory }}/{{ openshift.hosted.metrics.storage.volume.name }} {{ openshift.hosted.metrics.storage.nfs.options }}