
Merge pull request #822 from detiber/upgradeFix2

Upgrade enhancements
Brenton Leanhardt 9 years ago
parent
commit
4517f7e1b4

+ 188 - 0
playbooks/adhoc/upgrades/files/pre-upgrade-check

@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+"""
+Pre-upgrade checks that must be run on a master before proceeding with upgrade.
+"""
+# This is a script not a python module:
+# pylint: disable=invalid-name
+
+# NOTE: This script should not require any python libs other than what is
+# in the standard library.
+
+__license__ = "ASL 2.0"
+
+import json
+import os
+import subprocess
+import re
+
+# The maximum length of container.ports.name
+ALLOWED_LENGTH = 15
+# The valid structure of container.ports.name
+ALLOWED_CHARS = re.compile('^[a-z0-9][a-z0-9\\-]*[a-z0-9]$')
+AT_LEAST_ONE_LETTER = re.compile('[a-z]')
+# Look at the OC_PATH env var for the full path. Defaults to 'oc'.
+OC_PATH = os.getenv('OC_PATH', 'oc')
+
+
+def validate(value):
+    """
+    validate verifies that value matches required conventions
+
+    Rules of container.ports.name validation:
+
+    * must be less than 16 chars
+    * at least one letter
+    * only a-z0-9-
+    * hyphens can not be leading or trailing or next to each other
+
+    :Parameters:
+       - `value`: Value to validate
+    """
+    if len(value) > ALLOWED_LENGTH:
+        return False
+
+    if '--' in value:
+        return False
+
+    # We search since it can be anywhere
+    if not AT_LEAST_ONE_LETTER.search(value):
+        return False
+
+    # We match because it must start at the beginning
+    if not ALLOWED_CHARS.match(value):
+        return False
+    return True
+
+
+def list_items(kind):
+    """
+    list_items returns a list of items from the api
+
+    :Parameters:
+       - `kind`: Kind of item to access
+    """
+    response = subprocess.check_output([OC_PATH, 'get', '--all-namespaces', '-o', 'json', kind])
+    items = json.loads(response)
+    return items.get("items", [])
+
+
+def get(obj, *paths):
+    """
+    Safely walks a nested dictionary, returning [] when a key is missing
+
+    :Parameters:
+       - `obj`: A dictionary structure
+       - `paths`: Keys to traverse, given as positional arguments
+    """
+    ret_obj = obj
+    for path in paths:
+        if ret_obj.get(path, None) is None:
+            return []
+        ret_obj = ret_obj[path]
+    return ret_obj
+
+
+# pylint: disable=too-many-arguments
+def pretty_print_errors(namespace, kind, item_name, container_name, port_name, valid):
+    """
+    Prints out results in a human-friendly way.
+
+    :Parameters:
+       - `namespace`: Namespace of the resource
+       - `kind`: Kind of the resource
+       - `item_name`: Name of the resource
+       - `container_name`: Name of the container. May be "" when kind=Service.
+       - `port_name`: Name of the port
+       - `valid`: True if the port is valid
+    """
+    if not valid:
+        if len(container_name) > 0:
+            print('%s/%s -n %s (Container="%s" Port="%s")' % (
+                kind, item_name, namespace, container_name, port_name))
+        else:
+            print('%s/%s -n %s (Port="%s")' % (
+                kind, item_name, namespace, port_name))
+
+
+def print_validation_header():
+    """
+    Prints the error header. Should run on the first error to avoid
+    overwhelming the user.
+    """
+    print """\
+At least one port name does not validate. Valid port names:
+
+    * must be less than 16 chars
+    * have at least one letter
+    * only a-z0-9-
+    * do not start or end with -
+    * Dashes may not be next to each other ('--')
+"""
+
+
+def main():
+    """
+    main is the main entry point to this script
+    """
+    try:
+        # the comma at the end suppresses the newline
+        print "Checking for oc ...",
+        subprocess.check_output([OC_PATH, 'whoami'])
+        print "found"
+    except:
+        print(
+            'Unable to run "%s whoami"\n'
+            'Please ensure OpenShift is running, and "oc" is on your system '
+            'path.\n'
+            'You can override the path with the OC_PATH environment variable.'
+            % OC_PATH)
+        raise SystemExit(1)
+
+    # Where the magic happens
+    first_error = True
+    for kind, path in [
+            ('replicationcontrollers', ("spec", "template", "spec", "containers")),
+            ('pods', ("spec", "containers")),
+            ('deploymentconfigs', ("spec", "template", "spec", "containers"))]:
+        for item in list_items(kind):
+            namespace = item["metadata"]["namespace"]
+            item_name = item["metadata"]["name"]
+            for container in get(item, *path):
+                container_name = container["name"]
+                for port in get(container, "ports"):
+                    port_name = port.get("name", None)
+                    if not port_name:
+                        # Unnamed ports are OK
+                        continue
+                    valid = validate(port_name)
+                    if not valid and first_error:
+                        first_error = False
+                        print_validation_header()
+                    pretty_print_errors(
+                        namespace, kind, item_name,
+                        container_name, port_name, valid)
+
+    # Services follow a different flow
+    for item in list_items('services'):
+        namespace = item["metadata"]["namespace"]
+        item_name = item["metadata"]["name"]
+        for port in get(item, "spec", "ports"):
+            port_name = port.get("targetPort", None)
+            if isinstance(port_name, int) or port_name is None:
+                # Integer only or unnamed ports are OK
+                continue
+            valid = validate(port_name)
+            if not valid and first_error:
+                first_error = False
+                print_validation_header()
+            pretty_print_errors(
+                namespace, "services", item_name, "", port_name, valid)
+
+    # If we had at least 1 error then exit with 1
+    if not first_error:
+        raise SystemExit(1)
+
+
+if __name__ == '__main__':
+    main()
+
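A quick sanity check of the port-name rules enforced above; this is a minimal sketch that assumes the script has been copied to an importable name such as pre_upgrade_check.py (the shipped file has no .py extension, so the import name is hypothetical):

    # Exercise validate() from the script above; the names are illustrative.
    from pre_upgrade_check import validate

    assert validate('web')                  # letters only, well under 16 chars
    assert validate('http-8080')            # hyphen surrounded by alphanumerics
    assert not validate('8080')             # needs at least one letter
    assert not validate('-web')             # hyphens may not lead or trail
    assert not validate('a--b')             # adjacent hyphens are rejected
    assert not validate('averyverylongportname')  # 16 chars or more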

+ 10 - 0
playbooks/adhoc/upgrades/files/versions.sh

@@ -0,0 +1,10 @@
+#!/bin/bash
+
+yum_installed=$(yum list installed "$@" 2>&1 | tail -n +2 | grep -v 'Installed Packages' | grep -v 'Red Hat Subscription Management' | grep -v 'Error:' | awk '{ print $2 }' | tr '\n' ' ')
+
+yum_available=$(yum list available "$@" 2>&1 | tail -n +2 | grep -v 'Available Packages' | grep -v 'Red Hat Subscription Management' | grep -v 'el7ose' | grep -v 'Error:' | awk '{ print $2 }' | tr '\n' ' ')
+
+
+echo "---"
+echo "curr_version: ${yum_installed}" 
+echo "avail_version: ${yum_available}"

+ 79 - 42
playbooks/adhoc/upgrades/library/openshift_upgrade_config.py

@@ -5,11 +5,8 @@
 """Ansible module for modifying OpenShift configs during an upgrade"""
 
 import os
-import shutil
 import yaml
 
-from datetime import datetime
-
 DOCUMENTATION = '''
 ---
 module: openshift_upgrade_config
@@ -20,75 +17,106 @@ requirements: [ ]
 EXAMPLES = '''
 '''
 
-def get_cfg_dir():
-    """Return the correct config directory to use."""
-    cfg_path = '/etc/origin/'
-    if not os.path.exists(cfg_path):
-        cfg_path = '/etc/openshift/'
-    return cfg_path
+def modify_api_levels(level_list, remove, ensure, msg_prepend='',
+                      msg_append=''):
+    """ modify_api_levels """
+    changed = False
+    changes = []
+
+    if not isinstance(remove, list):
+        remove = []
 
+    if not isinstance(ensure, list):
+        ensure = []
+
+    if not isinstance(level_list, list):
+        new_list = []
+        changed = True
+        changes.append("%s created missing %s" % (msg_prepend, msg_append))
+    else:
+        new_list = level_list
+        for level in remove:
+            if level in new_list:
+                new_list.remove(level)
+                changed = True
+                changes.append("%s removed %s %s" % (msg_prepend, level, msg_append))
 
-def upgrade_master_3_0_to_3_1(backup):
+    for level in ensure:
+        if level not in new_list:
+            new_list.append(level)
+            changed = True
+            changes.append("%s added %s %s" % (msg_prepend, level, msg_append))
+
+    return {'new_list': new_list, 'changed': changed, 'changes': changes}
+
+
+def upgrade_master_3_0_to_3_1(ansible_module, config_base, backup):
     """Main upgrade method for 3.0 to 3.1."""
-    changed = False
+    changes = []
 
     # Facts do not get transferred to the hosts where custom modules run,
     # need to make some assumptions here.
-    master_config = os.path.join(get_cfg_dir(), 'master/master-config.yaml')
+    master_config = os.path.join(config_base, 'master/master-config.yaml')
 
     master_cfg_file = open(master_config, 'r')
     config = yaml.safe_load(master_cfg_file.read())
     master_cfg_file.close()
 
-    # Remove v1beta3 from apiLevels:
-    if 'apiLevels' in config and \
-        'v1beta3' in config['apiLevels']:
-        config['apiLevels'].remove('v1beta3')
-        changed = True
-    if 'apiLevels' in config['kubernetesMasterConfig'] and \
-        'v1beta3' in config['kubernetesMasterConfig']['apiLevels']:
-        config['kubernetesMasterConfig']['apiLevels'].remove('v1beta3')
-        changed = True
 
-    # Add the new master proxy client certs:
-    # TODO: re-enable this once these certs are generated during upgrade:
-#    if 'proxyClientInfo' not in config['kubernetesMasterConfig']:
-#        config['kubernetesMasterConfig']['proxyClientInfo'] = {
-#            'certFile': 'master.proxy-client.crt',
-#            'keyFile': 'master.proxy-client.key'
-#       }
+    # Remove unsupported api versions and ensure supported api versions from
+    # master config
+    unsupported_levels = ['v1beta1', 'v1beta2', 'v1beta3']
+    supported_levels = ['v1']
+
+    result = modify_api_levels(config.get('apiLevels'), unsupported_levels,
+                               supported_levels, 'master-config.yaml:', 'from apiLevels')
+    if result['changed']:
+        config['apiLevels'] = result['new_list']
+        changes.append(result['changes'])
+
+    if 'kubernetesMasterConfig' in config and 'apiLevels' in config['kubernetesMasterConfig']:
+        config['kubernetesMasterConfig'].pop('apiLevels')
+        changes.append('master-config.yaml: removed kubernetesMasterConfig.apiLevels')
 
-    if changed:
+    # Add proxyClientInfo to master-config
+    if 'proxyClientInfo' not in config['kubernetesMasterConfig']:
+        config['kubernetesMasterConfig']['proxyClientInfo'] = {
+            'certFile': 'master.proxy-client.crt',
+            'keyFile': 'master.proxy-client.key'
+        }
+        changes.append("master-config.yaml: added proxyClientInfo")
+
+    if len(changes) > 0:
         if backup:
-            timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
-            basedir = os.path.split(master_config)[0]
-            backup_file = os.path.join(basedir, 'master-config.yaml.bak-%s'
-                                       % timestamp)
-            shutil.copyfile(master_config, backup_file)
+            # TODO: Check success:
+            ansible_module.backup_local(master_config)
+
         # Write the modified config:
         out_file = open(master_config, 'w')
         out_file.write(yaml.safe_dump(config, default_flow_style=False))
         out_file.close()
 
-    return changed
+    return changes
 
 
-def upgrade_master(from_version, to_version, backup):
+def upgrade_master(ansible_module, config_base, from_version, to_version, backup):
     """Upgrade entry point."""
     if from_version == '3.0':
         if to_version == '3.1':
-            return upgrade_master_3_0_to_3_1(backup)
+            return upgrade_master_3_0_to_3_1(ansible_module, config_base, backup)
 
 
 def main():
     """ main """
     # disabling pylint errors for global-variable-undefined and invalid-name
     # for 'global module' usage, since it is required to use ansible_facts
-    # pylint: disable=global-variable-undefined, invalid-name
+    # pylint: disable=global-variable-undefined, invalid-name,
+    # pylint: disable=redefined-outer-name
     global module
 
     module = AnsibleModule(
         argument_spec=dict(
+            config_base=dict(required=True),
             from_version=dict(required=True, choices=['3.0']),
             to_version=dict(required=True, choices=['3.1']),
             role=dict(required=True, choices=['master']),
@@ -101,12 +129,21 @@ def main():
     to_version = module.params['to_version']
     role = module.params['role']
     backup = module.params['backup']
+    config_base = module.params['config_base']
 
-    changed = False
-    if role == 'master':
-        changed = upgrade_master(from_version, to_version, backup)
+    try:
+        changes = []
+        if role == 'master':
+            changes = upgrade_master(module, config_base, from_version,
+                                     to_version, backup)
+
+        changed = len(changes) > 0
+        return module.exit_json(changed=changed, changes=changes)
 
-    return module.exit_json(changed=changed)
+    # ignore broad-except error to avoid stack trace to ansible user
+    # pylint: disable=broad-except
+    except Exception, e:
+        return module.fail_json(msg=str(e))
 
 # ignore pylint errors related to the module_utils import
 # pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import
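For reference, a small sketch of what the new modify_api_levels() helper returns for a typical 3.0-era apiLevels list (the input list below is illustrative):

    # Prune unsupported API levels and ensure v1, as upgrade_master_3_0_to_3_1 does.
    result = modify_api_levels(
        level_list=['v1beta1', 'v1beta3', 'v1'],
        remove=['v1beta1', 'v1beta2', 'v1beta3'],
        ensure=['v1'],
        msg_prepend='master-config.yaml:',
        msg_append='from apiLevels')

    # result['new_list'] == ['v1']
    # result['changed'] is True
    # result['changes'] holds one message per level removed or added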

+ 261 - 82
playbooks/adhoc/upgrades/upgrade.yml

@@ -1,35 +1,61 @@
 ---
-- name: Update deployment type
-  hosts: OSEv3
+- name: Load master facts
+  hosts: masters
   roles:
   - openshift_facts
-  post_tasks: # technically tasks are run after roles, but post_tasks is a bit more explicit.
-  - openshift_facts:
-      role: common
-      local_facts:
-        deployment_type: "{{ deployment_type }}"
 
 - name: Verify upgrade can proceed
-  hosts: masters
+  hosts: masters[0]
+  vars:
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+  gather_facts: no
+  tasks:
+    # Pacemaker is currently the only supported upgrade path for multiple masters
+    - fail:
+        msg: "openshift_master_cluster_method must be set to 'pacemaker'"
+      when: openshift_master_ha | bool and ((openshift_master_cluster_method is not defined) or (openshift_master_cluster_method is defined and openshift_master_cluster_method != "pacemaker"))
+
+- name: Run pre-upgrade checks on first master
+  hosts: masters[0]
+  tasks:
+  # If this script errors out ansible will show the default stdout/stderr
+  # which contains details for the user:
+  - script: files/pre-upgrade-check
+
+- name: Evaluate etcd_hosts
+  hosts: localhost
   tasks:
-  # Checking the global deployment type rather than host facts, this is about
-  # what the user is requesting.
-    - fail: msg="Deployment type enterprise not supported for upgrade"
-      when: deployment_type == "enterprise"
+  - name: Evaluate etcd hosts
+    add_host:
+      name: "{{ groups.masters.0 }}"
+      groups: etcd_hosts
+    when: hostvars[groups.masters.0].openshift.master.embedded_etcd | bool
+  - name: Evaluate etcd hosts
+    add_host:
+      name: "{{ item }}"
+      groups: etcd_hosts
+    with_items: groups.etcd
+    when: not hostvars[groups.masters.0].openshift.master.embedded_etcd | bool
 
 - name: Backup etcd
-  hosts: masters
+  hosts: etcd_hosts
   vars:
     embedded_etcd: "{{ openshift.master.embedded_etcd }}"
     timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
   roles:
   - openshift_facts
   tasks:
+
   - stat: path=/var/lib/openshift
     register: var_lib_openshift
+
+  - stat: path=/var/lib/origin
+    register: var_lib_origin
+
   - name: Create origin symlink if necessary
     file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
-    when: var_lib_openshift.stat.exists == True
+    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
+
   - name: Check available disk space for etcd backup
     # We assume to be using the data dir for all backups.
     shell: >
@@ -43,62 +69,71 @@
     when: embedded_etcd | bool
 
   - name: Abort if insufficient disk space for etcd backup
-    fail: msg="{{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, {{ avail_disk.stdout }} Kb available."
+    fail:
+      msg: >
+        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
+        {{ avail_disk.stdout }} Kb available.
     when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
+
   - name: Install etcd (for etcdctl)
-    yum: pkg=etcd state=latest
+    yum:
+      pkg: etcd
+      state: latest
+
   - name: Generate etcd backup
-    command: etcdctl backup --data-dir={{ openshift.master.etcd_data_dir }} --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
+    command: >
+      etcdctl backup --data-dir={{ openshift.master.etcd_data_dir }}
+      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
+
   - name: Display location of etcd backup
-    debug: msg="Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
+    debug:
+      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
 
-- name: Upgrade base package on masters
-  hosts: masters
+- name: Update deployment type
+  hosts: OSEv3
   roles:
   - openshift_facts
-  vars:
-    openshift_version: "{{ openshift_pkg_version | default('') }}"
-  tasks:
-    - name: Upgrade base package
-      yum: pkg={{ openshift.common.service_type }}{{ openshift_version  }} state=latest
+  post_tasks:
+  - openshift_facts:
+      role: common
+      local_facts:
+        deployment_type: "{{ deployment_type }}"
+
 
-- name: Evaluate oo_first_master
-  hosts: localhost
-  vars:
-    g_masters_group: "{{ 'masters' }}"
-  tasks:
-    - name: Evaluate oo_first_master
-      add_host:
-        name: "{{ groups[g_masters_group][0] }}"
-        groups: oo_first_master
-        ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
-        ansible_sudo: "{{ g_sudo | default(omit) }}"
-      when: g_masters_group in groups and (groups[g_masters_group] | length) > 0
-
-# TODO: ideally we would check the new version, without installing it. (some
-# kind of yum repoquery? would need to handle openshift -> atomic-openshift
-# package rename)
 - name: Perform upgrade version checking
-  hosts: oo_first_master
+  hosts: masters[0]
   tasks:
-    - name: Determine new version
-      command: >
-        rpm -q --queryformat '%{version}' {{ openshift.common.service_type }}
-      register: _new_version
+  - name: Clean yum cache
+    command: yum clean all
 
-- name: Ensure AOS 3.0.2 or Origin 1.0.6
-  hosts: oo_first_master
-  tasks:
-    fail: This playbook requires Origin 1.0.6 or Atomic OpenShift 3.0.2 or later
-    when: _new_version.stdout | version_compare('1.0.6','<') or ( _new_version.stdout | version_compare('3.0','>=' and _new_version.stdout | version_compare('3.0.2','<') )
+  - name: Determine available versions
+    script: files/versions.sh {{ openshift.common.service_type }} openshift
+    register: g_versions_result
+
+  - set_fact:
+      g_aos_versions: "{{ g_versions_result.stdout | from_yaml }}"
+
+  - set_fact:
+      g_new_version: "{{ g_aos_versions.curr_version.split('-', 1).0 if g_aos_versions.avail_version is none else g_aos_versions.avail_version.split('-', 1).0 }}"
+
+  - fail:
+      msg: This playbook requires Origin 1.0.6 or later
+    when: deployment_type == 'origin' and g_aos_versions.curr_version | version_compare('1.0.6','<')
+
+  - fail:
+      msg: This playbook requires Atomic OpenShift 3.0.2 or later
+    when: deployment_type in ['openshift-enterprise', 'atomic-openshift'] and g_aos_versions.curr_version | version_compare('3.0.2','<')
+
+  - fail:
+      msg: Atomic OpenShift 3.1 packages not found
+    when: deployment_type in ['openshift-enterprise', 'atomic-openshift'] and g_aos_versions.curr_version | version_compare('3.0.2.900','<') and (g_aos_versions.avail_version is none or g_aos_versions.avail_version | version_compare('3.0.2.900','<'))
+  # Deployment type 'enterprise' is no longer valid if we're upgrading to 3.1 or beyond.
+  # (still valid for 3.0.x to 3.0.y however) Using the global deployment_type here as
+  # we're checking what was requested by the upgrade, not the current type on the system.
+  - fail:
+      msg: "Deployment type enterprise not supported for upgrade"
+    when: deployment_type == "enterprise" and  g_aos_versions.curr_version | version_compare('3.1', '>=')
 
-- name: Verify upgrade can proceed
-  hosts: oo_first_master
-  tasks:
-  # Checking the global deployment type rather than host facts, this is about
-  # what the user is requesting.
-  - fail: msg="Deployment type 'enterprise' must be updated to 'openshift-enterprise' for upgrade to proceed"
-    when: deployment_type == "enterprise" and (_new_version.stdout | version_compare('1.0.7', '>=') or _new_version.stdout | version_compare('3.1', '>='))
 
 - name: Upgrade masters
   hosts: masters
@@ -106,16 +141,138 @@
     openshift_version: "{{ openshift_pkg_version | default('') }}"
   tasks:
     - name: Upgrade to latest available kernel
-      yum: pkg=kernel state=latest
-    - name: display just the deployment_type variable for the current host
-      debug:
-        var: hostvars[inventory_hostname]
+      yum:
+        pkg: kernel
+        state: latest
+
     - name: Upgrade master packages
       command: yum update -y {{ openshift.common.service_type }}-master{{ openshift_version }}
-    - name: Upgrade master configuration.
-      openshift_upgrade_config: from_version=3.0 to_version=3.1 role=master
-    - name: Restart master services
-      service: name="{{ openshift.common.service_type}}-master" state=restarted
+
+    - name: Ensure python-yaml present for config upgrade
+      yum:
+        pkg: python-yaml
+        state: installed
+
+    - name: Upgrade master configuration
+      openshift_upgrade_config:
+        from_version: '3.0'
+        to_version: '3.1'
+        role: master
+        config_base: "{{ hostvars[inventory_hostname].openshift.common.config_base }}"
+      when: deployment_type in ['openshift-enterprise', 'atomic-enterprise'] and g_aos_versions.curr_version | version_compare('3.1', '>=')
+
+    - set_fact:
+        master_certs_missing: True
+        master_cert_subdir: master-{{ openshift.common.hostname }}
+        master_cert_config_dir: "{{ openshift.common.config_base }}/master"
+
+- name: Create temp directory for syncing certs
+  hosts: localhost
+  gather_facts: no
+  tasks:
+  - name: Create local temp directory for syncing certs
+    local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX
+    register: g_master_mktemp
+    changed_when: False
+
+- name: Generate missing master certificates
+  hosts: masters[0]
+  vars:
+    master_hostnames: "{{ hostvars
+                          | oo_select_keys(groups.masters)
+                          | oo_collect('openshift.common.all_hostnames')
+                          | oo_flatten | unique }}"
+    master_generated_certs_dir: "{{ openshift.common.config_base }}/generated-configs"
+    masters_needing_certs: "{{ hostvars
+                               | oo_select_keys(groups.masters)
+                               | difference([groups.masters.0]) }}"
+    sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
+    openshift_deployment_type: "{{ deployment_type }}"
+  roles:
+  - openshift_master_certificates
+  post_tasks:
+  - name: Remove generated etcd client certs when using external etcd
+    file:
+      path: "{{ master_generated_certs_dir }}/{{ item.0.master_cert_subdir }}/{{ item.1 }}"
+      state: absent
+    when: groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config
+    with_nested:
+    - masters_needing_certs
+    - - master.etcd-client.crt
+      - master.etcd-client.key
+
+  - name: Create a tarball of the master certs
+    command: >
+      tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz
+        -C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} .
+    with_items: masters_needing_certs
+
+  - name: Retrieve the master cert tarball from the master
+    fetch:
+      src: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz"
+      dest: "{{ sync_tmpdir }}/"
+      flat: yes
+      fail_on_missing: yes
+      validate_checksum: yes
+    with_items: masters_needing_certs
+
+- name: Sync certs and restart masters post configuration change
+  hosts: masters
+  vars:
+    sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+  tasks:
+  - name: Unarchive the tarball on the master
+    unarchive:
+      src: "{{ sync_tmpdir }}/{{ master_cert_subdir }}.tgz"
+      dest: "{{ master_cert_config_dir }}"
+    when: inventory_hostname != groups.masters.0
+
+  - name: Restart master services
+    service: name="{{ openshift.common.service_type}}-master" state=restarted
+    when: not openshift_master_ha | bool
+
+- name: Destroy cluster
+  hosts: masters[0]
+  vars:
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+    openshift_deployment_type: "{{ deployment_type }}"
+  pre_tasks:
+  - name: Check for configured cluster
+    stat:
+      path: /etc/corosync/corosync.conf
+    register: corosync_conf
+    when: openshift_master_ha | bool
+  - name: Destroy cluster
+    command: pcs cluster destroy --all
+    when: openshift_master_ha | bool and corosync_conf.stat.exists == true
+
+- name: Start pcsd on masters
+  hosts: masters
+  vars:
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+  tasks:
+  - name: Start pcsd
+    service: name=pcsd state=started
+    when: openshift_master_ha | bool
+
+- name: Re-create cluster
+  hosts: masters[0]
+  vars:
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+    openshift_deployment_type: "{{ deployment_type }}"
+    omc_cluster_hosts: "{{ groups.masters | join(' ') }}"
+  roles:
+  - role: openshift_master_cluster
+    when: openshift_master_ha | bool
+
+- name: Delete temporary directory on localhost
+  hosts: localhost
+  gather_facts: no
+  tasks:
+  - file: name={{ g_master_mktemp.stdout }} state=absent
+    changed_when: False
+
 
 - name: Upgrade nodes
   hosts: nodes
@@ -129,17 +286,17 @@
     - name: Restart node services
       service: name="{{ openshift.common.service_type }}-node" state=restarted
 
-- name: Update cluster policy
-  hosts: oo_first_master
+- name: Update cluster policy and policy bindings
+  hosts: masters[0]
+  vars:
+    origin_reconcile_bindings: "{{ deployment_type == 'origin' and g_new_version | version_compare('1.0.6', '>') }}"
+    ent_reconcile_bindings: "{{ deployment_type in ['openshift-enterprise', 'atomic-enterprise'] and g_new_version | version_compare('3.0.2','>') }}"
   tasks:
     - name: oadm policy reconcile-cluster-roles --confirm
       command: >
         {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig
         policy reconcile-cluster-roles --confirm
 
-- name: Update cluster policy bindings
-  hosts: oo_first_master
-  tasks:
     - name: oadm policy reconcile-cluster-role-bindings --confirm
       command: >
         {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig
@@ -148,12 +305,40 @@
         --exclude-groups=system:unauthenticated
         --exclude-users=system:anonymous
         --additive-only=true --confirm
-      when: ( _new_version.stdout | version_compare('1.0.6', '>') and _new_version.stdout | version_compare('3.0','<') ) or _new_version.stdout | version_compare('3.0.2','>')
+      when: origin_reconcile_bindings | bool or ent_reconcile_bindings | bool
+
 
-- name: Upgrade default router
-  hosts: oo_first_master
+- name: Restart masters post reconcile
+  hosts: masters
   vars:
-    - router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', 'v' + _new_version.stdout ) }}"
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+  tasks:
+    - name: Restart master services
+      service: name="{{ openshift.common.service_type}}-master" state=restarted
+      when: not openshift_master_ha | bool
+
+- name: Restart cluster post reconcile
+  hosts: masters[0]
+  vars:
+    openshift_master_ha: "{{ groups['masters'] | length > 1 }}"
+  tasks:
+    - name: Restart master cluster
+      command: pcs resource restart master
+      when: openshift_master_ha | bool
+    - name: Wait for the clustered master service to be available
+      wait_for:
+        host: "{{ openshift_master_cluster_vip }}"
+        port: 8443
+        state: started
+        timeout: 180
+        delay: 90
+      when: openshift_master_ha | bool
+
+- name: Upgrade default router and registry
+  hosts: masters[0]
+  vars:
+    - registry_image: "{{  openshift.master.registry_url | replace( '${component}', 'docker-registry' )  | replace ( '${version}', 'v' + g_new_version  ) }}"
+    - router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', 'v' + g_new_version ) }}"
     - oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig"
   tasks:
     - name: Check for default router
@@ -188,12 +373,6 @@
         {{ oc_cmd }} patch dc/router -p
         '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}"}]}}}}'
 
-- name: Upgrade default
-  hosts: oo_first_master
-  vars:
-    - registry_image: "{{  openshift.master.registry_url | replace( '${component}', 'docker-registry' )  | replace ( '${version}', 'v' + _new_version.stdout  ) }}"
-    - oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig"
-  tasks:
     - name: Check for default registry
       command: >
           {{ oc_cmd }} get -n default dc/docker-registry
@@ -207,7 +386,7 @@
         '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}'
 
 - name: Update image streams and templates
-  hosts: oo_first_master
+  hosts: masters[0]
   vars:
     openshift_examples_import_command: "update"
     openshift_deployment_type: "{{ deployment_type }}"

+ 7 - 0
roles/openshift_master/tasks/main.yml

@@ -140,22 +140,27 @@
     src: atomic-openshift-master-api.service.j2
     dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-api.service
     force: no
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
 - name: Create the controllers service file
   template:
     src: atomic-openshift-master-controllers.service.j2
     dest: /usr/lib/systemd/system/{{ openshift.common.service_type }}-master-controllers.service
     force: no
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
 - name: Create the api env file
   template:
     src: atomic-openshift-master-api.j2
     dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-api
     force: no
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
 - name: Create the controllers env file
   template:
     src: atomic-openshift-master-controllers.j2
     dest: /etc/sysconfig/{{ openshift.common.service_type }}-master-controllers
     force: no
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
 - command: systemctl daemon-reload
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
 # end workaround for missing systemd unit files
 
 - name: Create session secrets file
@@ -202,6 +207,7 @@
       line: "OPTIONS=--loglevel={{ openshift.master.debug_level }} --listen=https://0.0.0.0:8443 --master=https://{{ openshift.common.ip }}:8443"
     - regex: '^CONFIG_FILE='
       line: "CONFIG_FILE={{ openshift_master_config_file }}"
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
   notify:
   - restart master api
 
@@ -215,6 +221,7 @@
       line: "OPTIONS=--loglevel={{ openshift.master.debug_level }} --listen=https://0.0.0.0:8444"
     - regex: '^CONFIG_FILE='
       line: "CONFIG_FILE={{ openshift_master_config_file }}"
+  when: openshift_master_ha | bool and openshift_master_cluster_method == "native"
   notify:
   - restart master controllers
 

+ 19 - 8
utils/src/ooinstall/cli_installer.py

@@ -317,7 +317,9 @@ def get_installed_hosts(hosts, callback_facts):
             installed_hosts.append(host)
     return installed_hosts
 
-def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force):
+# pylint: disable=too-many-branches
+# This pylint error will be corrected shortly in separate PR.
+def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force, verbose):
 
     # Copy the list of existing hosts so we can remove any already installed nodes.
     hosts_to_run_on = list(oo_cfg.hosts)
@@ -377,7 +379,7 @@ def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force):
 
                     openshift_ansible.set_config(oo_cfg)
                     click.echo('Gathering information from hosts...')
-                    callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts)
+                    callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts, verbose)
                     if error:
                         click.echo("There was a problem fetching the required information. " \
                                    "See {} for details.".format(oo_cfg.settings['ansible_log_path']))
@@ -418,9 +420,11 @@ def get_hosts_to_run_on(oo_cfg, callback_facts, unattended, force):
         writable=True,
         readable=True),
     default="/tmp/ansible.log")
+@click.option('-v', '--verbose',
+    is_flag=True, default=False)
 #pylint: disable=too-many-arguments
 # Main CLI entrypoint, not much we can do about too many arguments.
-def cli(ctx, unattended, configuration, ansible_playbook_directory, ansible_config, ansible_log_path):
+def cli(ctx, unattended, configuration, ansible_playbook_directory, ansible_config, ansible_log_path, verbose):
     """
     The main click CLI module. Responsible for handling most common CLI options,
     assigning any defaults and adding to the context for the sub-commands.
@@ -430,6 +434,7 @@ def cli(ctx, unattended, configuration, ansible_playbook_directory, ansible_conf
     ctx.obj['configuration'] = configuration
     ctx.obj['ansible_config'] = ansible_config
     ctx.obj['ansible_log_path'] = ansible_log_path
+    ctx.obj['verbose'] = verbose
 
     oo_cfg = OOConfig(ctx.obj['configuration'])
 
@@ -460,6 +465,7 @@ def cli(ctx, unattended, configuration, ansible_playbook_directory, ansible_conf
 @click.pass_context
 def uninstall(ctx):
     oo_cfg = ctx.obj['oo_cfg']
+    verbose = ctx.obj['verbose']
 
     if len(oo_cfg.hosts) == 0:
         click.echo("No hosts defined in: %s" % oo_cfg['configuration'])
@@ -475,13 +481,14 @@ def uninstall(ctx):
             click.echo("Uninstall cancelled.")
             sys.exit(0)
 
-    openshift_ansible.run_uninstall_playbook()
+    openshift_ansible.run_uninstall_playbook(verbose)
 
 
 @click.command()
 @click.pass_context
 def upgrade(ctx):
     oo_cfg = ctx.obj['oo_cfg']
+    verbose = ctx.obj['verbose']
 
     if len(oo_cfg.hosts) == 0:
         click.echo("No hosts defined in: %s" % oo_cfg['configuration'])
@@ -508,11 +515,12 @@ def upgrade(ctx):
             click.echo("Upgrade cancelled.")
             sys.exit(0)
 
-    retcode = openshift_ansible.run_upgrade_playbook()
+    retcode = openshift_ansible.run_upgrade_playbook(verbose)
     if retcode > 0:
         click.echo("Errors encountered during upgrade, please check %s." %
             oo_cfg.settings['ansible_log_path'])
     else:
+        oo_cfg.save_to_disk()
         click.echo("Upgrade completed! Rebooting all hosts is recommended.")
 
 
@@ -521,6 +529,7 @@ def upgrade(ctx):
 @click.pass_context
 def install(ctx, force):
     oo_cfg = ctx.obj['oo_cfg']
+    verbose = ctx.obj['verbose']
 
     if ctx.obj['unattended']:
         error_if_missing_info(oo_cfg)
@@ -528,13 +537,15 @@ def install(ctx, force):
         oo_cfg = get_missing_info_from_user(oo_cfg)
 
     click.echo('Gathering information from hosts...')
-    callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts)
+    callback_facts, error = openshift_ansible.default_facts(oo_cfg.hosts,
+        verbose)
     if error:
         click.echo("There was a problem fetching the required information. " \
                    "Please see {} for details.".format(oo_cfg.settings['ansible_log_path']))
         sys.exit(1)
 
-    hosts_to_run_on, callback_facts = get_hosts_to_run_on(oo_cfg, callback_facts, ctx.obj['unattended'], force)
+    hosts_to_run_on, callback_facts = get_hosts_to_run_on(
+        oo_cfg, callback_facts, ctx.obj['unattended'], force, verbose)
 
     click.echo('Writing config to: %s' % oo_cfg.config_path)
 
@@ -556,7 +567,7 @@ If changes are needed to the values recorded by the installer please update {}.
         confirm_continue(message)
 
     error = openshift_ansible.run_main_playbook(oo_cfg.hosts,
-                                                   hosts_to_run_on)
+                                                   hosts_to_run_on, verbose)
     if error:
         # The bootstrap script will print out the log location.
         message = """

+ 22 - 19
utils/src/ooinstall/openshift_ansible.py

@@ -91,16 +91,17 @@ def write_host(host, inventory, scheduleable=True):
     inventory.write('{} {}\n'.format(host.connect_to, facts))
 
 
-def load_system_facts(inventory_file, os_facts_path, env_vars):
+def load_system_facts(inventory_file, os_facts_path, env_vars, verbose=False):
     """
     Retrieves system facts from the remote systems.
     """
     FNULL = open(os.devnull, 'w')
-    status = subprocess.call(['ansible-playbook',
-                     '--inventory-file={}'.format(inventory_file),
-                     os_facts_path],
-                     env=env_vars,
-                     stdout=FNULL)
+    args = ['ansible-playbook', '-v'] if verbose \
+        else ['ansible-playbook']
+    args.extend([
+        '--inventory-file={}'.format(inventory_file),
+        os_facts_path])
+    status = subprocess.call(args, env=env_vars, stdout=FNULL)
     if not status == 0:
         return [], 1
     callback_facts_file = open(CFG.settings['ansible_callback_facts_yaml'], 'r')
@@ -109,7 +110,7 @@ def load_system_facts(inventory_file, os_facts_path, env_vars):
     return callback_facts, 0
 
 
-def default_facts(hosts):
+def default_facts(hosts, verbose=False):
     global CFG
     inventory_file = generate_inventory(hosts)
     os_facts_path = '{}/playbooks/byo/openshift_facts.yml'.format(CFG.ansible_playbook_directory)
@@ -121,10 +122,10 @@ def default_facts(hosts):
         facts_env["ANSIBLE_LOG_PATH"] = CFG.settings['ansible_log_path']
     if 'ansible_config' in CFG.settings:
         facts_env['ANSIBLE_CONFIG'] = CFG.settings['ansible_config']
-    return load_system_facts(inventory_file, os_facts_path, facts_env)
+    return load_system_facts(inventory_file, os_facts_path, facts_env, verbose)
 
 
-def run_main_playbook(hosts, hosts_to_run_on):
+def run_main_playbook(hosts, hosts_to_run_on, verbose=False):
     global CFG
     inventory_file = generate_inventory(hosts)
     if len(hosts_to_run_on) != len(hosts):
@@ -138,17 +139,19 @@ def run_main_playbook(hosts, hosts_to_run_on):
         facts_env['ANSIBLE_LOG_PATH'] = CFG.settings['ansible_log_path']
     if 'ansible_config' in CFG.settings:
         facts_env['ANSIBLE_CONFIG'] = CFG.settings['ansible_config']
-    return run_ansible(main_playbook_path, inventory_file, facts_env)
+    return run_ansible(main_playbook_path, inventory_file, facts_env, verbose)
 
 
-def run_ansible(playbook, inventory, env_vars):
-    return subprocess.call(['ansible-playbook',
-                             '--inventory-file={}'.format(inventory),
-                             playbook],
-                             env=env_vars)
+def run_ansible(playbook, inventory, env_vars, verbose=False):
+    args = ['ansible-playbook', '-v'] if verbose \
+        else ['ansible-playbook']
+    args.extend([
+        '--inventory-file={}'.format(inventory),
+        playbook])
+    return subprocess.call(args, env=env_vars)
 
 
-def run_uninstall_playbook():
+def run_uninstall_playbook(verbose=False):
     playbook = os.path.join(CFG.settings['ansible_playbook_directory'],
         'playbooks/adhoc/uninstall.yml')
     inventory_file = generate_inventory(CFG.hosts)
@@ -157,10 +160,10 @@ def run_uninstall_playbook():
         facts_env['ANSIBLE_LOG_PATH'] = CFG.settings['ansible_log_path']
     if 'ansible_config' in CFG.settings:
         facts_env['ANSIBLE_CONFIG'] = CFG.settings['ansible_config']
-    return run_ansible(playbook, inventory_file, facts_env)
+    return run_ansible(playbook, inventory_file, facts_env, verbose)
 
 
-def run_upgrade_playbook():
+def run_upgrade_playbook(verbose=False):
     playbook = os.path.join(CFG.settings['ansible_playbook_directory'],
         'playbooks/adhoc/upgrades/upgrade.yml')
     # TODO: Upgrade inventory for upgrade?
@@ -170,5 +173,5 @@ def run_upgrade_playbook():
         facts_env['ANSIBLE_LOG_PATH'] = CFG.settings['ansible_log_path']
     if 'ansible_config' in CFG.settings:
         facts_env['ANSIBLE_CONFIG'] = CFG.settings['ansible_config']
-    return run_ansible(playbook, inventory_file, facts_env)
+    return run_ansible(playbook, inventory_file, facts_env, verbose)