Browse files

Merge pull request #2441 from dgoodwin/34-upgrade-improvements

3.4 Upgrade Improvements
Andrew Butcher 8 years ago
parent
commit
28f853477a
27 changed files with 747 additions and 472 deletions
  1. inventory/byo/hosts.origin.example (+0 -1)
  2. playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml (+5 -5)
  3. playbooks/byo/openshift-cluster/upgrades/v3_3/roles (+1 -0)
  4. playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade.yml (+86 -52)
  5. playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml (+100 -0)
  6. playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_nodes.yml (+102 -0)
  7. playbooks/common/openshift-cluster/initialize_facts.yml (+2 -0)
  8. playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml (+22 -0)
  9. playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml (+7 -0)
  10. playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml (+1 -1)
  11. playbooks/common/openshift-cluster/upgrades/init.yml (+50 -0)
  12. playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml (+40 -0)
  13. playbooks/common/openshift-cluster/upgrades/post_control_plane.yml (+0 -0)
  14. playbooks/common/openshift-cluster/upgrades/pre.yml (+0 -311)
  15. playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml (+6 -0)
  16. playbooks/common/openshift-cluster/upgrades/pre/roles (+1 -0)
  17. playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml (+31 -0)
  18. playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml (+23 -0)
  19. playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml (+37 -0)
  20. playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml (+13 -0)
  21. playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml (+45 -0)
  22. playbooks/common/openshift-cluster/upgrades/upgrade.yml (+94 -96)
  23. playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml (+75 -0)
  24. roles/docker/defaults/main.yml (+0 -1)
  25. roles/docker/tasks/main.yml (+4 -4)
  26. roles/openshift_cli/meta/main.yml (+1 -0)
  27. roles/openshift_version/meta/main.yml (+1 -1)

+ 0 - 1
inventory/byo/hosts.origin.example

@@ -95,7 +95,6 @@ openshift_release=v1.2
 # modify image streams to point at that registry by setting the following to true
 #openshift_examples_modify_imagestreams=true
 
-
 # Origin copr repo
 #openshift_additional_repos=[{'id': 'openshift-origin-copr', 'name': 'OpenShift Origin COPR', 'baseurl': 'https://copr-be.cloud.fedoraproject.org/results/maxamillion/origin-next/epel-7-$basearch/', 'enabled': 1, 'gpgcheck': 1, 'gpgkey': 'https://copr-be.cloud.fedoraproject.org/results/maxamillion/origin-next/pubkey.gpg'}]
 

+ 5 - 5
playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml

@@ -1,6 +1,6 @@
 
 - name: Check for appropriate Docker versions
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
   roles:
   - openshift_facts
   tasks:
@@ -19,7 +19,7 @@
 # don't want to carry on, potentially taking out every node. The playbook can safely be re-run
 # and will not take any action on a node already running the requested docker version.
 - name: Evacuate and upgrade nodes
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
   serial: 1
   any_errors_fatal: true
   tasks:
@@ -27,13 +27,13 @@
     command: >
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=false
     delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_config
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
 
   - name: Evacuate Node for Kubelet upgrade
     command: >
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --evacuate --force
     delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_config
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
 
   - include: ../../../../common/openshift-cluster/upgrades/docker/upgrade.yml
     when: l_docker_upgrade is defined and l_docker_upgrade | bool
@@ -43,5 +43,5 @@
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=true
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: openshift.node.schedulable | bool
-    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_config and openshift.node.schedulable | bool
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade and openshift.node.schedulable | bool
 

+ 1 - 0
playbooks/byo/openshift-cluster/upgrades/v3_3/roles

@@ -0,0 +1 @@
+../../../../../roles

+ 86 - 52
playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade.yml

@@ -1,67 +1,101 @@
 ---
-- include: ../../../../common/openshift-cluster/verify_ansible_version.yml
-
-- hosts: localhost
-  connection: local
-  become: no
-  gather_facts: no
-  tasks:
-  - include_vars: ../../../../byo/openshift-cluster/cluster_hosts.yml
-  - add_host:
-      name: "{{ item }}"
-      groups: l_oo_all_hosts
-    with_items: g_all_hosts | default([])
+#
+# Full Control Plane + Nodes Upgrade
+#
+- include: ../../../../common/openshift-cluster/upgrades/init.yml
+  tags:
+  - pre_upgrade
 
+# Configure the upgrade target for the common upgrade tasks:
 - hosts: l_oo_all_hosts
-  gather_facts: no
+  tags:
+  - pre_upgrade
   tasks:
-  - include_vars: ../../../../byo/openshift-cluster/cluster_hosts.yml
-
-- include: ../../../../common/openshift-cluster/evaluate_groups.yml
-  vars:
-    # Do not allow adding hosts during upgrade.
-    g_new_master_hosts: []
-    g_new_node_hosts: []
-    openshift_cluster_id: "{{ cluster_id | default('default') }}"
-    openshift_deployment_type: "{{ deployment_type }}"
-
-- name: Set oo_options
-  hosts: oo_all_hosts
-  tasks:
-  - set_fact:
-      openshift_docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') }}"
-    when: openshift_docker_additional_registries is not defined
-  - set_fact:
-      openshift_docker_insecure_registries: "{{ lookup('oo_option',  'docker_insecure_registries') }}"
-    when: openshift_docker_insecure_registries is not defined
-  - set_fact:
-      openshift_docker_blocked_registries: "{{ lookup('oo_option', 'docker_blocked_registries') }}"
-    when: openshift_docker_blocked_registries is not defined
   - set_fact:
-      openshift_docker_options: "{{ lookup('oo_option', 'docker_options') }}"
-    when: openshift_docker_options is not defined
-  - set_fact:
-      openshift_docker_log_driver: "{{ lookup('oo_option', 'docker_log_driver') }}"
-    when: openshift_docker_log_driver is not defined
-  - set_fact:
-      openshift_docker_log_options: "{{ lookup('oo_option', 'docker_log_options') }}"
-    when: openshift_docker_log_options is not defined
+      openshift_upgrade_target: "{{ '1.3' if deployment_type == 'origin' else '3.3' }}"
+      openshift_upgrade_min: "{{ '1.2' if deployment_type == 'origin' else '3.2' }}"
 
+# Pre-upgrade
 
-# Configure the upgrade target for the common upgrade tasks:
-- hosts: l_oo_all_hosts
+- include: ../../../../common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml
+  tags:
+  - pre_upgrade
+
+- name: Update repos and initialize facts on all hosts
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config:oo_lb_to_config
+  tags:
+  - pre_upgrade
+  roles:
+  - openshift_repos
+
+- name: Set openshift_no_proxy_internal_hostnames
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade
+  tags:
+  - pre_upgrade
   tasks:
   - set_fact:
-      openshift_upgrade_target: "{{ '1.3' if deployment_type == 'origin' else '3.3' }}"
-      openshift_upgrade_min: "{{ '1.2' if deployment_type == 'origin' else '3.2' }}"
+      openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config']
+                                                    | union(groups['oo_masters_to_config'])
+                                                    | union(groups['oo_etcd_to_config'] | default([])))
+                                                | oo_collect('openshift.common.hostname') | default([]) | join (',')
+                                                }}"
+    when: "{{ (openshift_http_proxy is defined or openshift_https_proxy is defined) and
+            openshift_generate_no_proxy_hosts | default(True) | bool }}"
 
-- include: ../../../../common/openshift-cluster/upgrades/pre.yml
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/initialize_openshift_version.yml
+  tags:
+  - pre_upgrade
   vars:
-    openshift_deployment_type: "{{ deployment_type }}"
-- include: ../../../../common/openshift-cluster/upgrades/upgrade.yml
+    # Request specific openshift_release and let the openshift_version role handle converting this
+    # to a more specific version, respecting openshift_image_tag and openshift_pkg_version if
+    # defined, and overriding the normal behavior of protecting the installed version
+    openshift_release: "{{ openshift_upgrade_target }}"
+    openshift_protect_installed_version: False
+
+    # We skip the docker role at this point in upgrade to prevent
+    # unintended package, container, or config upgrades which trigger
+    # docker restarts. At this early stage of upgrade we can assume
+    # docker is configured and running.
+    skip_docker_role: True
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/gate_checks.yml
+  tags:
+  - pre_upgrade
+
+# Pre-upgrade completed, nothing after this should be tagged pre_upgrade.
+
+# Separate step so we can execute in parallel and clear out anything unused
+# before we get into the serialized upgrade process which will then remove
+# remaining images if possible.
+- name: Cleanup unused Docker images
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
+  tasks:
+  - include: ../../../../common/openshift-cluster/upgrades/cleanup_unused_images.yml
+
+- include: ../../../../common/openshift-cluster/upgrades/upgrade_control_plane.yml
   vars:
-    openshift_deployment_type: "{{ deployment_type }}"
     master_config_hook: "v3_3/master_config_upgrade.yml"
+
+- include: ../../../../common/openshift-cluster/upgrades/upgrade_nodes.yml
+  vars:
     node_config_hook: "v3_3/node_config_upgrade.yml"
+
 - include: ../../../openshift-master/restart.yml
-- include: ../../../../common/openshift-cluster/upgrades/post.yml
+
+- include: ../../../../common/openshift-cluster/upgrades/post_control_plane.yml

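Every pre-upgrade play and include above is tagged pre_upgrade, so the verification phase can be exercised on its own before committing to the actual upgrade. A minimal sketch of such a dry run, assuming the default inventory location:

  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade.yml \
      --tags pre_upgrade
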
+ 100 - 0
playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml

@@ -0,0 +1,100 @@
+---
+#
+# Control Plane Upgrade Playbook
+#
+# Upgrades masters and Docker (only on standalone etcd hosts)
+#
+# This upgrade does not include:
+# - node service running on masters
+# - docker running on masters
+# - node service running on dedicated nodes
+#
+# You can run the upgrade_nodes.yml playbook after this to upgrade these components separately.
+#
+- include: ../../../../common/openshift-cluster/upgrades/init.yml
+  tags:
+  - pre_upgrade
+
+# Configure the upgrade target for the common upgrade tasks:
+- hosts: l_oo_all_hosts
+  tags:
+  - pre_upgrade
+  tasks:
+  - set_fact:
+      openshift_upgrade_target: "{{ '1.3' if deployment_type == 'origin' else '3.3' }}"
+      openshift_upgrade_min: "{{ '1.2' if deployment_type == 'origin' else '3.2' }}"
+
+# Pre-upgrade
+
+- name: Update repos on control plane hosts
+  hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config
+  tags:
+  - pre_upgrade
+  roles:
+  - openshift_repos
+
+- name: Set openshift_no_proxy_internal_hostnames
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade
+  tags:
+  - pre_upgrade
+  tasks:
+  - set_fact:
+      openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config']
+                                                    | union(groups['oo_masters_to_config'])
+                                                    | union(groups['oo_etcd_to_config'] | default([])))
+                                                | oo_collect('openshift.common.hostname') | default([]) | join (',')
+                                                }}"
+    when: "{{ (openshift_http_proxy is defined or openshift_https_proxy is defined) and
+            openshift_generate_no_proxy_hosts | default(True) | bool }}"
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/initialize_openshift_version.yml
+  tags:
+  - pre_upgrade
+  vars:
+    # Request specific openshift_release and let the openshift_version role handle converting this
+    # to a more specific version, respecting openshift_image_tag and openshift_pkg_version if
+    # defined, and overriding the normal behavior of protecting the installed version
+    openshift_release: "{{ openshift_upgrade_target }}"
+    openshift_protect_installed_version: False
+
+    # We skip the docker role at this point in upgrade to prevent
+    # unintended package, container, or config upgrades which trigger
+    # docker restarts. At this early stage of upgrade we can assume
+    # docker is configured and running.
+    skip_docker_role: True
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/gate_checks.yml
+  tags:
+  - pre_upgrade
+
+# Pre-upgrade completed, nothing after this should be tagged pre_upgrade.
+
+# Separate step so we can execute in parallel and clear out anything unused
+# before we get into the serialized upgrade process which will then remove
+# remaining images if possible.
+- name: Cleanup unused Docker images
+  hosts: oo_masters_to_config:oo_etcd_to_config
+  tasks:
+  - include: ../../../../common/openshift-cluster/upgrades/cleanup_unused_images.yml
+
+- include: ../../../../common/openshift-cluster/upgrades/upgrade_control_plane.yml
+  vars:
+    master_config_hook: "v3_3/master_config_upgrade.yml"
+
+- include: ../../../../common/openshift-cluster/upgrades/post_control_plane.yml

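As the header comment notes, this playbook upgrades only the masters (and Docker on standalone etcd hosts), leaving node services for a later pass. A sketch of the intended two-phase invocation, assuming the default inventory location:

  # Phase 1: control plane only
  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_control_plane.yml
  # Phase 2: nodes, once the control plane is confirmed healthy
  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_nodes.yml
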
+ 102 - 0
playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_nodes.yml

@@ -0,0 +1,102 @@
+---
+#
+# Node Upgrade Playbook
+#
+# Upgrades nodes only, but requires the control plane to have already been upgraded.
+#
+- include: ../../../../common/openshift-cluster/upgrades/init.yml
+  tags:
+  - pre_upgrade
+
+# Configure the upgrade target for the common upgrade tasks:
+- hosts: l_oo_all_hosts
+  tags:
+  - pre_upgrade
+  tasks:
+  - set_fact:
+      openshift_upgrade_target: "{{ '1.3' if deployment_type == 'origin' else '3.3' }}"
+      openshift_upgrade_min: "{{ '1.2' if deployment_type == 'origin' else '3.2' }}"
+
+# Pre-upgrade
+- include: ../../../../common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml
+  tags:
+  - pre_upgrade
+
+- name: Update repos on nodes
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config:oo_lb_to_config
+  roles:
+  - openshift_repos
+  tags:
+  - pre_upgrade
+
+- name: Set openshift_no_proxy_internal_hostnames
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade
+  tags:
+  - pre_upgrade
+  tasks:
+  - set_fact:
+      openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_upgrade']
+                                                    | union(groups['oo_masters_to_config'])
+                                                    | union(groups['oo_etcd_to_config'] | default([])))
+                                                | oo_collect('openshift.common.hostname') | default([]) | join (',')
+                                                }}"
+    when: "{{ (openshift_http_proxy is defined or openshift_https_proxy is defined) and
+            openshift_generate_no_proxy_hosts | default(True) | bool }}"
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/initialize_openshift_version.yml
+  tags:
+  - pre_upgrade
+  vars:
+    # Request specific openshift_release and let the openshift_version role handle converting this
+    # to a more specific version, respecting openshift_image_tag and openshift_pkg_version if
+    # defined, and overriding the normal behavior of protecting the installed version
+    openshift_release: "{{ openshift_upgrade_target }}"
+    openshift_protect_installed_version: False
+
+    # We skip the docker role at this point in upgrade to prevent
+    # unintended package, container, or config upgrades which trigger
+    # docker restarts. At this early stage of upgrade we can assume
+    # docker is configured and running.
+    skip_docker_role: True
+
+- name: Verify masters are already upgraded
+  hosts: oo_masters_to_config
+  tags:
+  - pre_upgrade
+  tasks:
+  - fail: msg="Master running {{ openshift.common.version }} must be upgraded to {{ openshift_version }} before node upgrade can be run."
+    when: openshift.common.version != openshift_version
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml
+  tags:
+  - pre_upgrade
+
+- include: ../../../../common/openshift-cluster/upgrades/pre/gate_checks.yml
+  tags:
+  - pre_upgrade
+
+# Pre-upgrade completed, nothing after this should be tagged pre_upgrade.
+
+# Separate step so we can execute in parallel and clear out anything unused
+# before we get into the serialized upgrade process which will then remove
+# remaining images if possible.
+- name: Cleanup unused Docker images
+  hosts: oo_nodes_to_upgrade
+  tasks:
+  - include: ../../../../common/openshift-cluster/upgrades/cleanup_unused_images.yml
+
+- include: ../../../../common/openshift-cluster/upgrades/upgrade_nodes.yml
+  vars:
+    node_config_hook: "v3_3/node_config_upgrade.yml"

+ 2 - 0
playbooks/common/openshift-cluster/initialize_facts.yml

@@ -11,3 +11,5 @@
         hostname: "{{ openshift_hostname | default(None) }}"
   - set_fact:
       openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
+  - set_fact:
+      openshift_deployment_type: "{{ deployment_type }}"

+ 22 - 0
playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml

@@ -0,0 +1,22 @@
+---
+- name: Check Docker image count
+  shell: "docker images -aq | wc -l"
+  register: docker_image_count
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- debug: var=docker_image_count.stdout
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- name: Remove unused Docker images for Docker 1.10+ migration
+  shell: "docker rmi `docker images -aq`"
+  # Will fail on images still in use:
+  failed_when: false
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- name: Check Docker image count
+  shell: "docker images -aq | wc -l"
+  register: docker_image_count
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+
+- debug: var=docker_image_count.stdout
+  when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool

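Every task in this include is gated on docker_upgrade_nuke_images, so the image cleanup is strictly opt-in. A sketch of enabling it for an upgrade run (inventory path assumed):

  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade.yml \
      -e docker_upgrade_nuke_images=true
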
+ 7 - 0
playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml

@@ -9,6 +9,7 @@
     local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX
     register: local_cert_sync_tmpdir
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Create service signer certificate
   hosts: oo_first_master
@@ -17,6 +18,7 @@
     command: mktemp -d /tmp/openshift-ansible-XXXXXXX
     register: remote_cert_create_tmpdir
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
   - name: Create service signer certificate
     command: >
@@ -27,6 +29,7 @@
       --serial=service-signer.serial.txt
     args:
       chdir: "{{ remote_cert_create_tmpdir.stdout }}/"
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
   - name: Retrieve service signer certificate
     fetch:
@@ -38,12 +41,14 @@
     with_items:
     - "service-signer.crt"
     - "service-signer.key"
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
   - name: Delete remote temp directory
     file:
       name: "{{ remote_cert_create_tmpdir.stdout }}"
       state: absent
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Deploy service signer certificate
   hosts: oo_masters_to_config
@@ -55,6 +60,7 @@
     with_items:
     - "service-signer.crt"
     - "service-signer.key"
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Delete local temp directory
   hosts: localhost
@@ -67,3 +73,4 @@
       name: "{{ local_cert_sync_tmpdir.stdout }}"
       state: absent
     changed_when: false
+    when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)

+ 1 - 1
playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml

@@ -1,7 +1,7 @@
 ---
 
 # This snippet determines if a Docker upgrade is required by checking the inventory
-# variables, the available packages, and sets l_docker_version to True if so.
+# variables, the available packages, and sets l_docker_upgrade to True if so.
 
 - set_fact:
     docker_upgrade: True

+ 50 - 0
playbooks/common/openshift-cluster/upgrades/init.yml

@@ -0,0 +1,50 @@
+---
+- include: ../verify_ansible_version.yml
+
+- hosts: localhost
+  connection: local
+  become: no
+  gather_facts: no
+  tasks:
+  - include_vars: ../../../byo/openshift-cluster/cluster_hosts.yml
+  - add_host:
+      name: "{{ item }}"
+      groups: l_oo_all_hosts
+    with_items: g_all_hosts | default([])
+
+- hosts: l_oo_all_hosts
+  gather_facts: no
+  tasks:
+  - include_vars: ../../../byo/openshift-cluster/cluster_hosts.yml
+
+- include: ../evaluate_groups.yml
+  vars:
+    # Do not allow adding hosts during upgrade.
+    g_new_master_hosts: []
+    g_new_node_hosts: []
+    openshift_cluster_id: "{{ cluster_id | default('default') }}"
+    openshift_deployment_type: "{{ deployment_type }}"
+
+- name: Set oo_options
+  hosts: oo_all_hosts
+  tasks:
+  - set_fact:
+      openshift_docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') }}"
+    when: openshift_docker_additional_registries is not defined
+  - set_fact:
+      openshift_docker_insecure_registries: "{{ lookup('oo_option',  'docker_insecure_registries') }}"
+    when: openshift_docker_insecure_registries is not defined
+  - set_fact:
+      openshift_docker_blocked_registries: "{{ lookup('oo_option', 'docker_blocked_registries') }}"
+    when: openshift_docker_blocked_registries is not defined
+  - set_fact:
+      openshift_docker_options: "{{ lookup('oo_option', 'docker_options') }}"
+    when: openshift_docker_options is not defined
+  - set_fact:
+      openshift_docker_log_driver: "{{ lookup('oo_option', 'docker_log_driver') }}"
+    when: openshift_docker_log_driver is not defined
+  - set_fact:
+      openshift_docker_log_options: "{{ lookup('oo_option', 'docker_log_options') }}"
+    when: openshift_docker_log_options is not defined
+
+- include: ../initialize_facts.yml

+ 40 - 0
playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml

@@ -0,0 +1,40 @@
+---
+- name: Filter list of nodes to be upgraded if necessary
+  hosts: oo_first_master
+  tasks:
+  - name: Retrieve list of openshift nodes matching upgrade label
+    command: >
+      {{ openshift.common.client_binary }}
+      get nodes
+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+      --selector={{ openshift_upgrade_nodes_label }}
+      -o jsonpath='{.items[*].metadata.name}'
+    register: matching_nodes
+    changed_when: false
+    when: openshift_upgrade_nodes_label is defined
+
+  - set_fact:
+      nodes_to_upgrade: "{{ matching_nodes.stdout.split(' ') }}"
+    when: openshift_upgrade_nodes_label is defined
+
+  # We got a list of nodes with the label, now we need to match these with inventory hosts
+  # using their openshift.common.hostname fact.
+  - name: Map labelled nodes to inventory hosts
+    add_host:
+      name: "{{ item }}"
+      groups: temp_nodes_to_upgrade
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: " {{ groups['oo_nodes_to_config'] }}"
+    when: openshift_upgrade_nodes_label is defined and hostvars[item].openshift.common.hostname in nodes_to_upgrade
+    changed_when: false
+
+  # Build up the oo_nodes_to_upgrade group, use the list filtered by label if
+  # present, otherwise hit all nodes:
+  - name: Evaluate oo_nodes_to_upgrade
+    add_host:
+      name: "{{ item }}"
+      groups: oo_nodes_to_upgrade
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups['temp_nodes_to_upgrade'] | default(groups['oo_nodes_to_config']) }}"

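When openshift_upgrade_nodes_label is set, only inventory hosts whose node matches that selector land in oo_nodes_to_upgrade; otherwise the group falls back to all of oo_nodes_to_config. A hedged example restricting a node upgrade to one labelled region (the label value is illustrative):

  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_nodes.yml \
      -e openshift_upgrade_nodes_label="region=infra"
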
playbooks/common/openshift-cluster/upgrades/post.yml → playbooks/common/openshift-cluster/upgrades/post_control_plane.yml


+ 0 - 311
playbooks/common/openshift-cluster/upgrades/pre.yml

@@ -1,311 +0,0 @@
----
-###############################################################################
-# Evaluate host groups and gather facts
-###############################################################################
-
-- include: ../initialize_facts.yml
-
-- name: Update repos and initialize facts on all hosts
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config
-  roles:
-  - openshift_repos
-
-- name: Set openshift_no_proxy_internal_hostnames
-  hosts: oo_masters_to_config:oo_nodes_to_config
-  tasks:
-  - set_fact:
-      openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config']
-                                                    | union(groups['oo_masters_to_config'])
-                                                    | union(groups['oo_etcd_to_config'] | default([])))
-                                                | oo_collect('openshift.common.hostname') | default([]) | join (',')
-                                                }}"
-    when: "{{ (openshift_http_proxy is defined or openshift_https_proxy is defined) and
-            openshift_generate_no_proxy_hosts | default(True) | bool }}"
-
-- name: Evaluate additional groups for upgrade
-  hosts: localhost
-  connection: local
-  become: no
-  tasks:
-  - name: Evaluate etcd_hosts_to_backup
-    add_host:
-      name: "{{ item }}"
-      groups: etcd_hosts_to_backup
-    with_items: groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master
-
-###############################################################################
-# Pre-upgrade checks
-###############################################################################
-- name: Verify upgrade can proceed on first master
-  hosts: oo_first_master
-  vars:
-    g_pacemaker_upgrade_url_segment: "{{ 'org/latest' if deployment_type =='origin' else '.com/enterprise/3.1' }}"
-  gather_facts: no
-  tasks:
-  - fail:
-      msg: >
-        This upgrade is only supported for atomic-enterprise, origin, openshift-enterprise, and online
-        deployment types
-    when: deployment_type not in ['atomic-enterprise', 'origin','openshift-enterprise', 'online']
-
-  - fail:
-      msg: >
-        This upgrade does not support Pacemaker:
-        https://docs.openshift.{{ g_pacemaker_upgrade_url_segment }}/install_config/upgrading/pacemaker_to_native_ha.html
-    when: openshift.master.cluster_method is defined and openshift.master.cluster_method == 'pacemaker'
-
-  # Error out in situations where the user has older versions specified in their
-  # inventory in any of the openshift_release, openshift_image_tag, and
-  # openshift_pkg_version variables. These must be removed or updated to proceed
-  # with upgrade.
-  # TODO: Should we block if you're *over* the next major release version as well?
-  - fail:
-      msg: >
-        openshift_pkg_version is {{ openshift_pkg_version }} which is not a
-        valid version for a {{ openshift_upgrade_target }} upgrade
-    when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare(openshift_upgrade_target ,'<')
-
-  - fail:
-      msg: >
-        openshift_image_tag is {{ openshift_image_tag }} which is not a
-        valid version for a {{ openshift_upgrade_target }} upgrade
-    when: openshift_image_tag is defined and openshift_image_tag.split('v',1).1 | version_compare(openshift_upgrade_target ,'<')
-
-  - set_fact:
-      openshift_release: "{{ openshift_release[1:] }}"
-    when: openshift_release is defined and openshift_release[0] == 'v'
-
-  - fail:
-      msg: >
-        openshift_release is {{ openshift_release }} which is not a
-        valid release for a {{ openshift_upgrade_target }} upgrade
-    when: openshift_release is defined and not openshift_release | version_compare(openshift_upgrade_target ,'=')
-
-- include: ../../../common/openshift-cluster/initialize_openshift_version.yml
-  vars:
-    # Request specific openshift_release and let the openshift_version role handle converting this
-    # to a more specific version, respecting openshift_image_tag and openshift_pkg_version if
-    # defined, and overriding the normal behavior of protecting the installed version
-    openshift_release: "{{ openshift_upgrade_target }}"
-    openshift_protect_installed_version: False
-    # Docker role (a dependency) should be told not to do anything to installed version
-    # of docker, we handle this separately during upgrade. (the inventory may have a
-    # docker_version defined, we don't want to actually do it until later)
-    docker_protect_installed_version: True
-
-- name: Verify master processes
-  hosts: oo_masters_to_config
-  roles:
-  - openshift_facts
-  tasks:
-  - openshift_facts:
-      role: master
-      local_facts:
-        ha: "{{ groups.oo_masters_to_config | length > 1 }}"
-
-  - name: Ensure Master is running
-    service:
-      name: "{{ openshift.common.service_type }}-master"
-      state: started
-      enabled: yes
-    when: openshift.master.ha is defined and not openshift.master.ha | bool and openshift.common.is_containerized | bool
-
-  - name: Ensure HA Master is running
-    service:
-      name: "{{ openshift.common.service_type }}-master-api"
-      state: started
-      enabled: yes
-    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
-
-  - name: Ensure HA Master is running
-    service:
-      name: "{{ openshift.common.service_type }}-master-controllers"
-      state: started
-      enabled: yes
-    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
-
-- name: Verify node processes
-  hosts: oo_nodes_to_config
-  roles:
-  - openshift_facts
-  - openshift_docker_facts
-  tasks:
-  - name: Ensure Node is running
-    service:
-      name: "{{ openshift.common.service_type }}-node"
-      state: started
-      enabled: yes
-    when: openshift.common.is_containerized | bool
-
-- name: Verify upgrade targets
-  hosts: oo_masters_to_config:oo_nodes_to_config
-  vars:
-    openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
-  pre_tasks:
-  - fail:
-      msg: Verify OpenShift is already installed
-    when: openshift.common.version is not defined
-
-  - fail:
-      msg: Verify the correct version was found
-    when: verify_upgrade_version is defined and openshift_version != verify_upgrade_version
-
-  - name: Clean package cache
-    command: "{{ ansible_pkg_mgr }} clean all"
-    when: not openshift.common.is_atomic | bool
-
-  - set_fact:
-      g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}"
-    when: not openshift.common.is_containerized | bool
-
-  - name: Verify containers are available for upgrade
-    command: >
-      docker pull {{ openshift.common.cli_image }}:{{ openshift_image_tag }}
-    register: pull_result
-    changed_when: "'Downloaded newer image' in pull_result.stdout"
-    when: openshift.common.is_containerized | bool
-
-  - name: Check latest available OpenShift RPM version
-    command: >
-      {{ repoquery_cmd }} --qf '%{version}' "{{ openshift.common.service_type }}"
-    failed_when: false
-    changed_when: false
-    register: avail_openshift_version
-    when: not openshift.common.is_containerized | bool
-
-  - name: Verify OpenShift RPMs are available for upgrade
-    fail:
-      msg: "OpenShift {{ avail_openshift_version.stdout }} is available, but {{ openshift_upgrade_target }} or greater is required"
-    when: not openshift.common.is_containerized | bool and not avail_openshift_version | skipped and avail_openshift_version.stdout | default('0.0', True) | version_compare(openshift_release, '<')
-
-  - fail:
-      msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later"
-    when: deployment_type == 'origin' and openshift.common.version | version_compare(openshift_upgrade_min,'<')
-
-- name: Verify docker upgrade targets
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
-  tasks:
-  # Only check if docker upgrade is required if docker_upgrade is not
-  # already set to False.
-  - include: docker/upgrade_check.yml
-    when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool
-
-  # Additional checks for Atomic hosts:
-
-  - name: Determine available Docker
-    shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker"
-    register: g_atomic_docker_version_result
-    when: openshift.common.is_atomic | bool
-
-  - set_fact:
-      l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}"
-    when: openshift.common.is_atomic | bool
-
-  - fail:
-      msg: This playbook requires access to Docker 1.10 or later
-    when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<')
-
-  - set_fact:
-      pre_upgrade_complete: True
-
-
-##############################################################################
-# Gate on pre-upgrade checks
-##############################################################################
-- name: Gate on pre-upgrade checks
-  hosts: localhost
-  connection: local
-  become: no
-  vars:
-    pre_upgrade_hosts: "{{ groups.oo_masters_to_config | union(groups.oo_nodes_to_config) }}"
-  tasks:
-  - set_fact:
-      pre_upgrade_completed: "{{ hostvars
-                                 | oo_select_keys(pre_upgrade_hosts)
-                                 | oo_collect('inventory_hostname', {'pre_upgrade_complete': true}) }}"
-  - set_fact:
-      pre_upgrade_failed: "{{ pre_upgrade_hosts | difference(pre_upgrade_completed) }}"
-  - fail:
-      msg: "Upgrade cannot continue. The following hosts did not complete pre-upgrade checks: {{ pre_upgrade_failed | join(',') }}"
-    when: pre_upgrade_failed | length > 0
-
-###############################################################################
-# Backup etcd
-###############################################################################
-- name: Backup etcd
-  hosts: etcd_hosts_to_backup
-  vars:
-    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
-    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
-  roles:
-  - openshift_facts
-  tasks:
-  # Ensure we persist the etcd role for this host in openshift_facts
-  - openshift_facts:
-      role: etcd
-      local_facts: {}
-    when: "'etcd' not in openshift"
-
-  - stat: path=/var/lib/openshift
-    register: var_lib_openshift
-
-  - stat: path=/var/lib/origin
-    register: var_lib_origin
-
-  - name: Create origin symlink if necessary
-    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
-    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
-
-  # TODO: replace shell module with command and update later checks
-  # We assume to be using the data dir for all backups.
-  - name: Check available disk space for etcd backup
-    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
-    register: avail_disk
-
-  # TODO: replace shell module with command and update later checks
-  - name: Check current embedded etcd disk usage
-    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
-    register: etcd_disk_usage
-    when: embedded_etcd | bool
-
-  - name: Abort if insufficient disk space for etcd backup
-    fail:
-      msg: >
-        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
-        {{ avail_disk.stdout }} Kb available.
-    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
-
-  - name: Install etcd (for etcdctl)
-    action: "{{ ansible_pkg_mgr }} name=etcd state=latest"
-    when: not openshift.common.is_atomic | bool
-
-  - name: Generate etcd backup
-    command: >
-      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
-      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
-
-  - set_fact:
-      etcd_backup_complete: True
-
-  - name: Display location of etcd backup
-    debug:
-      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
-
-
-##############################################################################
-# Gate on etcd backup
-##############################################################################
-- name: Gate on etcd backup
-  hosts: localhost
-  connection: local
-  become: no
-  tasks:
-  - set_fact:
-      etcd_backup_completed: "{{ hostvars
-                                 | oo_select_keys(groups.etcd_hosts_to_backup)
-                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
-  - set_fact:
-      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"
-  - fail:
-      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
-    when: etcd_backup_failed | length > 0

+ 6 - 0
playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml

@@ -0,0 +1,6 @@
+---
+- name: Flag pre-upgrade checks complete for hosts without errors
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
+  tasks:
+  - set_fact:
+      pre_upgrade_complete: True

+ 1 - 0
playbooks/common/openshift-cluster/upgrades/pre/roles

@@ -0,0 +1 @@
+../../../../../roles/

+ 31 - 0
playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml

@@ -0,0 +1,31 @@
+---
+- name: Verify master processes
+  hosts: oo_masters_to_config
+  roles:
+  - openshift_facts
+  tasks:
+  - openshift_facts:
+      role: master
+      local_facts:
+        ha: "{{ groups.oo_masters_to_config | length > 1 }}"
+
+  - name: Ensure Master is running
+    service:
+      name: "{{ openshift.common.service_type }}-master"
+      state: started
+      enabled: yes
+    when: openshift.master.ha is defined and not openshift.master.ha | bool and openshift.common.is_containerized | bool
+
+  - name: Ensure HA Master is running
+    service:
+      name: "{{ openshift.common.service_type }}-master-api"
+      state: started
+      enabled: yes
+    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool
+
+  - name: Ensure HA Master is running
+    service:
+      name: "{{ openshift.common.service_type }}-master-controllers"
+      state: started
+      enabled: yes
+    when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool

+ 23 - 0
playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml

@@ -0,0 +1,23 @@
+---
+- name: Verify docker upgrade targets
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config
+  tasks:
+  # Only check if docker upgrade is required if docker_upgrade is not
+  # already set to False.
+  - include: ../docker/upgrade_check.yml
+    when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool
+
+  # Additional checks for Atomic hosts:
+
+  - name: Determine available Docker
+    shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker"
+    register: g_atomic_docker_version_result
+    when: openshift.common.is_atomic | bool
+
+  - set_fact:
+      l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}"
+    when: openshift.common.is_atomic | bool
+
+  - fail:
+      msg: This playbook requires access to Docker 1.10 or later
+    when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<')

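Per the comment above, the Docker check is skipped entirely when docker_upgrade is already set to False, which is the escape hatch for clusters that manage Docker versions out of band. A sketch, assuming the default inventory location:

  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade.yml \
      -e docker_upgrade=False
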
+ 37 - 0
playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml

@@ -0,0 +1,37 @@
+---
+- name: Verify upgrade can proceed on first master
+  hosts: oo_first_master
+  gather_facts: no
+  tasks:
+  - fail:
+      msg: >
+        This upgrade is only supported for origin, openshift-enterprise, and online
+        deployment types
+    when: deployment_type not in ['origin','openshift-enterprise', 'online']
+
+  # Error out in situations where the user has older versions specified in their
+  # inventory in any of the openshift_release, openshift_image_tag, and
+  # openshift_pkg_version variables. These must be removed or updated to proceed
+  # with upgrade.
+  # TODO: Should we block if you're *over* the next major release version as well?
+  - fail:
+      msg: >
+        openshift_pkg_version is {{ openshift_pkg_version }} which is not a
+        valid version for a {{ openshift_upgrade_target }} upgrade
+    when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare(openshift_upgrade_target ,'<')
+
+  - fail:
+      msg: >
+        openshift_image_tag is {{ openshift_image_tag }} which is not a
+        valid version for a {{ openshift_upgrade_target }} upgrade
+    when: openshift_image_tag is defined and openshift_image_tag.split('v',1).1 | version_compare(openshift_upgrade_target ,'<')
+
+  - set_fact:
+      openshift_release: "{{ openshift_release[1:] }}"
+    when: openshift_release is defined and openshift_release[0] == 'v'
+
+  - fail:
+      msg: >
+        openshift_release is {{ openshift_release }} which is not a
+        valid release for a {{ openshift_upgrade_target }} upgrade
+    when: openshift_release is defined and not openshift_release | version_compare(openshift_upgrade_target ,'=')

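These checks compare openshift_release, openshift_image_tag, and openshift_pkg_version against openshift_upgrade_target (1.3 for origin, 3.3 for enterprise in this release), so an inventory still pinned to the previous release aborts the run. A sketch of overriding a stale value on the command line rather than editing the inventory (origin deployment assumed):

  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade.yml \
      -e openshift_release=v1.3
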
+ 13 - 0
playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml

@@ -0,0 +1,13 @@
+---
+- name: Verify node processes
+  hosts: oo_nodes_to_config
+  roles:
+  - openshift_facts
+  - openshift_docker_facts
+  tasks:
+  - name: Ensure Node is running
+    service:
+      name: "{{ openshift.common.service_type }}-node"
+      state: started
+      enabled: yes
+    when: openshift.common.is_containerized | bool

+ 45 - 0
playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml

@@ -0,0 +1,45 @@
+---
+- name: Verify upgrade targets
+  hosts: oo_masters_to_config:oo_nodes_to_upgrade
+  vars:
+    openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
+  pre_tasks:
+  - fail:
+      msg: Verify OpenShift is already installed
+    when: openshift.common.version is not defined
+
+  - fail:
+      msg: Verify the correct version was found
+    when: verify_upgrade_version is defined and openshift_version != verify_upgrade_version
+
+  - name: Clean package cache
+    command: "{{ ansible_pkg_mgr }} clean all"
+    when: not openshift.common.is_atomic | bool
+
+  - set_fact:
+      g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}"
+    when: not openshift.common.is_containerized | bool
+
+  - name: Verify containers are available for upgrade
+    command: >
+      docker pull {{ openshift.common.cli_image }}:{{ openshift_image_tag }}
+    register: pull_result
+    changed_when: "'Downloaded newer image' in pull_result.stdout"
+    when: openshift.common.is_containerized | bool
+
+  - name: Check latest available OpenShift RPM version
+    command: >
+      {{ repoquery_cmd }} --qf '%{version}' "{{ openshift.common.service_type }}"
+    failed_when: false
+    changed_when: false
+    register: avail_openshift_version
+    when: not openshift.common.is_containerized | bool
+
+  - name: Verify OpenShift RPMs are available for upgrade
+    fail:
+      msg: "OpenShift {{ avail_openshift_version.stdout }} is available, but {{ openshift_upgrade_target }} or greater is required"
+    when: not openshift.common.is_containerized | bool and not avail_openshift_version | skipped and avail_openshift_version.stdout | default('0.0', True) | version_compare(openshift_release, '<')
+
+  - fail:
+      msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later"
+    when: deployment_type == 'origin' and openshift.common.version | version_compare(openshift_upgrade_min,'<')

+ 94 - 96
playbooks/common/openshift-cluster/upgrades/upgrade.yml

@@ -1,39 +1,93 @@
 ---
 ###############################################################################
-# The restart playbook should be run after this playbook completes.
+# Upgrade Masters
 ###############################################################################
-
-# Separate step so we can execute in parallel and clear out anything unused
-# before we get into the serialized upgrade process which will then remove
-# remaining images if possible.
-- name: Cleanup unused Docker images
-  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
+- name: Evaluate additional groups for upgrade
+  hosts: localhost
+  connection: local
+  become: no
   tasks:
-  - name: Check Docker image count
-    shell: "docker images -aq | wc -l"
-    register: docker_image_count
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+  - name: Evaluate etcd_hosts_to_backup
+    add_host:
+      name: "{{ item }}"
+      groups: etcd_hosts_to_backup
+    with_items: groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master
+
+- name: Backup etcd
+  hosts: etcd_hosts_to_backup
+  vars:
+    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
+    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
+  roles:
+  - openshift_facts
+  tasks:
+  # Ensure we persist the etcd role for this host in openshift_facts
+  - openshift_facts:
+      role: etcd
+      local_facts: {}
+    when: "'etcd' not in openshift"
+
+  - stat: path=/var/lib/openshift
+    register: var_lib_openshift
+
+  - stat: path=/var/lib/origin
+    register: var_lib_origin
+
+  - name: Create origin symlink if necessary
+    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
+    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
+
+  # TODO: replace shell module with command and update later checks
+  # We assume to be using the data dir for all backups.
+  - name: Check available disk space for etcd backup
+    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
+    register: avail_disk
+
+  # TODO: replace shell module with command and update later checks
+  - name: Check current embedded etcd disk usage
+    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
+    register: etcd_disk_usage
+    when: embedded_etcd | bool
+
+  - name: Abort if insufficient disk space for etcd backup
+    fail:
+      msg: >
+        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
+        {{ avail_disk.stdout }} Kb available.
+    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
+
+  - name: Install etcd (for etcdctl)
+    action: "{{ ansible_pkg_mgr }} name=etcd state=latest"
+    when: not openshift.common.is_atomic | bool
+
+  - name: Generate etcd backup
+    command: >
+      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
+      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
 
-  - debug: var=docker_image_count.stdout
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+  - set_fact:
+      etcd_backup_complete: True
 
-  - name: Remove unused Docker images for Docker 1.10+ migration
-    shell: "docker rmi `docker images -aq`"
-    # Will fail on images still in use:
-    failed_when: false
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+  - name: Display location of etcd backup
+    debug:
+      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
 
-  - name: Check Docker image count
-    shell: "docker images -aq | wc -l"
-    register: docker_image_count
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
 
-  - debug: var=docker_image_count.stdout
-    when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool
+- name: Gate on etcd backup
+  hosts: localhost
+  connection: local
+  become: no
+  tasks:
+  - set_fact:
+      etcd_backup_completed: "{{ hostvars
+                                 | oo_select_keys(groups.etcd_hosts_to_backup)
+                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
+  - set_fact:
+      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"
+  - fail:
+      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
+    when: etcd_backup_failed | length > 0
 
-###############################################################################
-# Upgrade Masters
-###############################################################################
 - name: Upgrade master packages
   hosts: oo_masters_to_config
   handlers:
@@ -57,7 +111,6 @@
 # Create service signer cert when missing. Service signer certificate
 # is added to master config in the master config hook for v3_3.
 - include: create_service_signer_cert.yml
-  when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool)
 
 - name: Upgrade master config and systemd units
   hosts: oo_masters_to_config
@@ -143,9 +196,9 @@
     origin_reconcile_bindings: "{{ deployment_type == 'origin' and openshift_version | version_compare('1.0.6', '>') }}"
     ent_reconcile_bindings: true
     openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}"
-    # Similar to pre.yml, we don't want to upgrade docker during the openshift_cli role,
-    # it will be updated when we perform node upgrade.
-    docker_protect_installed_version: True
+    # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe
+    # restart.
+    skip_docker_role: True
   tasks:
   - name: Verifying the correct commandline tools are available
     shell: grep {{ verify_upgrade_version }} {{ openshift.common.admin_binary}}
@@ -177,71 +230,6 @@
   - set_fact:
       reconcile_complete: True
 
-###############################################################################
-# Upgrade Nodes
-###############################################################################
-
-# Here we handle all tasks that might require a node evac. (upgrading docker, and the node service)
-- name: Perform upgrades that may require node evacuation
-  hosts: oo_masters_to_config:oo_etcd_to_config:oo_nodes_to_config
-  serial: 1
-  any_errors_fatal: true
-  roles:
-  - openshift_facts
-  handlers:
-  - include: ../../../../roles/openshift_node/handlers/main.yml
-    static: yes
-  tasks:
-  # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
-  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
-  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
-  - name: Determine if node is currently scheduleable
-    command: >
-      {{ openshift.common.client_binary }} get node {{ openshift.node.nodename }} -o json
-    register: node_output
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    changed_when: false
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - set_fact:
-      was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - name: Mark unschedulable if host is a node
-    command: >
-      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=false
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - name: Evacuate Node for Kubelet upgrade
-    command: >
-      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --evacuate --force
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: inventory_hostname in groups.oo_nodes_to_config
-
-  - include: docker/upgrade.yml
-    when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool
-  - include: "{{ node_config_hook }}"
-    when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_config
-
-  - include: rpm_upgrade.yml
-    vars:
-       component: "node"
-       openshift_version: "{{ openshift_pkg_version | default('') }}"
-    when: inventory_hostname in groups.oo_nodes_to_config and not openshift.common.is_containerized | bool
-
-  - include: containerized_node_upgrade.yml
-    when: inventory_hostname in groups.oo_nodes_to_config and openshift.common.is_containerized | bool
-
-  - meta: flush_handlers
-
-  - name: Set node schedulability
-    command: >
-      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=true
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    when: inventory_hostname in groups.oo_nodes_to_config and was_schedulable | bool
-
-
 ##############################################################################
 # Gate on reconcile
 ##############################################################################
@@ -259,3 +247,13 @@
   - fail:
       msg: "Upgrade cannot continue. The following masters did not finish reconciling: {{ reconcile_failed | join(',') }}"
     when: reconcile_failed | length > 0
+
+- name: Upgrade Docker on dedicated containerized etcd hosts
+  hosts: oo_etcd_to_config:!oo_nodes_to_upgrade
+  serial: 1
+  any_errors_fatal: true
+  roles:
+  - openshift_facts
+  tasks:
+  - include: docker/upgrade.yml
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool

+ 75 - 0
playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml

@@ -0,0 +1,75 @@
+---
+- name: Evacuate and upgrade nodes
+  hosts: oo_nodes_to_upgrade
+  # This var must be set with -e on invocation, as it is not a per-host inventory var
+  # and is evaluated early. Values such as "20%" can also be used.
+  serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
+  any_errors_fatal: true
+  roles:
+  - openshift_facts
+  - docker
+  handlers:
+  - include: ../../../../roles/openshift_node/handlers/main.yml
+    static: yes
+  pre_tasks:
+  # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
+  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
+  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
+  - name: Determine if node is currently scheduleable
+    command: >
+      {{ openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} -o json
+    register: node_output
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    changed_when: false
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+
+  - set_fact:
+      was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+
+  - name: Mark unschedulable if host is a node
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+    # NOTE: There is a transient "object has been modified" error here, allow a couple
+    # retries for a more reliable upgrade.
+    register: node_unsched
+    until: node_unsched.rc == 0
+    retries: 3
+    delay: 1
+
+  - name: Evacuate Node for Kubelet upgrade
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --evacuate --force
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade
+  tasks:
+  - include: docker/upgrade.yml
+    when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool
+
+  - include: "{{ node_config_hook }}"
+    when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_upgrade
+
+  - include: rpm_upgrade.yml
+    vars:
+       component: "node"
+       openshift_version: "{{ openshift_pkg_version | default('') }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool
+
+  - include: containerized_node_upgrade.yml
+    when: inventory_hostname in groups.oo_nodes_to_upgrade and openshift.common.is_containerized | bool
+
+  - meta: flush_handlers
+
+  - name: Set node schedulability
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool
+    register: node_sched
+    until: node_sched.rc == 0
+    retries: 3
+    delay: 1
+
+

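As the comment on serial explains, openshift_upgrade_nodes_serial must be passed with -e because it is evaluated before per-host inventory vars; percentages are accepted as well as counts. A sketch rolling the nodes 20% at a time (inventory path assumed):

  ansible-playbook -i /etc/ansible/hosts \
      playbooks/byo/openshift-cluster/upgrades/v3_3/upgrade_nodes.yml \
      -e openshift_upgrade_nodes_serial="20%"
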
+ 0 - 1
roles/docker/defaults/main.yml

@@ -1,2 +1 @@
 ---
-docker_protect_installed_version: False

+ 4 - 4
roles/docker/tasks/main.yml

@@ -11,7 +11,7 @@
 - name: Error out if Docker pre-installed but too old
   fail:
     msg: "Docker {{ curr_docker_version.stdout }} is installed, but >= 1.9.1 is required."
-  when: not curr_docker_version | skipped and curr_docker_version.stdout != '' and curr_docker_version.stdout | version_compare('1.9.1', '<') and not docker_version is defined and not docker_protect_installed_version | bool
+  when: not curr_docker_version | skipped and curr_docker_version.stdout != '' and curr_docker_version.stdout | version_compare('1.9.1', '<') and not docker_version is defined
 
 - name: Error out if requested Docker is too old
   fail:
@@ -31,19 +31,19 @@
 - name: Fail if Docker version requested but downgrade is required
   fail:
     msg: "Docker {{ curr_docker_version.stdout }} is installed, but version {{ docker_version }} was requested."
-  when: not curr_docker_version | skipped and curr_docker_version.stdout != '' and docker_version is defined and curr_docker_version.stdout | version_compare(docker_version, '>') and not docker_protect_installed_version | bool
+  when: not curr_docker_version | skipped and curr_docker_version.stdout != '' and docker_version is defined and curr_docker_version.stdout | version_compare(docker_version, '>')
 
 # This involves an extremely slow migration process, users should instead run the
 # Docker 1.10 upgrade playbook to accomplish this.
 - name: Error out if attempting to upgrade Docker across the 1.10 boundary
   fail:
     msg: "Cannot upgrade Docker to >= 1.10, please upgrade or remove Docker manually, or use the Docker upgrade playbook if OpenShift is already installed."
-  when: not curr_docker_version | skipped and curr_docker_version.stdout != '' and curr_docker_version.stdout | version_compare('1.10', '<') and docker_version is defined and docker_version | version_compare('1.10', '>=') and not docker_protect_installed_version | bool
+  when: not curr_docker_version | skipped and curr_docker_version.stdout != '' and curr_docker_version.stdout | version_compare('1.10', '<') and docker_version is defined and docker_version | version_compare('1.10', '>=')
 
 # Make sure Docker is installed, but does not update a running version.
 # Docker upgrades are handled by a separate playbook.
 - name: Install Docker
-  action: "{{ ansible_pkg_mgr }} name=docker{{ '-' + docker_version if docker_version is defined and not docker_protect_installed_version | bool else '' }} state=present"
+  action: "{{ ansible_pkg_mgr }} name=docker{{ '-' + docker_version if docker_version is defined else '' }} state=present"
   when: not openshift.common.is_atomic | bool
 
 - name: Start the Docker service

+ 1 - 0
roles/openshift_cli/meta/main.yml

@@ -13,5 +13,6 @@ galaxy_info:
   - cloud
 dependencies:
 - role: openshift_docker
+  when: not skip_docker_role | default(False) | bool
 - role: openshift_common
 - role: openshift_cli_facts

+ 1 - 1
roles/openshift_version/meta/main.yml

@@ -15,4 +15,4 @@ dependencies:
 - role: openshift_repos
 - role: openshift_docker_facts
 - role: docker
-  when: openshift.common.is_containerized | default(False) | bool
+  when: openshift.common.is_containerized | default(False) | bool and not skip_docker_role | default(False) | bool