
Docker 1.10 Upgrade

Adds a separate playbook for the Docker 1.10 upgrade that can be run
standalone on a pre-existing 3.2 cluster. The upgrade takes each node out
of rotation and removes *all* containers and images on it, as this is
reportedly faster and more storage-efficient than performing the in-place
1.10 upgrade.

This process is integrated into the 3.1 to 3.2 upgrade process.

Normal config playbooks are now 3.2-only and require Docker 1.10; users of
older environments will have to use an appropriate openshift-ansible
version.

Config playbooks are no longer in the business of upgrading or downgrading
Docker.
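
For reference, a minimal standalone invocation might look like the
following (the inventory path is a placeholder; the playbook path matches
the file added in this commit):

    ansible-playbook -i <inventory> \
        playbooks/byo/openshift-cluster/upgrades/docker/upgrade.yml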
Devan Goodwin · 8 years ago · commit 0c31d72be3

+ 105 - 0
playbooks/byo/openshift-cluster/upgrades/docker/docker_upgrade.yml

@@ -0,0 +1,105 @@
+
+- name: Check for appropriate Docker versions for 1.9.x to 1.10.x upgrade
+  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
+  roles:
+  - openshift_facts
+  tasks:
+  - name: Determine available Docker version
+    script: ../../../../common/openshift-cluster/upgrades/files/rpm_versions.sh docker
+    register: g_docker_version_result
+    when: not openshift.common.is_atomic | bool
+
+  - name: Check if Docker is installed
+    command: rpm -q docker
+    register: pkg_check
+    failed_when: pkg_check.rc > 1
+    changed_when: no
+    when: not openshift.common.is_atomic | bool
+
+  - set_fact:
+      g_docker_version: "{{ g_docker_version_result.stdout | from_yaml }}"
+    when: not openshift.common.is_atomic | bool
+
+  - name: Set fact if docker requires an upgrade
+    set_fact:
+      docker_upgrade: true
+    when: not openshift.common.is_atomic | bool and pkg_check.rc == 0 and g_docker_version.curr_version | version_compare('1.10','<')
+
+  - fail:
+      msg: This playbook requires access to Docker 1.10 or later
+    when: g_docker_version.avail_version | default(g_docker_version.curr_version, true) | version_compare('1.10','<')
+
+# If a node fails, halt everything: the admin will need to clean up, and we
+# don't want to carry on and potentially take out every node. The playbook
+# can safely be re-run and will not take any action on a node already
+# running 1.10+.
+- name: Evacuate and upgrade nodes
+  hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
+  serial: 1
+  any_errors_fatal: true
+  tasks:
+  - debug: var=docker_upgrade
+
+  - name: Prepare for Node evacuation
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=false
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: docker_upgrade is defined and docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_config
+
+# TODO: skip all node evac stuff for non-nodes (i.e. separate containerized etcd hosts)
+  - name: Evacuate Node for Kubelet upgrade
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --evacuate --force
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: docker_upgrade is defined and docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_config
+
+  - name: Stop containerized services
+    service: name={{ item }} state=stopped
+    with_items:
+      - "{{ openshift.common.service_type }}-master"
+      - "{{ openshift.common.service_type }}-master-api"
+      - "{{ openshift.common.service_type }}-master-controllers"
+      - "{{ openshift.common.service_type }}-node"
+      - etcd
+      - openvswitch
+    failed_when: false
+    when: docker_upgrade is defined and docker_upgrade | bool and openshift.common.is_containerized | bool
+
+  - name: Remove all containers and images
+    script: files/nuke_images.sh docker
+    register: nuke_images_result
+    when: docker_upgrade is defined and docker_upgrade | bool
+
+  - name: Upgrade Docker
+    command: "{{ ansible_pkg_mgr}} update -y docker"
+    register: docker_upgrade_result
+    when: docker_upgrade is defined and docker_upgrade | bool
+
+  - name: Restart containerized services
+    service: name={{ item }} state=started
+    with_items:
+      - etcd
+      - openvswitch
+      - "{{ openshift.common.service_type }}-master"
+      - "{{ openshift.common.service_type }}-master-api"
+      - "{{ openshift.common.service_type }}-master-controllers"
+      - "{{ openshift.common.service_type }}-node"
+    failed_when: false
+    when: docker_upgrade is defined and docker_upgrade | bool and openshift.common.is_containerized | bool
+
+  - name: Wait for master API to come back online
+    become: no
+    local_action:
+      module: wait_for
+        host="{{ inventory_hostname }}"
+        state=started
+        delay=10
+        port="{{ openshift.master.api_port }}"
+    when: docker_upgrade is defined and docker_upgrade | bool and inventory_hostname in groups.oo_masters_to_config
+
+  - name: Set node schedulability
+    command: >
+      {{ openshift.common.admin_binary }} manage-node {{ openshift.common.hostname | lower }} --schedulable=true
+    delegate_to: "{{ groups.oo_first_master.0 }}"
+    when: docker_upgrade is defined and docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_config and openshift.node.schedulable | bool
+

+ 23 - 0
playbooks/byo/openshift-cluster/upgrades/docker/files/nuke_images.sh

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Usage: nuke_images.sh <image-name-filter>
+
+# Stop any running containers
+running_container_count=`docker ps -q | wc -l`
+if test $running_container_count -gt 0
+then
+    docker stop $(docker ps -q)
+fi
+
+# Delete all containers
+container_count=`docker ps -a -q | wc -l`
+if test $container_count -gt 0
+then
+    docker rm -f -v $(docker ps -a -q)
+fi
+
+# Forcefully delete all images matching the name passed as the first argument
+image_count=`docker images -q | wc -l`
+if test $image_count -gt 0
+then
+    # Taken from: https://gist.github.com/brianclements/f72b2de8e307c7b56689#gistcomment-1443144
+    docker rmi $(docker images | grep "$1/\|/$1 \| $1 \|$1 \|$1-\|$1_" | awk '{print $1 ":" $2}') 2>/dev/null || echo "No images matching \"$1\" left to purge."
+fi
+fi

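The playbook passes the image-name filter ("docker") as the script's first
argument; the equivalent manual invocation, on a node already taken out of
rotation, would be:

    bash nuke_images.sh docker
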
+ 1 - 0
playbooks/byo/openshift-cluster/upgrades/docker/roles

@@ -0,0 +1 @@
+../../../../../roles

+ 29 - 0
playbooks/byo/openshift-cluster/upgrades/docker/upgrade.yml

@@ -0,0 +1,29 @@
+# Playbook to upgrade Docker to the max allowable version for an OpenShift cluster.
+#
+# Currently only supports upgrading 1.9.x to >= 1.10.x.
+- hosts: localhost
+  connection: local
+  become: no
+  gather_facts: no
+  tasks:
+  - include_vars: ../../cluster_hosts.yml
+  - add_host:
+      name: "{{ item }}"
+      groups: l_oo_all_hosts
+    with_items: g_all_hosts | default([])
+    changed_when: false
+
+- hosts: l_oo_all_hosts
+  gather_facts: no
+  tasks:
+  - include_vars: ../../cluster_hosts.yml
+
+- include: ../../../../common/openshift-cluster/evaluate_groups.yml
+  vars:
+    # Do not allow adding hosts during upgrade.
+    g_new_master_hosts: []
+    g_new_node_hosts: []
+    openshift_cluster_id: "{{ cluster_id | default('default') }}"
+    openshift_deployment_type: "{{ deployment_type }}"
+
+- include: docker_upgrade.yml

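The include_vars of cluster_hosts.yml is expected to supply g_all_hosts for
the add_host loop above; an illustrative sketch of the shape it takes (not
the actual file contents):

    g_all_hosts: "{{ g_master_hosts | union(g_node_hosts) | union(g_etcd_hosts) | default([]) }}"
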
+ 0 - 4
playbooks/byo/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml

@@ -4,10 +4,6 @@
   become: no
   gather_facts: no
   tasks:
-  - name: Verify Ansible version is greater than or equal to 1.9.4 and less than 2.0
-    fail:
-      msg: "Unsupported ansible version: {{ ansible_version }} found."
-    when: ansible_version.full | version_compare('1.9.4', 'lt') or ansible_version.full | version_compare('2.0', 'ge')
   - include_vars: ../../../../byo/openshift-cluster/cluster_hosts.yml
   - add_host:
       name: "{{ item }}"

+ 0 - 15
playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/docker_upgrade.yml

@@ -1,15 +0,0 @@
-- name: Check if Docker is installed
-  command: rpm -q docker
-  register: pkg_check
-  failed_when: pkg_check.rc > 1
-  changed_when: no
-
-- name: Upgrade Docker
-  command: "{{ ansible_pkg_mgr}} update -y docker"
-  when: pkg_check.rc == 0 and g_docker_version.curr_version | version_compare('1.9','<')
-  register: docker_upgrade
-
-- name: Restart Docker
-  command: systemctl restart docker
-  when: docker_upgrade | changed
-

+ 3 - 3
playbooks/common/openshift-cluster/upgrades/v3_1_to_v3_2/upgrade.yml

@@ -3,13 +3,13 @@
 # The restart playbook should be run after this playbook completes.
 ###############################################################################
 
-- name: Upgrade docker
+- include: ../../../../byo/openshift-cluster/upgrades/docker/docker_upgrade.yml
+
+- name: Update Docker facts
   hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
   roles:
   - openshift_facts
   tasks:
-  - include: docker_upgrade.yml
-    when: not openshift.common.is_atomic | bool
   - name: Set post docker install facts
     openshift_facts:
       role: "{{ item.role }}"

+ 30 - 29
roles/docker/tasks/main.yml

@@ -1,41 +1,42 @@
 ---
-# tasks file for docker
-
-- name: Get current installed version if docker_version is specified
+# Going forward we require Docker 1.10 or greater. If the user has a lesser
+# version installed, they must run a separate upgrade process.
+- name: Get current installed Docker version
   command: "{{ repoquery_cmd }} --installed --qf '%{version}' docker"
-  when:  not openshift.common.is_atomic | bool and docker_version != ''
-  register: docker_version_result
+  when: not openshift.common.is_atomic | bool
+  register: curr_docker_version
   changed_when: false
 
-- stat: path=/etc/sysconfig/docker-storage
-  register: docker_storage_check
+# TODO: The use of upgrading var will be removed in the coming upgrade refactor. This is a temporary
+# fix to work around the fact that right now, this role is called during upgrade, before we're
+# ready to upgrade Docker.
+- name: Fail if Docker upgrade is required
+  fail:
+    msg: "Docker {{ curr_docker_version.stdout }} must be upgraded to Docker 1.10 or greater"
+  when: not upgrading | bool and not curr_docker_version | skipped and curr_docker_version.stdout | default('0.0', True) | version_compare('1.10', '<')
 
-- name: Remove deferred deletion for downgrades from 1.9
+- name: Get latest available version of Docker
   command: >
-    sed -i 's/--storage-opt dm.use_deferred_deletion=true//' /etc/sysconfig/docker-storage
-  when: docker_storage_check.stat.exists | bool and not docker_version_result | skipped and docker_version_result.stdout | default('0.0', True) | version_compare('1.9', '>=') and docker_version | version_compare('1.9', '<')
-
-- name: Downgrade docker if necessary
-  command: "{{ ansible_pkg_mgr }} swap -y docker-* docker-*{{ docker_version }}"
-  register: docker_downgrade_result
-  when: not docker_version_result | skipped and docker_version_result.stdout | default('0.0', True) | version_compare(docker_version, 'gt')
+    {{ repoquery_cmd }} --qf '%{version}' "docker"
+  register: avail_docker_version
+  failed_when: false
+  changed_when: false
+  when: not openshift.common.is_atomic | bool
 
-- name: Install docker
-  action: "{{ ansible_pkg_mgr }} name=docker{{ '-' + docker_version if docker_version is defined and docker_version != '' else '' }} state=present"
-  when: not openshift.common.is_atomic | bool and docker_downgrade_result | skipped
+- name: Verify Docker >= 1.10 is available
+  fail:
+    msg: "Docker {{ avail_docker_version.stdout }} is available, but 1.10 or greater is required"
+  when: not avail_docker_version | skipped and avail_docker_version.stdout | default('0.0', True) | version_compare('1.10', '<')
 
-# If docker were enabled and started before we downgraded it may have entered a
-# failed state. Check for that and clear it if necessary.
-- name: Check that docker hasn't entered failed state
-  command: systemctl show docker
-  register: docker_state
-  changed_when: False
+- stat: path=/etc/sysconfig/docker-storage
+  register: docker_storage_check
 
-- name: Reset docker service state
-  command: systemctl reset-failed docker.service
-  when: " 'ActiveState=failed' in docker_state.stdout "
+# Make sure Docker is installed, but do not update an already-installed version.
+# Docker upgrades are handled by a separate playbook.
+- name: Install Docker
+  action: "{{ ansible_pkg_mgr }} name=docker state=present"
+  when: not openshift.common.is_atomic | bool
 
-- name: Start the docker service
+- name: Start the Docker service
   service:
     name: docker
     enabled: yes
@@ -86,7 +87,7 @@
   notify:
     - restart docker
 
-- name: Set various docker options
+- name: Set various Docker options
   lineinfile:
     dest: /etc/sysconfig/docker
     regexp: '^OPTIONS=.*$'

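For context, both version checks in this role shell out to repoquery; a
sketch of the underlying commands (version outputs are illustrative only):

    repoquery --installed --qf '%{version}' docker   # installed, e.g. 1.9.1
    repoquery --qf '%{version}' docker               # available, e.g. 1.10.3
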
+ 6 - 6
roles/etcd/tasks/main.yml

@@ -28,18 +28,18 @@
     state: directory
     mode: 0700
 
-- name: Check for etcd service presence
-  command: systemctl show etcd.service
-  register: etcd_show
-  changed_when: false
-
 - name: Disable system etcd when containerized
-  when: etcd_is_containerized | bool and 'LoadState=not-found' not in etcd_show.stdout
+  when: etcd_is_containerized | bool
   service:
     name: etcd
     state: stopped
     enabled: no
 
+- name: Check for etcd service presence
+  command: systemctl show etcd.service
+  register: etcd_show
+  changed_when: false
+
 - name: Mask system etcd when containerized
   when: etcd_is_containerized | bool and 'LoadState=not-found' not in etcd_show.stdout
   command: systemctl mask etcd

+ 0 - 11
roles/openshift_docker_facts/tasks/main.yml

@@ -57,14 +57,3 @@
     l_common_version: "{{ common_version.stdout | default('0.0', True) }}"
   when: not openshift.common.is_containerized | bool
 
-- name: Set docker version to be installed
-  set_fact:
-    docker_version: "{{ '1.8.2' }}"
-  when: " ( l_common_version | version_compare('3.2','<') and openshift.common.service_type in ['openshift', 'atomic-openshift'] ) or
-          ( l_common_version | version_compare('1.1.4','<') and openshift.common.service_type == 'origin' )"
-
-- name: Set docker version to be installed
-  set_fact:
-    docker_version: "{{ '1.9.1' }}"
-  when: " ( l_common_version | version_compare('3.2','>') and openshift.common.service_type == 'atomic-openshift' ) or
-          ( l_common_version | version_compare('1.2','>') and openshift.common.service_type == 'origin' )"

+ 2 - 7
roles/openshift_master/tasks/main.yml

@@ -164,14 +164,9 @@
   register: start_result
   notify: Verify API Server
 
-- name: Check for non-HA master service presence
-  command: systemctl show {{ openshift.common.service_type }}-master.service
-  register: master_svc_show
-  changed_when: false
-
-- name: Stop and disable non-HA master when running HA
+- name: Stop and disable non-HA master when running HA
   service: name={{ openshift.common.service_type }}-master enabled=no state=stopped
-  when: openshift_master_ha | bool and 'LoadState=not-found' not in master_svc_show.stdout
+  when: openshift_master_ha | bool
 
 - set_fact:
     master_service_status_changed: "{{ start_result | changed }}"