Browse Source

Upgrade to 3.10 with static pods

Before upgrading the control plane, ensure that bootstrapping and other
cluster level operations like installing openshift-sdn and the config
sync plugin have completed. Then upgrade the masters to use
bootstrapping mode one-by-one and have the kubelet take over running the
master components as static pods.

When transitioning each node, move the existing node-config.yaml to
bootstrap-node-config.yaml to allow node config sync to take over if
necessary. Also ensure that the node group config is properly configured
for bootstrapping as v3.9 did not always ensure that config was correct.
Clayton Coleman 7 năm trước
mục cha
commit
5d5d25bf17
60 tập tin đã thay đổi với 765 bổ sung và 301 xóa
  1. 0 7
      inventory/dynamic/gcp/group_vars/all/00_defaults.yml
  2. 2 0
      playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
  3. 25 0
      playbooks/common/openshift-cluster/upgrades/v3_10/label_nodes.yml
  4. 47 0
      playbooks/common/openshift-cluster/upgrades/v3_10/upgrade_control_plane.yml
  5. 14 0
      playbooks/gcp/openshift-cluster/upgrade.yml
  6. 1 1
      playbooks/init/basic_facts.yml
  7. 1 0
      playbooks/init/cluster_facts.yml
  8. 13 5
      playbooks/init/evaluate_groups.yml
  9. 1 0
      playbooks/openshift-etcd/private/upgrade_backup.yml
  10. 1 0
      playbooks/openshift-master/private/additional_config.yml
  11. 24 2
      playbooks/openshift-master/private/config.yml
  12. 7 0
      playbooks/openshift-master/private/enable_bootstrap.yml
  13. 0 12
      playbooks/openshift-master/private/tasks/enable_bootstrap.yml
  14. 10 0
      playbooks/openshift-master/private/tasks/enable_bootstrap_config.yml
  15. 6 43
      playbooks/openshift-master/private/upgrade.yml
  16. 1 1
      playbooks/openshift-node/private/additional_config.yml
  17. 22 18
      playbooks/openshift-node/private/bootstrap.yml
  18. 1 1
      playbooks/openshift-node/private/certificates-backup.yml
  19. 1 2
      playbooks/openshift-node/private/certificates.yml
  20. 1 1
      playbooks/openshift-node/private/clean_image.yml
  21. 7 1
      playbooks/openshift-node/private/config.yml
  22. 18 0
      playbooks/openshift-node/private/configure_bootstrap.yml
  23. 1 1
      playbooks/openshift-node/private/configure_nodes.yml
  24. 7 0
      playbooks/openshift-node/private/disable_excluders.yml
  25. 1 1
      playbooks/openshift-node/private/enable_excluders.yml
  26. 9 2
      playbooks/openshift-node/private/image_prep.yml
  27. 0 18
      playbooks/openshift-node/private/join.yml
  28. 1 1
      playbooks/openshift-node/private/manage_node.yml
  29. 0 24
      playbooks/openshift-node/private/setup.yml
  30. 0 23
      playbooks/openshift-node/private/setup_bootstrap.yml
  31. 5 4
      roles/etcd/defaults/main.yaml
  32. 8 1
      roles/etcd/tasks/backup/backup.yml
  33. 0 6
      roles/etcd/tasks/certificates/fetch_server_certificates_from_ca.yml
  34. 26 0
      roles/etcd/tasks/upgrade/upgrade_static.yml
  35. 4 0
      roles/etcd/tasks/upgrade_image.yml
  36. 4 0
      roles/etcd/tasks/upgrade_rpm.yml
  37. 5 0
      roles/lib_utils/filter_plugins/openshift_master.py
  38. 0 3
      roles/openshift_bootstrap_autoapprover/tasks/main.yml
  39. 26 0
      roles/openshift_control_plane/files/scripts/docker/master-exec
  40. 8 1
      roles/openshift_control_plane/tasks/generate_session_secrets.yml
  41. 12 0
      roles/openshift_control_plane/tasks/main.yml
  42. 0 12
      roles/openshift_control_plane/tasks/static.yml
  43. 76 4
      roles/openshift_control_plane/tasks/upgrade.yml
  44. 16 8
      roles/openshift_gcp/tasks/setup_scale_group_facts.yml
  45. 1 1
      roles/openshift_gcp/templates/openshift-bootstrap-update.j2
  46. 12 12
      roles/openshift_manage_node/tasks/set_default_node_role.yml
  47. 1 2
      roles/openshift_node/tasks/config.yml
  48. 4 1
      roles/openshift_node/tasks/node_system_container.yml
  49. 11 3
      roles/openshift_node/tasks/upgrade.yml
  50. 112 0
      roles/openshift_node/tasks/upgrade/bootstrap_changes.yml
  51. 34 37
      roles/openshift_node/tasks/upgrade/config_changes.yml
  52. 0 3
      roles/openshift_node/tasks/upgrade/stop_services.yml
  53. 2 0
      roles/openshift_node_group/defaults/main.yml
  54. 12 0
      roles/openshift_node_group/files/sync-policy.yaml
  55. 56 23
      roles/openshift_node_group/files/sync.yaml
  56. 7 6
      roles/openshift_node_group/tasks/bootstrap_config.yml
  57. 29 0
      roles/openshift_node_group/tasks/upgrade.yml
  58. 0 1
      roles/openshift_node_group/templates/node-config.yaml.j2
  59. 20 0
      roles/openshift_sdn/files/sdn-ovs.yaml
  60. 52 9
      roles/openshift_sdn/files/sdn.yaml

+ 0 - 7
inventory/dynamic/gcp/group_vars/all/00_defaults.yml

@@ -34,10 +34,3 @@ openshift_node_sdn_mtu: 1410
 osm_cluster_network_cidr: 172.16.0.0/16
 osm_host_subnet_length: 9
 openshift_portal_net: 172.30.0.0/16
-
-# Default cluster configuration
-openshift_master_cluster_method: native
-openshift_schedulable: true
-# TODO: change to upstream conventions
-openshift_hosted_infra_selector: "role=infra"
-osm_default_node_selector: "role=app"

+ 2 - 0
playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml

@@ -57,6 +57,8 @@
   - import_role:
       name: openshift_node
       tasks_from: upgrade.yml
+    vars:
+      r_node_dynamic_config_name: "{{ openshift_node_group_name | default('node-config-compute') }}"
 
   # Run the upgrade hook prior to make the node schedulable again.
   - debug: msg="Running node upgrade hook {{ openshift_node_upgrade_hook }}"

+ 25 - 0
playbooks/common/openshift-cluster/upgrades/v3_10/label_nodes.yml

@@ -0,0 +1,25 @@
+---
+
+- name: Update all labels
+  hosts: oo_masters_to_config
+  roles:
+    - openshift_facts
+    - lib_openshift
+  tasks:
+    - import_role:
+        name: openshift_manage_node
+        tasks_from: config.yml
+      vars:
+        openshift_master_host: '{{ groups.oo_first_master.0 }}'
+
+- name: Update node labels to differentiate from (now-schedulable) masters
+  hosts: oo_first_master
+  roles:
+    - openshift_facts
+    - lib_openshift
+  tasks:
+    - import_role:
+        name: openshift_manage_node
+        tasks_from: set_default_node_role.yml
+      vars:
+        openshift_master_host: '{{ groups.oo_first_master.0 }}'

+ 47 - 0
playbooks/common/openshift-cluster/upgrades/v3_10/upgrade_control_plane.yml

@@ -15,6 +15,7 @@
   vars:
     l_upgrade_no_switch_firewall_hosts: "oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config"
     l_init_fact_hosts: "oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config"
+    l_base_packages_hosts: "oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config"
 
 - name: Configure the upgrade target for the common upgrade tasks 3.10
   hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config
@@ -45,6 +46,52 @@
   - set_fact:
       pre_upgrade_complete: True
 
+- import_playbook: label_nodes.yml
+
+# To upgrade, we need masters to be capable of signing certificates
+- hosts: oo_masters
+  serial: 1
+  tasks:
+  - name: Enable core bootstrapping components
+    include_tasks: ../../../../openshift-master/private/tasks/enable_bootstrap.yml
+  - name: Place shim commands on the masters before we begin the upgrade
+    import_role:
+      name: openshift_control_plane
+      tasks_from: static_shim
+
+# TODO: need to verify settings about the bootstrap configs
+# 1. Does network policy match the master config
+
+- name: Configure components that must be available prior to upgrade
+  hosts: oo_first_master
+  pre_tasks:
+  - name: Enable core bootstrapping components
+    include_tasks: ../../../../openshift-master/private/tasks/enable_bootstrap_config.yml
+  - name: Ensure the master bootstrap config has bootstrapping config
+    import_role:
+      name: openshift_node_group
+      tasks_from: upgrade
+  - name: Enable node configuration reconciliation
+    import_role:
+      name: openshift_node_group
+      tasks_from: sync
+  roles:
+  - role: openshift_sdn
+    when: openshift_use_openshift_sdn | default(True) | bool
+
+- name: Update master nodes
+  hosts: oo_masters
+  serial: 1
+  tasks:
+  - import_role:
+      name: openshift_node
+      tasks_from: upgrade_pre
+  - import_role:
+      name: openshift_node
+      tasks_from: upgrade
+    vars:
+      r_node_dynamic_config_name: "{{ openshift_node_group_name | default('node-config-master') }}"
+
 - import_playbook: ../upgrade_control_plane.yml
   vars:
     openshift_release: '3.10'

+ 14 - 0
playbooks/gcp/openshift-cluster/upgrade.yml

@@ -0,0 +1,14 @@
+# This playbook installs onto a provisioned cluster
+---
+- hosts: localhost
+  connection: local
+  tasks:
+  - name: place all scale groups into Ansible groups
+    include_role:
+      name: openshift_gcp
+      tasks_from: setup_scale_group_facts.yml
+    vars:
+      all_nodes: true
+
+- name: run the upgrade
+  import_playbook: ../../common/openshift-cluster/upgrades/v3_10/upgrade.yml

+ 1 - 1
playbooks/init/basic_facts.yml

@@ -35,7 +35,7 @@
     register: bootstrap_node_config_path_check
   - name: initialize_facts set fact openshift_is_bootstrapped
     set_fact:
-      openshift_is_bootstrapped: "{{ bootstrap_node_config_path_check.stat.exists }}"
+      openshift_is_bootstrapped: "{{ openshift_is_bootstrapped|default(False) or bootstrap_node_config_path_check.stat.exists }}"
 
   - name: initialize_facts set fact openshift_is_atomic and openshift_is_containerized
     set_fact:

+ 1 - 0
playbooks/init/cluster_facts.yml

@@ -45,6 +45,7 @@
       role: node
       local_facts:
         sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}"
+        bootstrapped: "{{ openshift_is_bootstrapped }}"
 
 - name: Initialize etcd host variables
   hosts: oo_masters_to_config

+ 13 - 5
playbooks/init/evaluate_groups.yml

@@ -143,15 +143,23 @@
     with_items: "{{ g_new_node_hosts | default(g_node_hosts | default([], true), true) }}"
     changed_when: no
 
-  # Skip adding the master to oo_nodes_to_config when g_new_node_hosts is
-  - name: Add master to oo_nodes_to_config
+  - name: Evaluate oo_nodes_to_bootstrap
     add_host:
       name: "{{ item }}"
-      groups: oo_nodes_to_config
+      groups: oo_nodes_to_bootstrap
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ g_new_node_hosts | default(g_node_hosts | default([], true), true) }}"
+    changed_when: no
+    when: hostvars[item].openshift_node_bootstrap | default(True) | bool
+
+  - name: Add masters to oo_nodes_to_bootstrap
+    add_host:
+      name: "{{ item }}"
+      groups: oo_nodes_to_bootstrap
       ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
       ansible_become: "{{ g_sudo | default(omit) }}"
-    with_items: "{{ g_master_hosts | default([]) }}"
-    when: g_nodeonmaster | default(false) | bool and not g_new_node_hosts | default(false) | bool
+    with_items: "{{ groups.oo_masters | default([]) }}"
     changed_when: no
 
   - name: Evaluate oo_lb_to_config

+ 1 - 0
playbooks/openshift-etcd/private/upgrade_backup.yml

@@ -8,6 +8,7 @@
     vars:
       r_etcd_common_backup_tag: "{{ etcd_backup_tag }}"
       r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
+      r_etcd_common_skip_command_shim: true
 
 - name: Gate on etcd backup
   hosts: localhost

+ 1 - 0
playbooks/openshift-master/private/additional_config.yml

@@ -21,6 +21,7 @@
   # TODO: this is currently required in order to schedule pods onto the masters, but
   #   should be moved into components once nodes are using dynamic config
   - role: openshift_sdn
+    when: openshift_use_openshift_sdn | default(True) | bool
   - role: openshift_project_request_template
     when: openshift_project_request_template_manage
   - role: openshift_examples

+ 24 - 2
playbooks/openshift-master/private/config.yml

@@ -71,6 +71,10 @@
         console_url: "{{ openshift_master_console_url | default(None) }}"
         console_use_ssl: "{{ openshift_master_console_use_ssl | default(None) }}"
         public_console_url: "{{ openshift_master_public_console_url | default(None) }}"
+  - openshift_facts:
+      role: node
+      local_facts:
+        bootstrapped: true
 
 - name: Inspect state of first master config settings
   hosts: oo_first_master
@@ -79,7 +83,7 @@
   tasks:
   - import_role:
       name: openshift_control_plane
-      tasks_from: check_existing_config.yml
+      tasks_from: check_existing_config
 
 - name: Configure masters
   hosts: oo_masters_to_config
@@ -96,7 +100,8 @@
       name: openshift_node_group
       tasks_from: bootstrap_config
     vars:
-      r_node_bootstrap_config_name: "{{ openshift_node_group_master | default('node-config-master') }}"
+      r_node_dynamic_config_name: "{{ openshift_node_group_name | default('node-config-master') }}"
+      r_node_dynamic_config_force: True
 
   roles:
   - role: openshift_master_facts
@@ -119,6 +124,7 @@
     when: openshift_use_calico | default(false) | bool
   tasks:
   - name: Set up automatic node config reconcilation
+    run_once: True
     import_role:
       name: openshift_node_group
       tasks_from: sync
@@ -135,6 +141,22 @@
     group_by: key=oo_masters_deployment_type_{{ openshift_deployment_type }}
     changed_when: False
 
+- name: Deploy the central bootstrap configuration
+  hosts: oo_first_master
+  gather_facts: no
+  tasks:
+  - name: setup bootstrap settings
+    import_tasks: tasks/enable_bootstrap_config.yml
+
+- name: Ensure inventory labels are assigned to masters
+  hosts: oo_masters_to_config
+  gather_facts: no
+  roles:
+  - role: openshift_manage_node
+    openshift_master_host: "{{ groups.oo_first_master.0 }}"
+    openshift_manage_node_is_master: "{{ ('oo_masters_to_config' in group_names) | bool }}"
+    openshift_node_master_api_url: "{{ hostvars[groups.oo_first_master.0].openshift.master.api_url }}"
+
 - name: Re-enable excluder if it was previously enabled
   hosts: oo_masters_to_config
   gather_facts: no

+ 7 - 0
playbooks/openshift-master/private/enable_bootstrap.yml

@@ -5,3 +5,10 @@
   tasks:
   - name: include bootstrapping tasks
     import_tasks: tasks/enable_bootstrap.yml
+
+- name: Enable bootstrapping configuration
+  hosts: oo_first_master
+  gather_facts: no
+  tasks:
+  - name: include bootstrapping tasks
+    import_tasks: tasks/enable_bootstrap_config.yml

+ 0 - 12
playbooks/openshift-master/private/tasks/enable_bootstrap.yml

@@ -8,15 +8,3 @@
   import_role:
     name: openshift_control_plane
     tasks_from: bootstrap.yml
-
-- name: Setup the node group config maps
-  import_role:
-    name: openshift_node_group
-  run_once: True
-
-- name: Setup the node bootstrap auto approver
-  import_role:
-    name: openshift_bootstrap_autoapprover
-  run_once: True
-  when:
-  - openshift_master_bootstrap_auto_approve | default(False) | bool

+ 10 - 0
playbooks/openshift-master/private/tasks/enable_bootstrap_config.yml

@@ -0,0 +1,10 @@
+---
+- name: Setup the node group config maps
+  import_role:
+    name: openshift_node_group
+
+- name: Setup the node bootstrap auto approver
+  import_role:
+    name: openshift_bootstrap_autoapprover
+  when:
+  - openshift_master_bootstrap_auto_approve | default(False) | bool

+ 6 - 43
playbooks/openshift-master/private/upgrade.yml

@@ -69,7 +69,7 @@
 
   - import_role:
       name: openshift_control_plane
-      tasks_from: upgrade.yml
+      tasks_from: upgrade
 
   - name: update vsphere provider master config
     import_role:
@@ -80,9 +80,6 @@
     - openshift_cloudprovider_kind == 'vsphere'
     - openshift_version | version_compare('3.9', '>=')
 
-  - name: Setup and enable bootstrapping options
-    import_tasks: tasks/enable_bootstrap.yml
-
   # Run the upgrade hook prior to restarting services/system if defined:
   - debug: msg="Running master upgrade hook {{ openshift_master_upgrade_hook }}"
     when: openshift_master_upgrade_hook is defined
@@ -90,11 +87,10 @@
   - include_tasks: "{{ openshift_master_upgrade_hook }}"
     when: openshift_master_upgrade_hook is defined
 
-  - name: Disable master controller
-    service:
-      name: "{{ openshift_service_type }}-master-controllers"
-      enabled: false
-    when: openshift_rolling_restart_mode | default('services') == 'system'
+  - name: Lay down the static configuration
+    import_role:
+      name: openshift_control_plane
+      tasks_from: static.yml
 
   - import_tasks: tasks/restart_hosts.yml
     when: openshift_rolling_restart_mode | default('services') == 'system'
@@ -290,44 +286,11 @@
     import_role:
       name: lib_openshift
 
-  # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
-  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
-  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
-  - name: Mark node unschedulable
-    oc_adm_manage_node:
-      node: "{{ openshift.node.nodename | lower }}"
-      schedulable: False
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    retries: 10
-    delay: 5
-    register: node_unschedulable
-    until: node_unschedulable is succeeded
-
-  - name: Drain Node for Kubelet upgrade
-    command: >
-      {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
-      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
-      --force --delete-local-data --ignore-daemonsets
-      --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
-    delegate_to: "{{ groups.oo_first_master.0 }}"
-    register: l_upgrade_control_plane_drain_result
-    until: not (l_upgrade_control_plane_drain_result is failed)
-    retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
-    delay: 5
-    failed_when:
-    - l_upgrade_control_plane_drain_result is failed
-    - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0
-
   roles:
   - openshift_facts
+
   post_tasks:
   - import_role:
-      name: openshift_node
-      tasks_from: upgrade_pre.yml
-  - import_role:
-      name: openshift_node
-      tasks_from: upgrade.yml
-  - import_role:
       name: openshift_manage_node
       tasks_from: config.yml
     vars:

+ 1 - 1
playbooks/openshift-node/private/additional_config.yml

@@ -1,6 +1,6 @@
 ---
 - name: create additional node network plugin groups
-  hosts: "{{ openshift_node_scale_up_group | default('oo_nodes_to_config') }}:!oo_exclude_nodes_to_config"
+  hosts: "{{ openshift_node_scale_up_group | default('oo_nodes_to_config') }}:!oo_nodes_to_bootstrap"
   tasks:
   # Creating these node groups will prevent a ton of skipped tasks.
   # Create group for flannel nodes

+ 22 - 18
playbooks/openshift-node/private/bootstrap.yml

@@ -11,31 +11,35 @@
           status: "In Progress"
           start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
 
-- import_playbook: setup_bootstrap.yml
+- name: Only target nodes that have not yet been bootstrapped
+  hosts: localhost
+  tasks:
+  - add_host:
+      name: "{{ item }}"
+      groups: oo_exclude_bootstrapped_nodes
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups.oo_nodes_to_bootstrap | default([]) }}"
+    changed_when: no
+    when: hostvars[item].openshift_is_bootstrapped | default(False) | bool
+
+- import_playbook: disable_excluders.yml
+  vars:
+    l_node_group: oo_nodes_to_bootstrap:!oo_exclude_bootstrapped_nodes
 
 - import_playbook: configure_nodes.yml
+  vars:
+    l_node_group: oo_nodes_to_bootstrap:!oo_exclude_bootstrapped_nodes
 
-- name: node bootstrap config
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
-  tasks:
-  - import_role:
-      name: openshift_node
-      tasks_from: bootstrap
-  - import_role:
-      name: openshift_node_group
-      tasks_from: bootstrap
-  - name: Have the nodes automatically pull their configuration
-    import_role:
-      name: openshift_node_group
-      tasks_from: bootstrap_config
-    vars:
-      r_node_bootstrap_config_name: "{{ openshift_node_group_name | default('node-config-compute') }}"
-  - set_fact:
-      openshift_is_bootstrapped: True
+- import_playbook: configure_bootstrap.yml
 
 - import_playbook: enable_excluders.yml
+  vars:
+    l_node_group: oo_nodes_to_bootstrap:!oo_exclude_bootstrapped_nodes
 
 - import_playbook: clean_image.yml
+  vars:
+    l_node_group: oo_nodes_to_bootstrap:!oo_exclude_bootstrapped_nodes
 
 - name: Node Preparation Checkpoint End
   hosts: all

+ 1 - 1
playbooks/openshift-node/private/certificates-backup.yml

@@ -14,7 +14,7 @@
       warn: no
 
 - name: Redeploy node certificates
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
+  hosts: oo_nodes_to_config:!oo_nodes_to_bootstrap
   pre_tasks:
   - name: Remove CA certificate
     file:

+ 1 - 2
playbooks/openshift-node/private/certificates.yml

@@ -1,8 +1,7 @@
 ---
 - name: Create OpenShift certificates for node hosts
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
+  hosts: oo_nodes_to_config:!oo_nodes_to_bootstrap
   gather_facts: no
   roles:
   - role: openshift_node_certificates
     openshift_ca_host: "{{ groups.oo_first_master.0 }}"
-    when: not openshift_node_bootstrap | default(false) | bool

+ 1 - 1
playbooks/openshift-node/private/clean_image.yml

@@ -1,6 +1,6 @@
 ---
 - name: Configure nodes
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
+  hosts: "{{ l_node_group }}"
   tasks:
   - name: Remove any ansible facts created during AMI creation
     file:

+ 7 - 1
playbooks/openshift-node/private/config.yml

@@ -11,17 +11,23 @@
           status: "In Progress"
           start: "{{ lookup('pipe', 'date +%Y%m%d%H%M%SZ') }}"
 
-- import_playbook: setup.yml
+- import_playbook: disable_excluders.yml
+  vars:
+    l_node_group: oo_nodes_to_config:!oo_nodes_to_bootstrap
 
 - import_playbook: certificates.yml
 
 - import_playbook: configure_nodes.yml
+  vars:
+    l_node_group: oo_nodes_to_config:!oo_nodes_to_bootstrap
 
 - import_playbook: additional_config.yml
 
 - import_playbook: manage_node.yml
 
 - import_playbook: enable_excluders.yml
+  vars:
+    l_node_group: oo_nodes_to_config:!oo_nodes_to_bootstrap
 
 - name: Node Install Checkpoint End
   hosts: all

+ 18 - 0
playbooks/openshift-node/private/configure_bootstrap.yml

@@ -0,0 +1,18 @@
+---
+- name: node bootstrap config
+  hosts: oo_nodes_to_bootstrap:!oo_exclude_bootstrapped_nodes
+  tasks:
+  - import_role:
+      name: openshift_node
+      tasks_from: bootstrap
+  - import_role:
+      name: openshift_node_group
+      tasks_from: bootstrap
+  - name: Have the nodes automatically pull their configuration
+    import_role:
+      name: openshift_node_group
+      tasks_from: bootstrap_config
+    vars:
+      r_node_dynamic_config_name: "{{ openshift_node_group_name | default('node-config-compute') }}"
+  - set_fact:
+      openshift_is_bootstrapped: True

+ 1 - 1
playbooks/openshift-node/private/configure_nodes.yml

@@ -1,6 +1,6 @@
 ---
 - name: Configure nodes
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
+  hosts: "{{ l_node_group }}"
   vars:
     openshift_node_master_api_url: "{{ hostvars[groups.oo_first_master.0].openshift.master.api_url }}"
     openshift_node_first_master_ip: "{{ hostvars[groups.oo_first_master.0].openshift.common.ip }}"

+ 7 - 0
playbooks/openshift-node/private/disable_excluders.yml

@@ -0,0 +1,7 @@
+---
+- name: Disable excluders
+  hosts: "{{ l_node_group }}"
+  gather_facts: no
+  roles:
+  - role: openshift_excluder
+    r_openshift_excluder_action: disable

+ 1 - 1
playbooks/openshift-node/private/enable_excluders.yml

@@ -1,6 +1,6 @@
 ---
 - name: Re-enable excluder if it was previously enabled
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
+  hosts: "{{ l_node_group }}"
   gather_facts: no
   roles:
   - role: openshift_excluder

+ 9 - 2
playbooks/openshift-node/private/image_prep.yml

@@ -9,10 +9,14 @@
     l_openshift_version_check_hosts: "all:!all"
 
 - name: run node config setup
-  import_playbook: setup.yml
+  import_playbook: disable_excluders.yml
+  vars:
+    l_node_group: oo_nodes_to_config
 
 - name: run node config
   import_playbook: configure_nodes.yml
+  vars:
+    l_node_group: oo_nodes_to_config
 
 - name: node bootstrap config
   hosts: oo_nodes_to_config
@@ -24,9 +28,12 @@
         name: openshift_node_group
         tasks_from: bootstrap.yml
 
-
 - name: Re-enable excluders
   import_playbook: enable_excluders.yml
+  vars:
+    l_node_group: oo_nodes_to_config
 
 - name: Remove any undesired artifacts from build
   import_playbook: clean_image.yml
+  vars:
+    l_node_group: oo_nodes_to_config

+ 0 - 18
playbooks/openshift-node/private/join.yml

@@ -1,22 +1,4 @@
 ---
-- name: Evaluate bootstrapped nodes
-  hosts: localhost
-  gather_facts: no
-  connection: local
-  tasks:
-  - name: Add all nodes that are bootstrapped
-    add_host:
-      name: "{{ item }}"
-      groups: oo_nodes_to_bootstrap
-      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
-      ansible_become: "{{ g_sudo | default(omit) }}"
-    with_items: "{{ groups.oo_nodes_to_config | default([]) }}"
-    when:
-    - hostvars[item].openshift is defined
-    - hostvars[item].openshift.common is defined
-    - (hostvars[item].openshift_is_bootstrapped | bool) or (hostvars[item].openshift_node_bootstrap | default(True) | bool)
-    changed_when: False
-
 - name: Distribute bootstrap and start nodes
   hosts: oo_nodes_to_bootstrap
   gather_facts: no

+ 1 - 1
playbooks/openshift-node/private/manage_node.yml

@@ -1,6 +1,6 @@
 ---
 - name: Additional node config
-  hosts: "{{ openshift_node_scale_up_group | default('oo_nodes_to_config') }}:!oo_exclude_nodes_to_config"
+  hosts: "{{ openshift_node_scale_up_group | default('oo_nodes_to_config') }}:!oo_nodes_to_bootstrap"
   vars:
     openshift_node_master_api_url: "{{ hostvars[groups.oo_first_master.0].openshift.master.api_url }}"
   roles:

+ 0 - 24
playbooks/openshift-node/private/setup.yml

@@ -1,24 +0,0 @@
----
-- name: Evaluate node groups
-  hosts: localhost
-  connection: local
-  tasks:
-  - name: Evaluate oo_exclude_nodes_to_config as all nodes that have already been bootstrapped
-    add_host:
-      name: "{{ item }}"
-      groups: oo_exclude_nodes_to_config
-      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
-      ansible_become: "{{ g_sudo | default(omit) }}"
-    with_items: "{{ groups.oo_nodes_to_config | default([]) }}"
-    when:
-    - hostvars[item].openshift is defined
-    - hostvars[item].openshift.common is defined
-    - hostvars[item].openshift_is_bootstrapped | bool
-    changed_when: False
-
-- name: Disable excluders
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
-  gather_facts: no
-  roles:
-  - role: openshift_excluder
-    r_openshift_excluder_action: disable

+ 0 - 23
playbooks/openshift-node/private/setup_bootstrap.yml

@@ -1,23 +0,0 @@
----
-# We exclude all nodes that have already been bootstrapped or have requested not to be bootstrapped
-- name: Evaluate node groups
-  hosts: localhost
-  connection: local
-  tasks:
-  - name: Evaluate oo_exclude_nodes_to_config as all nodes that shouldn't be configured for bootstrapping
-    add_host:
-      name: "{{ item }}"
-      groups: oo_exclude_nodes_to_config
-      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
-      ansible_become: "{{ g_sudo | default(omit) }}"
-    with_items: "{{ groups.oo_nodes_to_config | default([]) }}"
-    when:
-    - (not (hostvars[item].openshift_node_bootstrap | default(True) | bool)) or (hostvars[item].openshift_is_bootstrapped | bool)
-    changed_when: False
-
-- name: Disable excluders
-  hosts: oo_nodes_to_config:!oo_exclude_nodes_to_config
-  gather_facts: no
-  roles:
-  - role: openshift_excluder
-    r_openshift_excluder_action: disable

+ 5 - 4
roles/etcd/defaults/main.yaml

@@ -5,18 +5,19 @@ r_etcd_common_backup_sufix_name: ''
 l_is_etcd_system_container: "{{ (openshift_use_etcd_system_container | default(openshift_use_system_containers | default(false)) | bool) }}"
 
 # runc, docker, host
-r_etcd_common_etcd_runtime: "{{ 'runc' if l_is_etcd_system_container else 'docker' if openshift_is_containerized else 'host' }}"
+r_etcd_common_etcd_runtime: "{{ 'runc' if l_is_etcd_system_container else ('docker' if openshift_is_containerized else 'host') }}"
 
-osm_etcd_image: 'registry.access.redhat.com/rhel7/etcd'
+r_etcd_default_version: "3.2.15"
+osm_etcd_image: "registry.access.redhat.com/rhel7/etcd:{{ r_etcd_upgrade_version | default(r_etcd_default_version) }}"
 etcd_image_dict:
-  origin: "registry.fedoraproject.org/latest/etcd"
+  origin: "quay.io/coreos/etcd:v{{ r_etcd_upgrade_version | default(r_etcd_default_version) }}"
   openshift-enterprise: "{{ osm_etcd_image }}"
 etcd_image: "{{ etcd_image_dict[openshift_deployment_type | default('origin')] }}"
 
 # etcd run on a host => use etcdctl command directly
 # etcd run as a docker container => use docker exec
 # etcd run as a runc container => use runc exec
-r_etcd_common_etcdctl_command: "{{ 'etcdctl' if r_etcd_common_etcd_runtime == 'host' | bool else 'docker exec etcd_container etcdctl' if r_etcd_common_etcd_runtime == 'docker' else 'runc exec etcd etcdctl' }}"
+r_etcd_common_etcdctl_command: "{{ 'etcdctl' if (r_etcd_common_etcd_runtime == 'host') else ('docker exec etcd_container etcdctl' if (r_etcd_common_etcd_runtime == 'docker') else 'runc exec etcd etcdctl') }}"
 
 # etcd server vars
 etcd_conf_dir: '/etc/etcd'

+ 8 - 1
roles/etcd/tasks/backup/backup.yml

@@ -49,10 +49,17 @@
   - l_etcd_selinux_labels.rc == 0
   - "'svirt_sandbox_file_t' not in l_etcd_selinux_labels.stdout"
 
-- name: Generate etcd backup
+- name: Generate etcd backup (legacy)
   command: >
     {{ r_etcd_common_etcdctl_command }} backup --data-dir={{ l_etcd_incontainer_data_dir }}
     --backup-dir={{ l_etcd_incontainer_backup_dir }}
+  when: r_etcd_common_skip_command_shim | default(False) | bool
+
+- name: Generate etcd backup (static pod)
+  command: >
+    /usr/local/bin/master-exec etcd etcd etcdctl backup --data-dir={{ l_etcd_incontainer_data_dir }}
+    --backup-dir={{ l_etcd_incontainer_backup_dir }}
+  when: not (r_etcd_common_skip_command_shim | default(False) | bool)
 
 # According to the docs change you can simply copy snap/db
 # https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6

+ 0 - 6
roles/etcd/tasks/certificates/fetch_server_certificates_from_ca.yml

@@ -180,8 +180,6 @@
   file:
     path: "{{ item }}"
     mode: 0600
-    owner: "{{ 'etcd' if not openshift_is_bootstrapped else omit }}"
-    group: "{{ 'etcd' if not openshift_is_bootstrapped else omit }}"
   when: etcd_url_scheme == 'https'
   with_items:
   - "{{ etcd_ca_file }}"
@@ -192,8 +190,6 @@
   file:
     path: "{{ item }}"
     mode: 0600
-    owner: "{{ 'etcd' if not openshift_is_bootstrapped else omit }}"
-    group: "{{ 'etcd' if not openshift_is_bootstrapped else omit }}"
   when: etcd_peer_url_scheme == 'https'
   with_items:
   - "{{ etcd_peer_ca_file }}"
@@ -204,6 +200,4 @@
   file:
     path: "{{ etcd_conf_dir }}"
     state: directory
-    owner: "{{ 'etcd' if not openshift_is_bootstrapped else omit }}"
-    group: "{{ 'etcd' if not openshift_is_bootstrapped else omit }}"
     mode: 0700

+ 26 - 0
roles/etcd/tasks/upgrade/upgrade_static.yml

@@ -0,0 +1,26 @@
+---
+# PREREQ Node service is ready to run static pods
+
+# INPUT r_etcd_upgrade_version
+- name: Verify cluster is healthy pre-upgrade
+  command: "{{ etcdctlv2 }} cluster-health"
+
+- name: Stop etcd
+  systemd:
+    name: "{{ etcd_service }}"
+    state: stopped
+
+- name: Remove old service file
+  file:
+    path: "{{ etcd_service }}"
+    state: absent
+
+- name: Configure static definition
+  import_tasks: ../static.yml
+
+- name: Verify cluster is healthy
+  command: "{{ etcdctlv2 }} cluster-health"
+  register: etcdctl
+  until: etcdctl.rc == 0
+  retries: 3
+  delay: 10

+ 4 - 0
roles/etcd/tasks/upgrade_image.yml

@@ -1,2 +1,6 @@
 ---
+- include_tasks: upgrade/upgrade_static.yml
+  when: openshift_master_bootstrap_enabled | default(False) | bool
+
 - include_tasks: upgrade/upgrade_image.yml
+  when: not openshift_master_bootstrap_enabled | default(False) | bool

+ 4 - 0
roles/etcd/tasks/upgrade_rpm.yml

@@ -1,2 +1,6 @@
 ---
+- include_tasks: upgrade/upgrade_static.yml
+  when: openshift_master_bootstrap_enabled | default(False) | bool
+
 - include_tasks: upgrade/upgrade_rpm.yml
+  when: not openshift_master_bootstrap_enabled | default(False) | bool

+ 5 - 0
roles/lib_utils/filter_plugins/openshift_master.py

@@ -488,6 +488,11 @@ class FilterModule(object):
         certs = ['admin.crt',
                  'admin.key',
                  'admin.kubeconfig',
+                 'aggregator-front-proxy.crt',
+                 'aggregator-front-proxy.key',
+                 'aggregator-front-proxy.kubeconfig',
+                 'front-proxy-ca.crt',
+                 'front-proxy-ca.key',
                  'master.kubelet-client.crt',
                  'master.kubelet-client.key',
                  'master.proxy-client.crt',

+ 0 - 3
roles/openshift_bootstrap_autoapprover/tasks/main.yml

@@ -12,7 +12,6 @@
     - "files/*.yaml"
 
 - name: Update the image tag
-  run_once: true
   yedit:
     src: "{{ mktemp.stdout }}/openshift-bootstrap-images.yaml"
     key: 'tag.from.name'
@@ -25,13 +24,11 @@
     --config={{ openshift.common.config_base }}/master/admin.kubeconfig
 
 - name: Apply the config
-  run_once: true
   command: >
     {{ openshift_client_binary }} apply -f "{{ mktemp.stdout }}"
     --config={{ openshift.common.config_base }}/master/admin.kubeconfig
 
 - name: Remove temp directory
-  run_once: true
   file:
     state: absent
     name: "{{ mktemp.stdout }}"

+ 26 - 0
roles/openshift_control_plane/files/scripts/docker/master-exec

@@ -0,0 +1,26 @@
+#!/bin/bash
+set -euo pipefail
+
+# Exec a file in the named component by component name and container name.
+# Remaining arguments are passed to the command. If no static pods have been
+# created yet this will execute on the host.
+if [[ -z "${1-}" || -z "${2-}" ]]; then
+  echo "A component name like 'api', 'etcd', or 'controllers' must be specified along with the container name within that component." 1>&2
+  exit 1
+fi
+
+# We haven't started using static pods yet, assume this command is to be direct executed
+if [[ ! -d /etc/origin/node/pods || -z "$( ls -A /etc/origin/node/pods )" ]]; then
+  exec "${@:3}"
+fi
+
+# TODO: move to cri-ctl
+# TODO: short term hack for cri-o
+
+uid=$(docker ps -l -a --filter "label=openshift.io/component=${1}" --filter "label=io.kubernetes.container.name=POD" --format '{{ .Label "io.kubernetes.pod.uid" }}')
+if [[ -z "${uid}" ]]; then
+  echo "Component ${1} is stopped or not running" 1>&2
+  exit 0
+fi
+container=$(docker ps -l -a -q --filter "label=io.kubernetes.pod.uid=${uid}" --filter "label=io.kubernetes.container.name=${2}")
+exec docker exec "${container}" "${@:3}"

+ 8 - 1
roles/openshift_control_plane/tasks/generate_session_secrets.yml

@@ -13,6 +13,13 @@
   register: l_osm_session_secrets_slurp
   when: l_osm_session_secrets_stat.stat.exists
 
+- name: slurp session secrets if defined
+  slurp:
+    src: "{{ openshift_master_session_secrets_file }}"
+  register: osm_session_secrets
+  no_log: true
+  when: l_osm_session_secrets_stat.stat.exists
+
 # lib_utils_oo_collect is a custom filter in
 # roles/lib_utils/filter_plugins/oo_filters.py
 - name: Gather existing session secrets from first master
@@ -23,7 +30,7 @@
     l_existing_osm_session: "{{ (l_osm_session_secrets_slurp.content | b64decode | from_yaml) }}"
   when:
   - l_osm_session_secrets_stat.stat.exists
-  - l_osm_session_secrets_slurp defined
+  - l_osm_session_secrets_slurp is defined
   - l_existing_osm_session.secrets is defined
   - l_existing_osm_session.secrets != ''
   - l_existing_osm_session.secrets != []

+ 12 - 0
roles/openshift_control_plane/tasks/main.yml

@@ -153,6 +153,18 @@
 
 - import_tasks: static.yml
 
+- name: Establish the default bootstrap kubeconfig for masters
+  copy:
+    remote_src: true
+    src: "/etc/origin/master/admin.kubeconfig"
+    dest: "{{ item }}"
+    mode: 0600
+  with_items:
+  # bootstrap as an admin
+  - /etc/origin/node/bootstrap.kubeconfig
+  # copy to this location to bypass initial bootstrap request
+  - /etc/origin/node/node.kubeconfig
+
 - name: Start and enable self-hosting node
   systemd:
     name: "{{ openshift_service_type }}-node"

+ 0 - 12
roles/openshift_control_plane/tasks/static.yml

@@ -49,15 +49,3 @@
     name: "{{ mktemp.stdout }}"
     state: absent
   changed_when: False
-
-- name: Establish the default bootstrap kubeconfig for masters
-  copy:
-    remote_src: true
-    src: "/etc/origin/master/admin.kubeconfig"
-    dest: "{{ item }}"
-    mode: 0600
-  with_items:
-  # bootstrap as an admin
-  - /etc/origin/node/bootstrap.kubeconfig
-  # copy to this location to bypass initial bootstrap request
-  - /etc/origin/node/node.kubeconfig

+ 76 - 4
roles/openshift_control_plane/tasks/upgrade.yml

@@ -1,6 +1,14 @@
 ---
-- import_tasks: upgrade/rpm_upgrade.yml
-  when: not openshift_is_containerized | bool
+- name: Stop control plane services
+  service:
+    name: "{{ item }}"
+    state: stopped
+  with_items:
+  - "{{ openshift_service_type }}-master-api"
+  - "{{ openshift_service_type }}-master-controllers"
+  failed_when: false
+
+- import_tasks: static_shim.yml
 
 - import_tasks: upgrade/upgrade_scheduler.yml
 
@@ -8,8 +16,6 @@
 - include_tasks: "upgrade/{{ master_config_hook }}"
   when: master_config_hook is defined
 
-- import_tasks: systemd_units.yml
-
 - import_tasks: set_loopback_context.yml
 
 - name: Check for ca-bundle.crt
@@ -37,6 +43,37 @@
     state: link
   when: ca_crt_stat.stat.isreg and not ca_bundle_stat.stat.exists
 
+- name: Find existing master sysconfig
+  find:
+    paths:
+    - /etc/sysconfig
+    patterns:
+    - openshift-master-api
+    - openshift-master-controllers
+    - origin-master-api
+    - origin-master-controllers
+  register: sysconfigs
+- when: sysconfigs is succeeded and sysconfigs.matched > 0
+  name: Migrate OPENSHIFT_DEFAULT_REGISTRY from master sysconfig to master-config.yaml
+  block:
+  - name: Get master sysconfig contents
+    slurp:
+      src: "{{ sysconfigs.files[0].path }}"
+    register: sysconfig
+  # TODO: surely there is a better way
+  - name: Update imagePolicyConfig.internalRegistryHostname
+    yedit:
+      src: "{{ openshift.common.config_base }}/master/master-config.yaml"
+      key: "imagePolicyConfig.internalRegistryHostname"
+      value: "{{ item | regex_replace('^OPENSHIFT_DEFAULT_REGISTRY=\\s*([^#\\s]+).*','\\1') }}"
+    with_items: "{{ (sysconfig.content | b64decode).split('\n') | select('match','^OPENSHIFT_DEFAULT_REGISTRY=\\s*.+') | list }}"
+
+- name: Create the master service env file if it does not exist
+  template:
+    src: "master.env.j2"
+    dest: "{{ openshift.common.config_base }}/master/master.env"
+    force: no
+
 - name: Update oreg value
   yedit:
     src: "{{ openshift.common.config_base }}/master/master-config.yaml"
@@ -50,3 +87,38 @@
     key: 'projectConfig.defaultNodeSelector'
     value: '{{ hostvars[groups.oo_first_master.0].l_osm_default_node_selector }}'
   when: openshift_upgrade_target | version_compare('3.9', '>=')
+
+- name: Remove use of pod presets from master config
+  yedit:
+    src: "{{ openshift.common.config_base }}/master/master-config.yaml"
+    key: 'admissionConfig.pluginConfig.PodPreset'
+    state: absent
+- name: Find current value for runtime-config
+  yedit:
+    src: "{{ openshift.common.config_base }}/master/master-config.yaml"
+    key: "kubernetesMasterConfig.apiServerArguments.runtime-config"
+    state: list
+  register: runtime_config
+- name: Set the runtime-config to exclude pod presets
+  yedit:
+    src: "{{ openshift.common.config_base }}/master/master-config.yaml"
+    key: "kubernetesMasterConfig.apiServerArguments.runtime-config"
+    value: "{{ runtime_config.result | join(',') | regex_replace('(?:,)*apis/settings\\.k8s\\.io/v1alpha1=true','') }}"
+  when: runtime_config.result
+
+- name: Remove old service information
+  file:
+    path: "{{ item }}"
+    state: absent
+  with_items:
+  - /etc/systemd/system/atomic-openshift-master-api.service
+  - /etc/systemd/system/atomic-openshift-master-controllers.service
+  - /etc/systemd/system/origin-master-api.service
+  - /etc/systemd/system/origin-master-controllers.service
+  - /usr/lib/systemd/system/atomic-openshift-master-api.service
+  - /usr/lib/systemd/system/atomic-openshift-master-controllers.service
+  - /usr/lib/systemd/system/origin-master-api.service
+  - /usr/lib/systemd/system/origin-master-controllers.service
+
+- name: reload systemd units
+  command: systemctl daemon-reload

+ 16 - 8
roles/openshift_gcp/tasks/setup_scale_group_facts.yml

@@ -3,6 +3,8 @@
   add_host:
     name: "{{ hostvars[item].gce_name }}"
     groups: masters, etcd
+    openshift_node_labels:
+      node-role.kubernetes.io/master: "true"
   with_items: "{{ groups['tag_ocp-master'] }}"
 
 - name: Add a master to the primary masters group
@@ -15,24 +17,21 @@
   add_host:
     name: "{{ hostvars[item].gce_name }}"
     groups: nodes
-    openshift_node_labels:
-      role: infra
   with_items: "{{ groups['tag_ocp-master'] | default([]) | difference(groups['tag_ocp-bootstrap'] | default([])) }}"
 
 - name: Add infra node instances to node group
   add_host:
     name: "{{ hostvars[item].gce_name }}"
-    groups: nodes
+    groups: nodes, new_nodes
     openshift_node_labels:
-      role: infra
+      node-role.kubernetes.io/infra: "true"
   with_items: "{{ groups['tag_ocp-infra-node'] | default([]) | difference(groups['tag_ocp-bootstrap'] | default([])) }}"
 
 - name: Add node instances to node group
   add_host:
     name: "{{ hostvars[item].gce_name }}"
-    groups: nodes
-    openshift_node_labels:
-      role: app
+    groups: nodes, new_nodes
+    openshift_node_bootstrap: False
   with_items: "{{ groups['tag_ocp-node'] | default([]) | difference(groups['tag_ocp-bootstrap'] | default([])) }}"
 
 - name: Add bootstrap node instances
@@ -40,5 +39,14 @@
     name: "{{ hostvars[item].gce_name }}"
     groups: bootstrap_nodes
     openshift_node_bootstrap: True
+    openshift_is_bootstrapped: True
   with_items: "{{ groups['tag_ocp-node'] | default([]) | intersect(groups['tag_ocp-bootstrap'] | default([])) }}"
-  when: not (openshift_node_bootstrap | default(True))
+
+- name: Add bootstrap node instances as nodes
+  add_host:
+    name: "{{ item }}"
+    groups: nodes, new_nodes
+    openshift_node_bootstrap: True
+    openshift_is_bootstrapped: True
+  with_items: "{{ groups['tag_ocp-bootstrap'] | default([]) }}"
+  when: all_nodes | default(False)

+ 1 - 1
roles/openshift_gcp/templates/openshift-bootstrap-update.j2

@@ -2,6 +2,6 @@
 
 set -euo pipefail
 
-"{{ openshift_client_binary }}" serviceaccounts create-kubeconfig -n openshift-infra node-bootstrapper > /root/bootstrap.kubeconfig
+oc serviceaccounts create-kubeconfig -n openshift-infra node-bootstrapper > /root/bootstrap.kubeconfig
 gcloud compute project-info --project '{{ openshift_gcp_project }}' add-metadata --metadata-from-file '{{ openshift_gcp_prefix + openshift_gcp_clusterid | default("default") }}-bootstrap-config=/root/bootstrap.kubeconfig'
 rm -f /root/bootstrap.kubeconfig

+ 12 - 12
roles/openshift_manage_node/tasks/set_default_node_role.yml

@@ -3,36 +3,36 @@
 - delegate_to: '{{ openshift_master_host }}'
   run_once: true
   block:
-    - name: Retrieve non-infra, non-master nodes that are not yet labeled compute
+    - name: Retrieve nodes that are marked with the infra selector or the legacy infra selector
       oc_obj:
         state: list
         kind: Node
-        selector: '{{ (openshift_hosted_infra_selector | default("node-role.kubernetes.io/infra=true")) | regex_replace("=", "!=") }},node-role.kubernetes.io/master!=true,node-role.kubernetes.io/compute!=true'
-      register: non_master_non_infra_nodes_result
+        selector: '{{ openshift_hosted_infra_selector | default("region=infra") }}'
+      register: infra_nodes_by_selector
 
-    - name: label non-master non-infra nodes compute
+    - name: Label infra or legacy infra nodes with the new role label
       oc_label:
         name: '{{ item }}'
         kind: node
         state: add
         labels:
-          - key: node-role.kubernetes.io/compute
+          - key: node-role.kubernetes.io/infra
             value: 'true'
-      with_items: "{{ non_master_non_infra_nodes_result.results.results.0['items'] | map(attribute='metadata') | map(attribute='name') | list }}"
+      with_items: "{{ infra_nodes_by_selector.results.results.0['items'] | map(attribute='metadata') | map(attribute='name') | list }}"
 
-    - name: Retrieve nodes that are marked with the infra selector or the legacy infra selector
+    - name: Retrieve non-infra, non-master nodes that are not yet labeled compute
       oc_obj:
         state: list
         kind: Node
-        selector: '{{ openshift_hosted_infra_selector | default("region=infra") }}'
-      register: infra_nodes_by_selector
+        selector: '{{ (openshift_hosted_infra_selector | default("node-role.kubernetes.io/infra=true")) | regex_replace("=", "!=") }},node-role.kubernetes.io/infra!=true,node-role.kubernetes.io/master!=true,node-role.kubernetes.io/compute!=true'
+      register: non_master_non_infra_nodes_result
 
-    - name: Label infra or legacy infra nodes with the new role label
+    - name: label non-master non-infra nodes compute
       oc_label:
         name: '{{ item }}'
         kind: node
         state: add
         labels:
-          - key: node-role.kubernetes.io/infra
+          - key: node-role.kubernetes.io/compute
             value: 'true'
-      with_items: "{{ infra_nodes_by_selector.results.results.0['items'] | map(attribute='metadata') | map(attribute='name') | list }}"
+      with_items: "{{ non_master_non_infra_nodes_result.results.results.0['items'] | map(attribute='metadata') | map(attribute='name') | list }}"

+ 1 - 2
roles/openshift_node/tasks/config.yml

@@ -4,8 +4,7 @@
 
 - name: Install Node system container
   import_tasks: node_system_container.yml
-  when:
-    - l_is_node_system_container | bool
+  when: l_is_node_system_container | bool
 
 - file:
     dest: "{{ l2_openshift_node_kubelet_args['config'] }}"

+ 4 - 1
roles/openshift_node/tasks/node_system_container.yml

@@ -9,8 +9,11 @@
 - name: Ensure old system path is set
   file:
     state: directory
-    path: "/etc/origin/openvswitch"
+    path: "{{ item }}"
     mode: '0750'
+  with_items:
+  - "/etc/origin/openvswitch"
+  - "/var/lib/kubelet"
 
 - name: Pre-pull node system container image
   command: >

+ 11 - 3
roles/openshift_node/tasks/upgrade.yml

@@ -27,7 +27,6 @@
     openshift_version: "{{ openshift_pkg_version | default('') }}"
   when: not openshift_is_containerized | bool
 
-
 - include_tasks: "{{ node_config_hook }}"
   when: node_config_hook is defined
 
@@ -39,6 +38,15 @@
 # Restart all services
 - import_tasks: upgrade/restart.yml
 
+- name: Approve node certificates when bootstrapping
+  oc_adm_csr:
+    nodes: "{{ openshift.common.hostname | lower }}"
+    timeout: 180
+    fail_on_timeout: true
+  delegate_to: "{{ groups.oo_first_master.0 }}"
+  ignore_errors: true
+  when: openshift_node_bootstrap | default(True) | bool
+
 - name: Wait for node to be ready
   oc_obj:
     state: list
@@ -47,8 +55,8 @@
   register: node_output
   delegate_to: "{{ groups.oo_first_master.0 }}"
   until: node_output.results.returncode == 0 and node_output.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True
-  # Give the node two minutes to come back online.
-  retries: 24
+  # Give the node three minutes to come back online.
+  retries: 36
   delay: 5
 
 - import_tasks: journald.yml

+ 112 - 0
roles/openshift_node/tasks/upgrade/bootstrap_changes.yml

@@ -0,0 +1,112 @@
+---
+- name: Update node-config to prepare for bootstrapping
+  yedit:
+    src: "{{ openshift.common.config_base }}/node/node-config.yaml"
+    edits:
+    - key: servingInfo.certFile
+      value: ""
+    - key: servingInfo.keyFile
+      value: ""
+    - key: kubeletArguments.bootstrap-kubeconfig
+      value:
+      - "{{ openshift.common.config_base }}/node/bootstrap.kubeconfig"
+    - key: kubeletArguments.rotate-certificates
+      value:
+      - "true"
+    - key: kubeletArguments.cert-dir
+      value:
+      - "{{ openshift.common.config_base }}/node/certificates"
+    - key: kubeletArguments.feature-gates
+      value:
+      - RotateKubeletClientCertificate=true,RotateKubeletServerCertificate=true
+
+- name: Check for existing node-config.yaml
+  stat:
+    path: "{{ openshift.common.config_base }}/node/node-config.yaml"
+  register: existing_node_config
+
+- name: Copy existing configuration to bootstrap configuration
+  copy:
+    remote_src: true
+    src: "{{ openshift.common.config_base }}/node/node-config.yaml"
+    dest: "{{ openshift.common.config_base }}/node/bootstrap-node-config.yaml"
+    force: no
+    owner: root
+    group: root
+    mode: 0600
+  when: existing_node_config.stat.exists
+
+- name: Find existing credentials
+  find:
+    paths:
+    - "{{ openshift.common.config_base }}/node"
+    patterns:
+    - system*.kubeconfig
+  register: system_kubeconfigs
+
+- name: Copy existing credentials to bootstrap credentials
+  copy:
+    remote_src: true
+    src: "{{ item }}"
+    dest: "{{ openshift.common.config_base }}/node/bootstrap.kubeconfig"
+    force: no
+    owner: root
+    group: root
+    mode: 0600
+  with_items: "{{ system_kubeconfigs.files | default([]) | map(attribute='path') | list + [openshift.common.config_base+'/node/node.kubeconfig'] }}"
+
+- name: Remove non-bootstrap configuration
+  file:
+    path: "{{ item }}"
+    state: absent
+  with_items:
+  - "{{ openshift.common.config_base }}/node/node.kubeconfig"
+  - "{{ openshift.common.config_base }}/node/node-config.yaml"
+
+- name: Use the admin.kubeconfig for the kubelet bootstrap identity
+  copy:
+    remote_src: true
+    src: "{{ openshift.common.config_base }}/master/admin.kubeconfig"
+    dest: "{{ openshift.common.config_base }}/node/bootstrap.kubeconfig"
+    force: yes
+    owner: root
+    group: root
+    mode: 0600
+  when: inventory_hostname in groups.oo_masters_to_config
+
+- name: Update symlink master CA for docker-registry (name changed)
+  file:
+    src: "{{ item }}"
+    dest: "/etc/docker/certs.d/docker-registry.default.svc:5000/{{ item | basename }}"
+    state: link
+    force: yes
+  with_items:
+  - "{{ openshift.common.config_base }}/node/client-ca.crt"
+
+- name: Remove previous bootstrap certificates
+  file:
+    path: "{{ openshift.common.config_base }}/node/certificates"
+    state: absent
+
+- name: Determine if node already has a dynamic config group
+  command: grep -E '^BOOTSTRAP_CONFIG_NAME=.+' "/etc/sysconfig/{{ openshift_service_type }}-node"
+  ignore_errors: true
+  register: existing
+
+- name: Update the sysconfig to group "{{ r_node_dynamic_config_name }}"
+  lineinfile:
+    dest: "/etc/sysconfig/{{ openshift_service_type }}-node"
+    line: "BOOTSTRAP_CONFIG_NAME={{ r_node_dynamic_config_name }}"
+    regexp: "^BOOTSTRAP_CONFIG_NAME=.*"
+  when: r_node_dynamic_config_force|default(False) or existing is failed
+
+- name: Set up node-config.yml if dynamic configuration is off
+  copy:
+    remote_src: true
+    src: "{{ openshift.common.config_base }}/node/bootstrap-node-config.yaml"
+    dest: "{{ openshift.common.config_base }}/node/node-config.yaml"
+    force: no
+    owner: root
+    group: root
+    mode: 0600
+  when: r_node_dynamic_config_name|length == 0

+ 34 - 37
roles/openshift_node/tasks/upgrade/config_changes.yml

@@ -1,8 +1,4 @@
 ---
-- name: Update systemd units
-  import_tasks: ../systemd_units.yml
-  when: openshift_is_containerized | bool
-
 - name: Update oreg value
   yedit:
     src: "{{ openshift.common.config_base }}/node/node-config.yaml"
@@ -10,47 +6,45 @@
     value: "{{ oreg_url | default(oreg_url_node) }}"
   when: oreg_url is defined or oreg_url_node is defined
 
-- name: Remove obsolete docker-sdn-ovs.conf
+- name: Ensure the node static pod directory exists
   file:
-    path: "/etc/systemd/system/docker.service.d/docker-sdn-ovs.conf"
-    state: absent
+    path: "{{ openshift.common.config_base }}/node/pods"
+    state: directory
+    mode: 0755
 
-# https://bugzilla.redhat.com/show_bug.cgi?id=1513054
-- name: Clean up dockershim data
-  file:
-    path: "/var/lib/dockershim/sandbox/"
-    state: absent
+- name: Update node-config for static pods
+  yedit:
+    src: "{{ openshift.common.config_base }}/node/node-config.yaml"
+    edits:
+    - key: kubeletArguments.pod-manifest-path
+      value:
+      - "{{ openshift.common.config_base }}/node/pods"
 
 # https://bugzilla.redhat.com/show_bug.cgi?id=1518912
-- name: Clean up IPAM data
-  file:
-    path: "/var/lib/cni/networks/openshift-sdn/"
-    state: absent
+# - name: Clean up IPAM data
+#   file:
+#     path: "/var/lib/cni/networks/openshift-sdn/"
+#     state: absent
 
-# https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory
-# swapoff is a custom module in lib_utils that comments out swap entries in
-# /etc/fstab and runs swapoff -a, if necessary.
-- name: Disable swap
-  swapoff: {}
-  when: openshift_disable_swap | default(true) | bool
+- name: Uninstall openvswitch
+  package:
+    name: openvswitch
+    state: absent
+  when: not openshift_is_atomic | bool
 
-- name: Apply 3.6 dns config changes
-  yedit:
-    src: /etc/origin/node/node-config.yaml
-    key: "{{ item.key }}"
-    value: "{{ item.value }}"
+- name: Remove old service information
+  file:
+    path: "{{ item }}"
+    state: absent
   with_items:
-  - key: "dnsBindAddress"
-    value: "127.0.0.1:53"
-  - key: "dnsRecursiveResolvConf"
-    value: "/etc/origin/node/resolv.conf"
+  - /etc/systemd/system/docker.service.d/docker-sdn-ovs.conf
+  - /etc/systemd/system/atomic-openshift-node-dep.service
+  - /etc/systemd/system/origin-node-dep.service
+  - /etc/systemd/system/openvswitch.service
 
-- name: Install Node service file
-  template:
-    dest: "/etc/systemd/system/{{ openshift_service_type }}-node.service"
-    src: "node.service.j2"
-  register: l_node_unit
-  when: not openshift_is_containerized | bool
+- name: Move existing credentials and configuration into bootstrap configuration
+  import_tasks: bootstrap_changes.yml
+  when: openshift_node_bootstrap | default(True) | bool
 
 - name: Reset selinux context
   command: restorecon -RF {{ openshift_node_data_dir }}/openshift.local.volumes
@@ -58,6 +52,9 @@
   - ansible_selinux is defined
   - ansible_selinux.status == 'enabled'
 
+- name: Update systemd units
+  import_tasks: ../systemd_units.yml
+
 # NOTE: This is needed to make sure we are using the correct set
 #       of systemd unit files. The RPMs lay down defaults but
 #       the install/upgrade may override them in /etc/systemd/system/.

+ 0 - 3
roles/openshift_node/tasks/upgrade/stop_services.yml

@@ -4,10 +4,7 @@
     name: "{{ item }}"
     state: stopped
   with_items:
-  - etcd_container
   - openvswitch
-  - "{{ openshift_service_type }}-master-api"
-  - "{{ openshift_service_type }}-master-controllers"
   - "{{ openshift_service_type }}-node"
   failed_when: false
 

+ 2 - 0
roles/openshift_node_group/defaults/main.yml

@@ -13,6 +13,8 @@ openshift_node_groups:
   - 'node-role.kubernetes.io/compute=true'
   edits: []
 
+l_openshift_node_group_master: "{{ openshift_node_group_master | default('node-config-master') }}"
+
 openshift_node_group_edits: []
 openshift_node_group_namespace: openshift-node
 openshift_node_group_labels: []

+ 12 - 0
roles/openshift_node_group/files/sync-policy.yaml

@@ -6,3 +6,15 @@ items:
   metadata:
     name: sync
     namespace: openshift-node
+- apiVersion: authorization.openshift.io/v1
+  kind: RoleBinding
+  metadata:
+    name: sync-node-config-reader-binding
+    namespace: openshift-node
+  roleRef:
+    name: system:node-config-reader
+    namespace: openshift-node
+  subjects:
+  - kind: ServiceAccount
+    name: sync
+    namespace: openshift-node

+ 56 - 23
roles/openshift_node_group/files/sync.yaml

@@ -16,6 +16,8 @@ spec:
       app: sync
   updateStrategy:
     type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 50%
   template:
     metadata:
       labels:
@@ -50,19 +52,10 @@ spec:
           #!/bin/bash
           set -euo pipefail
 
-          # loop until BOOTSTRAP_CONFIG_NAME is set
-          set -o allexport
-          while true; do
-            if [[ -f /etc/sysconfig/origin-node ]]; then
-              source /etc/sysconfig/origin-node
-              if [[ -z "${BOOTSTRAP_CONFIG_NAME-}" ]]; then
-                echo "info: Waiting for BOOTSTRAP_CONFIG_NAME to be set" 2>&1
-                sleep 15
-                continue
-              fi
-              break
-            fi
-          done
+          # set by the node image
+          unset KUBECONFIG
+
+          trap 'kill $(jobs -p); exit 0' TERM
 
           # track the current state of the config
           if [[ -f /etc/origin/node/node-config.yaml ]]; then
@@ -71,12 +64,50 @@ spec:
             touch /tmp/.old
           fi
 
+          # loop until BOOTSTRAP_CONFIG_NAME is set
+          while true; do
+            file=/etc/sysconfig/origin-node
+            if [[ -f /etc/sysconfig/atomic-openshift-node ]]; then
+              file=/etc/sysconfig/atomic-openshift-node
+            elif [[ -f /etc/sysconfig/origin-node ]]; then
+              file=/etc/sysconfig/origin-node
+            else
+              echo "info: Waiting for the node sysconfig file to be created" 1>&2
+              sleep 15 & wait
+              continue
+            fi
+            name="$(sed -nE 's|^BOOTSTRAP_CONFIG_NAME=([^#].+)|\1|p' "${file}" | head -1)"
+            if [[ -z "${name}" ]]; then
+              echo "info: Waiting for BOOTSTRAP_CONFIG_NAME to be set" 1>&2
+              sleep 15 & wait
+              continue
+            fi
+            # in the background check to see if the value changes and exit if so
+            pid=$BASHPID
+            (
+              while true; do
+                if ! updated="$(sed -nE 's|^BOOTSTRAP_CONFIG_NAME=([^#].+)|\1|p' "${file}" | head -1)"; then
+                  echo "error: Unable to check for bootstrap config, exiting" 1>&2
+                  kill $pid
+                  exit 1
+                fi
+                if [[ "${updated}" != "${name}" ]]; then
+                  echo "info: Bootstrap configuration profile name changed, exiting" 1>&2
+                  kill $pid
+                  exit 0
+                fi
+                sleep 15
+              done
+            ) &
+            break
+          done
+
           # periodically refresh both node-config.yaml and relabel the node
           while true; do
-            name=${BOOTSTRAP_CONFIG_NAME}
-            if ! oc extract --config=/etc/origin/node/node.kubeconfig "cm/${BOOTSTRAP_CONFIG_NAME}" -n openshift-node --to=/etc/origin/node --confirm; then
+            if ! oc extract "configmaps/${name}" -n openshift-node --to=/etc/origin/node --confirm --request-timeout=10s --config /etc/origin/node/node.kubeconfig "--token=$( cat /var/run/secrets/kubernetes.io/serviceaccount/token )"  > /dev/null; then
               echo "error: Unable to retrieve latest config for node" 2>&1
-              sleep 15
+              sleep 15 &
+              wait $!
               continue
             fi
             # detect whether the node-config.yaml has changed, and if so trigger a restart of the kubelet.
@@ -92,17 +123,22 @@ spec:
                   echo "info: Applying node labels $labels" 2>&1
                   if ! oc label --config=/etc/origin/node/node.kubeconfig "node/${NODE_NAME}" ${labels} --overwrite; then
                     echo "error: Unable to apply labels, will retry in 10" 2>&1
-                    sleep 10
+                    sleep 10 &
+                    wait $!
                     continue
                   fi
                 fi
+              else
+                echo "error: The downloaded node configuration is invalid, exiting" 1>&2
+                exit 1
               fi
               if ! pgrep -U 0 -f 'hyperkube kubelet ' | xargs kill; then
                 echo "error: Unable to restart Kubelet" 2>&1
               fi
             fi
             cp -f /tmp/.new /tmp/.old
-            sleep 180
+            sleep 180 &
+            wait $!
           done
 
         env:
@@ -117,7 +153,7 @@ spec:
         # Directory which contains the host configuration. We read from this directory
         - mountPath: /etc/origin/node/
           name: host-config
-        - mountPath: /etc/sysconfig/origin-node
+        - mountPath: /etc/sysconfig
           name: host-sysconfig-node
           readOnly: true
 
@@ -129,7 +165,4 @@ spec:
           path: /etc/origin/node
       - name: host-sysconfig-node
         hostPath:
-          path: /etc/sysconfig/origin-node
-      - name: host-modules
-        hostPath:
-          path: /lib/modules
+          path: /etc/sysconfig

+ 7 - 6
roles/openshift_node_group/tasks/bootstrap_config.yml

@@ -9,12 +9,13 @@
   with_items:
   - /etc/origin/node/pods
   - /etc/origin/node/certificates
-- name: Update the sysconfig to group "{{ r_node_bootstrap_config_name }}"
+- name: Determine if node already has a dynamic config group
+  command: grep -E '^BOOTSTRAP_CONFIG_NAME=.+' "/etc/sysconfig/{{ openshift_service_type }}-node"
+  ignore_errors: true
+  register: existing
+- name: Update the sysconfig to group "{{ r_node_dynamic_config_name }}"
   lineinfile:
     dest: "/etc/sysconfig/{{ openshift_service_type }}-node"
-    line: "{{ item.line | default(omit) }}"
-    regexp: "{{ item.regexp }}"
-    state: "{{ item.state | default('present') }}"
-  with_items:
-  - line: "BOOTSTRAP_CONFIG_NAME={{ r_node_bootstrap_config_name }}"
+    line: "BOOTSTRAP_CONFIG_NAME={{ r_node_dynamic_config_name }}"
     regexp: "^BOOTSTRAP_CONFIG_NAME=.*"
+  when: r_node_dynamic_config_force|default(False) or existing is failed

+ 29 - 0
roles/openshift_node_group/tasks/upgrade.yml

@@ -0,0 +1,29 @@
+---
+- name: Ensure all node groups have bootstrap settings
+  include_tasks: create_config.yml
+  vars:
+    openshift_node_group_name: "{{ node_group.name }}"
+    openshift_node_group_edits:
+    - key: servingInfo.certFile
+      value: ""
+    - key: servingInfo.keyFile
+      value: ""
+    - key: kubeletArguments.pod-manifest-path
+      value:
+      - /etc/origin/node/pods
+    - key: kubeletArguments.bootstrap-kubeconfig
+      value:
+      - /etc/origin/node/bootstrap.kubeconfig
+    - key: kubeletArguments.feature-gates
+      value:
+      - RotateKubeletClientCertificate=true,RotateKubeletServerCertificate=true
+    - key: kubeletArguments.rotate-certificates
+      value:
+      - "true"
+    - key: kubeletArguments.cert-dir
+      value:
+      - /etc/origin/node/certificates
+    openshift_node_group_labels: "{{ node_group.labels | default([]) }}"
+  with_items: "{{ openshift_node_groups }}"
+  loop_control:
+    loop_var: node_group

+ 0 - 1
roles/openshift_node_group/templates/node-config.yaml.j2

@@ -59,7 +59,6 @@ masterKubeConfig: node.kubeconfig
 networkConfig:
   mtu: {{ openshift_node_group_network_mtu }}
   networkPluginName: {{ openshift_node_group_network_plugin }}
-networkPluginName: {{ openshift_node_group_network_plugin }}
 servingInfo:
   bindAddress: 0.0.0.0:10250
   bindNetwork: tcp4

+ 20 - 0
roles/openshift_sdn/files/sdn-ovs.yaml

@@ -28,6 +28,7 @@ spec:
       # as all pods.
       serviceAccountName: sdn
       hostNetwork: true
+      hostPID: true
       containers:
       - name: openvswitch
         image: " "
@@ -37,6 +38,25 @@ spec:
         - |
           #!/bin/bash
           set -euo pipefail
+
+          # if another process is listening on the cni-server socket, wait until it exits
+          trap 'kill $(jobs -p); exit 0' TERM
+          retries=0
+          while true; do
+            if /usr/share/openvswitch/scripts/ovs-ctl status &>/dev/null; then
+              echo "warning: Another process is currently managing OVS, waiting 15s ..." >&2
+              sleep 15 & wait
+              (( retries += 1 ))
+            else
+              break
+            fi
+            if [[ "${retries}" -gt 40 ]]; then
+              echo "error: Another process is currently managing OVS, exiting" >&2
+              exit 1
+            fi
+          done
+
+          # launch OVS
           function quit {
               /usr/share/openvswitch/scripts/ovs-ctl stop
               exit 0

+ 52 - 9
roles/openshift_sdn/files/sdn.yaml

@@ -42,6 +42,39 @@ spec:
         - |
           #!/bin/bash
           set -euo pipefail
+
+          # if another process is listening on the cni-server socket, wait until it exits
+          trap 'kill $(jobs -p); exit 0' TERM
+          retries=0
+          while true; do
+            if echo 'test' | socat - UNIX-CONNECT:/var/run/openshift-sdn/cni-server.sock >/dev/null; then
+              echo "warning: Another process is currently listening on the CNI socket, waiting 15s ..." >&2
+              sleep 15 & wait
+              (( retries += 1 ))
+            else
+              break
+            fi
+            if [[ "${retries}" -gt 40 ]]; then
+              echo "error: Another process is currently listening on the CNI socket, exiting" >&2
+              exit 1
+            fi
+          done
+          # if the node config doesn't exist yet, wait until it does
+          retries=0
+          while true; do
+            if [[ ! -f /etc/origin/node/node-config.yaml ]]; then
+              echo "warning: Cannot find existing node-config.yaml, waiting 15s ..." >&2
+              sleep 15 & wait
+              (( retries += 1 ))
+            else
+              break
+            fi
+            if [[ "${retries}" -gt 40 ]]; then
+              echo "error: No existing node-config.yaml, exiting" >&2
+              exit 1
+            fi
+          done
+
           # Take over network functions on the node
           rm -Rf /etc/cni/net.d/*
           rm -Rf /host/opt/cni/bin/*
@@ -83,6 +116,10 @@ spec:
         - mountPath: /etc/sysconfig/origin-node
           name: host-sysconfig-node
           readOnly: true
+        # Mount the entire run directory for socket access for Docker or CRI-o
+        # TODO: remove
+        - mountPath: /var/run
+          name: host-var-run
         # Run directories where we need to be able to access sockets
         - mountPath: /var/run/dbus/
           name: host-var-run-dbus
@@ -114,12 +151,14 @@ spec:
         ports:
         - name: healthz
           containerPort: 10256
-        livenessProbe:
-          initialDelaySeconds: 10
-          httpGet:
-            path: /healthz
-            port: 10256
-            scheme: HTTP
+        # TODO: Temporarily disabled until we determine how to wait for clean default
+        # config
+        # livenessProbe:
+        #   initialDelaySeconds: 10
+        #   httpGet:
+        #     path: /healthz
+        #     port: 10256
+        #     scheme: HTTP
         lifecycle:
 
       volumes:
@@ -135,15 +174,19 @@ spec:
         hostPath:
           path: /lib/modules
 
+      # TODO: access to the docker socket should be replaced by CRI socket
+      - name: host-var-run
+        hostPath:
+          path: /var/run
+      - name: host-var-run-dbus
+        hostPath:
+          path: /var/run/dbus
       - name: host-var-run-ovs
         hostPath:
           path: /var/run/openvswitch
       - name: host-var-run-kubernetes
         hostPath:
           path: /var/run/kubernetes
-      - name: host-var-run-dbus
-        hostPath:
-          path: /var/run/dbus
       - name: host-var-run-openshift-sdn
         hostPath:
           path: /var/run/openshift-sdn