# upgrade.yml 20 KB


---
  ###############################################################################
  # Evaluate host groups and gather facts
  ###############################################################################

  # Include the evaluate_groups.yml playbook found two directories up.
  # NOTE(review): presumably this is what populates the oo_* groups targeted by
  # the plays below -- confirm against that file.
  - name: Evaluate host groups
    include: ../../evaluate_groups.yml

  # Apply the openshift_facts role to every master, node, etcd and lb host.
  - name: Load openshift_facts
    hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config
    roles:
      - openshift_facts

  # Build the in-memory group etcd_hosts_to_backup from the control host:
  # the members of oo_etcd_to_config when that group is defined and non-empty,
  # otherwise the members of oo_first_master.
  - name: Evaluate additional groups for upgrade
    hosts: localhost
    tasks:
      - name: Evaluate etcd_hosts_to_backup
        add_host:
          name: "{{ item }}"
          groups: etcd_hosts_to_backup
        # The loop source is templated explicitly: a bare expression in
        # with_items is deprecated and is treated as a literal string by newer
        # Ansible releases, which would register one bogus host.
        with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}"

  ###############################################################################
  # Pre-upgrade checks
  ###############################################################################

  # Inventory-level sanity checks, evaluated on the first master only and
  # without gathering facts.
  - name: Verify upgrade can proceed
    hosts: oo_first_master
    gather_facts: false
    vars:
      # True when more than one master is configured.
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
    tasks:
      # Pacemaker is currently the only supported upgrade path for multiple masters
      - fail:
          msg: "openshift_master_cluster_method must be set to 'pacemaker'"
        when: openshift_master_ha | bool and (openshift_master_cluster_method is not defined or openshift_master_cluster_method != "pacemaker")

      - fail:
          msg: >
            This upgrade is only supported for origin, openshift-enterprise, and online
            deployment types
        when: deployment_type not in ['origin','openshift-enterprise', 'online']

      # The version is taken from the part of openshift_pkg_version that
      # follows its first '-'.
      - fail:
          msg: >
            openshift_pkg_version is {{ openshift_pkg_version }} which is not a
            valid version for a 3.1 upgrade
        when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare('3.0.2.900','<')

      # If this script errors out ansible will show the default stdout/stderr
      # which contains details for the user:
      - script: ../files/pre-upgrade-check

  # Per-host package checks on every master and node. A host that reaches the
  # end of the play records pre_upgrade_complete, which the gate play that
  # follows uses to detect hosts that dropped out.
  - name: Verify upgrade can proceed
    hosts: oo_masters_to_config:oo_nodes_to_config
    tasks:
      - name: Clean package cache
        command: "{{ ansible_pkg_mgr }} clean all"

      - set_fact:
          g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}"

      - name: Determine available versions
        script: ../files/versions.sh {{ g_new_service_name }} openshift
        register: g_versions_result

      # The script's stdout is parsed as YAML; curr_version and avail_version
      # are read from the result below.
      - set_fact:
          g_aos_versions: "{{ g_versions_result.stdout | from_yaml }}"

      # Use avail_version unless it is none, in which case fall back to
      # curr_version; keep only the part before the first '-'.
      - set_fact:
          g_new_version: "{{ g_aos_versions.curr_version.split('-', 1).0 if g_aos_versions.avail_version is none else g_aos_versions.avail_version.split('-', 1).0 }}"

      - fail:
          msg: This playbook requires Origin 1.0.6 or later
        when: deployment_type == 'origin' and g_aos_versions.curr_version | version_compare('1.0.6','<')

      # The 3.0.2.900 floor applies to Atomic OpenShift packages only. Origin is
      # versioned in the 1.x range (see the 1.0.6 check above), so without the
      # deployment_type guard every Origin host would fail here.
      - fail:
          msg: Atomic OpenShift 3.1 packages not found
        when: deployment_type != 'origin' and g_aos_versions.curr_version | version_compare('3.0.2.900','<') and (g_aos_versions.avail_version is none or g_aos_versions.avail_version | version_compare('3.0.2.900','<'))

      - set_fact:
          pre_upgrade_complete: true

  ##############################################################################
  # Gate on pre-upgrade checks
  ##############################################################################

  # Stop the run unless every master and node recorded pre_upgrade_complete.
  - name: Gate on pre-upgrade checks
    hosts: localhost
    vars:
      pre_upgrade_hosts: "{{ groups.oo_masters_to_config | union(groups.oo_nodes_to_config) }}"
    tasks:
      # Hostnames whose hostvars carry pre_upgrade_complete == true.
      - set_fact:
          pre_upgrade_completed: >-
            {{ hostvars
            | oo_select_keys(pre_upgrade_hosts)
            | oo_collect('inventory_hostname', {'pre_upgrade_complete': true}) }}

      # Everything expected minus everything completed.
      - set_fact:
          pre_upgrade_failed: "{{ pre_upgrade_hosts | difference(pre_upgrade_completed) }}"

      - fail:
          msg: "Upgrade cannot continue. The following hosts did not complete pre-upgrade checks: {{ pre_upgrade_failed | join(',') }}"
        when: pre_upgrade_failed | length > 0

  ###############################################################################
  # Backup etcd
  ###############################################################################

  # Back up the etcd data directory on every host in etcd_hosts_to_backup.
  # A host that finishes records etcd_backup_complete for the gate play.
  - name: Backup etcd
    hosts: etcd_hosts_to_backup
    vars:
      # Read the flag from the first master rather than from the current host:
      # when the backup targets come from oo_etcd_to_config they are not
      # guaranteed to carry openshift.master facts, and the lookup would fail.
      embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
      # NOTE: this lookup is re-evaluated every time the variable is templated,
      # so it must not be used directly in more than one task (see
      # etcd_backup_dir below).
      timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
    roles:
      - openshift_facts
    tasks:
      # Ensure we persist the etcd role for this host in openshift_facts
      - openshift_facts:
          role: etcd
          local_facts: {}
        when: "'etcd' not in openshift"

      - stat:
          path: /var/lib/openshift
        register: var_lib_openshift

      - stat:
          path: /var/lib/origin
        register: var_lib_origin

      # Only link when the old directory exists and the new path is still free.
      - name: Create origin symlink if necessary
        file:
          src: /var/lib/openshift/
          dest: /var/lib/origin
          state: link
        when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False

      # TODO: replace shell module with command and update later checks
      # We assume to be using the data dir for all backups.
      - name: Check available disk space for etcd backup
        shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
        register: avail_disk

      # TODO: replace shell module with command and update later checks
      - name: Check current embedded etcd disk usage
        shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
        register: etcd_disk_usage
        when: embedded_etcd | bool

      - name: Abort if insufficient disk space for etcd backup
        fail:
          msg: >
            {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
            {{ avail_disk.stdout }} Kb available.
        when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)

      - name: Install etcd (for etcdctl)
        action: "{{ ansible_pkg_mgr }} name=etcd state=latest"

      # Resolve the backup path exactly once. Templating timestamp separately
      # in the backup command and in the debug message would run `date` twice,
      # and the reported location would not match the directory etcdctl wrote.
      - name: Pin etcd backup directory
        set_fact:
          etcd_backup_dir: "{{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"

      - name: Generate etcd backup
        command: >
          etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
          --backup-dir={{ etcd_backup_dir }}

      - set_fact:
          etcd_backup_complete: true

      - name: Display location of etcd backup
        debug:
          msg: "Etcd backup created in {{ etcd_backup_dir }}"

  ##############################################################################
  # Gate on etcd backup
  ##############################################################################

  # Stop the run unless every host in etcd_hosts_to_backup recorded
  # etcd_backup_complete.
  - name: Gate on etcd backup
    hosts: localhost
    tasks:
      # Hostnames whose hostvars carry etcd_backup_complete == true.
      - set_fact:
          etcd_backup_completed: >-
            {{ hostvars
            | oo_select_keys(groups.etcd_hosts_to_backup)
            | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}

      - set_fact:
          etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"

      - fail:
          msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
        when: etcd_backup_failed | length > 0

  ###############################################################################
  # Upgrade Masters
  ###############################################################################

  # Create a scratch directory on the control host. Its path is registered as
  # g_master_mktemp and read by later plays through hostvars.localhost.
  - name: Create temp directory for syncing certs
    hosts: localhost
    gather_facts: false
    tasks:
      - name: Create local temp directory for syncing certs
        local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX
        register: g_master_mktemp
        # Never report the scratch directory as a change.
        changed_when: false

  # Store the inventory's deployment_type in the "common" local facts of every
  # master, node and etcd host.
  - name: Update deployment type
    hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
    roles:
      - openshift_facts
    post_tasks:
      - openshift_facts:
          role: common
          local_facts:
            deployment_type: "{{ deployment_type }}"

  # Store the configured cluster method in the "master" local facts of every
  # master; None is written when openshift_master_cluster_method is not set.
  - name: Update master facts
    hosts: oo_masters_to_config
    roles:
      - openshift_facts
    post_tasks:
      - openshift_facts:
          role: master
          local_facts:
            cluster_method: "{{ openshift_master_cluster_method | default(None) }}"

  # Update the kernel and master packages on each master, migrate the master
  # configuration from 3.0 to 3.1, and set the facts that the certificate plays
  # read afterwards.
  - name: Upgrade master packages and configuration
    hosts: oo_masters_to_config
    vars:
      # Appended verbatim to the package name; empty when no version is pinned.
      openshift_version: "{{ openshift_pkg_version | default('') }}"
    tasks:
      - name: Upgrade to latest available kernel
        action: "{{ ansible_pkg_mgr }} name=kernel state=latest"

      - name: Upgrade master packages
        command: "{{ ansible_pkg_mgr }} update -y {{ openshift.common.service_type }}-master{{ openshift_version }}"

      - name: Ensure python-yaml present for config upgrade
        action: "{{ ansible_pkg_mgr }} name=PyYAML state=present"

      - name: Upgrade master configuration
        openshift_upgrade_config:
          from_version: '3.0'
          to_version: '3.1'
          role: master
          config_base: "{{ hostvars[inventory_hostname].openshift.common.config_base }}"

      # master_cert_subdir and master_cert_config_dir are read by the
      # certificate generation and sync plays later in this playbook.
      - set_fact:
          master_certs_missing: true
          master_cert_subdir: "master-{{ openshift.common.hostname }}"
          master_cert_config_dir: "{{ openshift.common.config_base }}/master"

  # On the first master: run the openshift_master_certificates role, then
  # package each remaining master's generated certificate directory as a
  # tarball and fetch it into the control host's scratch directory.
  - name: Generate missing master certificates
    hosts: oo_first_master
    vars:
      master_hostnames: "{{ hostvars
                            | oo_select_keys(groups.oo_masters_to_config)
                            | oo_collect('openshift.common.all_hostnames')
                            | oo_flatten | unique }}"
      master_generated_certs_dir: "{{ openshift.common.config_base }}/generated-configs"
      # hostvars of every master except the first one. The first master is
      # removed from the list of hostnames *before* selecting hostvars;
      # applying difference() to the selected hostvars dicts compares dicts
      # with a hostname string and therefore never removes anything.
      masters_needing_certs: "{{ hostvars
                                 | oo_select_keys(groups.oo_masters_to_config | difference([groups.oo_first_master.0])) }}"
      sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
      openshift_deployment_type: "{{ deployment_type }}"
    roles:
      - openshift_master_certificates
    post_tasks:
      # Loop sources are templated explicitly; bare variable names in with_*
      # loops are deprecated and are treated as literal strings by newer
      # Ansible releases.
      - name: Remove generated etcd client certs when using external etcd
        file:
          path: "{{ master_generated_certs_dir }}/{{ item.0.master_cert_subdir }}/{{ item.1 }}"
          state: absent
        when: groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0
        with_nested:
          - "{{ masters_needing_certs }}"
          - - master.etcd-client.crt
            - master.etcd-client.key

      - name: Create a tarball of the master certs
        command: >
          tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz
          -C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} .
        with_items: "{{ masters_needing_certs }}"

      - name: Retrieve the master cert tarball from the master
        fetch:
          src: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz"
          dest: "{{ sync_tmpdir }}/"
          flat: yes
          fail_on_missing: yes
          validate_checksum: yes
        with_items: "{{ masters_needing_certs }}"

  # On every master: unpack the fetched certificates (all but the first
  # master), then either restart the single master service or, with multiple
  # masters, destroy any existing pcs cluster and make sure pcsd is running.
  - name: Sync generated certs, update service config and restart master services
    hosts: oo_masters_to_config
    vars:
      sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
      openshift_deployment_type: "{{ deployment_type }}"
    tasks:
      - name: Unarchive the tarball on the master
        unarchive:
          src: "{{ sync_tmpdir }}/{{ master_cert_subdir }}.tgz"
          dest: "{{ master_cert_config_dir }}"
        when: inventory_hostname != groups.oo_first_master.0

      # --- single master -----------------------------------------------------
      - name: Restart master service
        service:
          name: "{{ openshift.common.service_type }}-master"
          state: restarted
        when: not openshift_master_ha | bool

      - name: Ensure the master service is enabled
        service:
          name: "{{ openshift.common.service_type }}-master"
          state: started
          enabled: yes
        when: not openshift_master_ha | bool

      # --- multiple masters --------------------------------------------------
      - name: Check for configured cluster
        stat:
          path: /etc/corosync/corosync.conf
        register: corosync_conf
        when: openshift_master_ha | bool

      # The HA test comes first so corosync_conf.stat is only dereferenced when
      # the stat task above actually ran.
      - name: Destroy cluster
        command: pcs cluster destroy --all
        run_once: true
        when: openshift_master_ha | bool and corosync_conf.stat.exists == true

      - name: Start pcsd
        service:
          name: pcsd
          enabled: yes
          state: started
        when: openshift_master_ha | bool

  # Apply the openshift_master_cluster role on the first master, only when more
  # than one master is configured.
  - name: Re-create cluster
    hosts: oo_first_master
    vars:
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
      openshift_deployment_type: "{{ deployment_type }}"
      # Space-separated list of all master hostnames.
      omc_cluster_hosts: "{{ groups.oo_masters_to_config | join(' ') }}"
    roles:
      - role: openshift_master_cluster
        when: openshift_master_ha | bool

  # Remove the directory whose path was registered as g_master_mktemp.
  - name: Delete temporary directory on localhost
    hosts: localhost
    gather_facts: false
    tasks:
      - file:
          name: "{{ g_master_mktemp.stdout }}"
          state: absent
        # Never report the cleanup as a change.
        changed_when: false

  # Record master_update_complete on each master; the gate play that follows
  # reads it to detect masters that dropped out.
  - name: Set master update status to complete
    hosts: oo_masters_to_config
    tasks:
      - set_fact:
          master_update_complete: true

  ##############################################################################
  # Gate on master update complete
  ##############################################################################

  # Stop the run unless every master recorded master_update_complete.
  - name: Gate on master update
    hosts: localhost
    tasks:
      # Hostnames whose hostvars carry master_update_complete == true.
      - set_fact:
          master_update_completed: >-
            {{ hostvars
            | oo_select_keys(groups.oo_masters_to_config)
            | oo_collect('inventory_hostname', {'master_update_complete': true}) }}

      - set_fact:
          master_update_failed: "{{ groups.oo_masters_to_config | difference(master_update_completed) }}"

      - fail:
          msg: "Upgrade cannot continue. The following masters did not finish updating: {{ master_update_failed | join(',') }}"
        when: master_update_failed | length > 0

  ###############################################################################
  # Upgrade Nodes
  ###############################################################################

  # Update the node package on every node, restart and enable the node service,
  # and record node_update_complete for the gate play.
  - name: Upgrade nodes
    hosts: oo_nodes_to_config
    vars:
      # Appended verbatim to the package name; empty when no version is pinned.
      openshift_version: "{{ openshift_pkg_version | default('') }}"
    roles:
      - openshift_facts
    tasks:
      - name: Upgrade node packages
        command: "{{ ansible_pkg_mgr }} update -y {{ openshift.common.service_type }}-node{{ openshift_version }}"

      - name: Restart node service
        service:
          name: "{{ openshift.common.service_type }}-node"
          state: restarted

      - name: Ensure node service enabled
        service:
          name: "{{ openshift.common.service_type }}-node"
          state: started
          enabled: yes

      - set_fact:
          node_update_complete: true

  ##############################################################################
  # Gate on nodes update
  ##############################################################################

  # Stop the run unless every node recorded node_update_complete.
  - name: Gate on nodes update
    hosts: localhost
    tasks:
      # Hostnames whose hostvars carry node_update_complete == true.
      - set_fact:
          node_update_completed: >-
            {{ hostvars
            | oo_select_keys(groups.oo_nodes_to_config)
            | oo_collect('inventory_hostname', {'node_update_complete': true}) }}

      - set_fact:
          node_update_failed: "{{ groups.oo_nodes_to_config | difference(node_update_completed) }}"

      - fail:
          msg: "Upgrade cannot continue. The following nodes did not finish updating: {{ node_update_failed | join(',') }}"
        when: node_update_failed | length > 0

  ###############################################################################
  # Post upgrade - Reconcile Cluster Roles and Cluster Role Bindings
  ###############################################################################

  # Reconcile roles and role bindings once, restart the master service (or the
  # pcs "master" resource when several masters are configured), and record
  # reconcile_complete on each master for the gate play.
  - name: Reconcile Cluster Roles and Cluster Role Bindings
    hosts: oo_masters_to_config
    vars:
      origin_reconcile_bindings: "{{ deployment_type == 'origin' and g_new_version | version_compare('1.0.6', '>') }}"
      # Always true, so the bindings task below runs for every deployment type.
      ent_reconcile_bindings: true
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
    tasks:
      - name: Reconcile Cluster Roles
        run_once: true
        command: >
          {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig
          policy reconcile-cluster-roles --confirm

      - name: Reconcile Cluster Role Bindings
        run_once: true
        when: origin_reconcile_bindings | bool or ent_reconcile_bindings | bool
        command: >
          {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig
          policy reconcile-cluster-role-bindings
          --exclude-groups=system:authenticated
          --exclude-groups=system:unauthenticated
          --exclude-users=system:anonymous
          --additive-only=true --confirm

      # Single master: restart the service on the host itself.
      - name: Restart master services
        when: not openshift_master_ha | bool
        service:
          name: "{{ openshift.common.service_type }}-master"
          state: restarted

      # Multiple masters: restart through pcs, once for the whole cluster.
      - name: Restart master cluster
        run_once: true
        when: openshift_master_ha | bool
        command: pcs resource restart master

      # Starts polling after 90 seconds and gives up after 180 seconds.
      - name: Wait for the clustered master service to be available
        run_once: true
        when: openshift_master_ha | bool
        wait_for:
          host: "{{ openshift_master_cluster_vip }}"
          port: 8443
          state: started
          timeout: 180
          delay: 90

      - set_fact:
          reconcile_complete: true

  ##############################################################################
  # Gate on reconcile
  ##############################################################################

  # Stop the run unless every master recorded reconcile_complete.
  - name: Gate on reconcile
    hosts: localhost
    tasks:
      # Hostnames whose hostvars carry reconcile_complete == true.
      - set_fact:
          reconcile_completed: >-
            {{ hostvars
            | oo_select_keys(groups.oo_masters_to_config)
            | oo_collect('inventory_hostname', {'reconcile_complete': true}) }}

      - set_fact:
          reconcile_failed: "{{ groups.oo_masters_to_config | difference(reconcile_completed) }}"

      - fail:
          msg: "Upgrade cannot continue. The following masters did not finish reconciling: {{ reconcile_failed | join(',') }}"
        when: reconcile_failed | length > 0

  ###############################################################################
  # Post upgrade - Upgrade default router, default registry and examples
  ###############################################################################

  # On the first master: patch the default router and registry deployment
  # configs (when they exist) to the new image version, then import the example
  # templates. pre_tasks is listed first because it runs before the roles.
  - name: Upgrade default router and default registry
    hosts: oo_first_master
    vars:
      openshift_deployment_type: "{{ deployment_type }}"
      # Image names are built by substituting the ${component} and ${version}
      # placeholders in the master's registry_url.
      registry_image: "{{ openshift.master.registry_url | replace( '${component}', 'docker-registry' ) | replace ( '${version}', 'v' + g_new_version ) }}"
      router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', 'v' + g_new_version ) }}"
      oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig"
    pre_tasks:
      # A non-zero rc is recorded rather than treated as a failure; every
      # router task below is skipped unless rc == 0.
      - name: Check for default router
        command: >
          {{ oc_cmd }} get -n default dc/router
        register: _default_router
        failed_when: false
        changed_when: false

      - name: Check for allowHostNetwork and allowHostPorts
        shell: >
          {{ oc_cmd }} get -o yaml scc/privileged | /usr/bin/grep -e allowHostPorts -e allowHostNetwork
        register: _scc
        when: _default_router.rc == 0

      # Patch only when at least one of the two grepped settings reads 'false'.
      - name: Grant allowHostNetwork and allowHostPorts
        command: >
          {{ oc_cmd }} patch scc/privileged -p
          '{"allowHostPorts":true,"allowHostNetwork":true}' --api-version=v1
        when:
          - _default_router.rc == 0
          - "'false' in _scc.stdout"

      - name: Update deployment config to 1.0.4/3.0.1 spec
        command: >
          {{ oc_cmd }} patch dc/router -p
          '{"spec":{"strategy":{"rollingParams":{"updatePercent":-10},"spec":{"serviceAccount":"router","serviceAccountName":"router"}}}}'
          --api-version=v1
        when: _default_router.rc == 0

      - name: Switch to hostNetwork=true
        command: >
          {{ oc_cmd }} patch dc/router -p '{"spec":{"template":{"spec":{"hostNetwork":true}}}}'
          --api-version=v1
        when: _default_router.rc == 0

      - name: Update router image to current version
        command: >
          {{ oc_cmd }} patch dc/router -p
          '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}"}]}}}}'
          --api-version=v1
        when: _default_router.rc == 0

      # Same pattern as the router: probe first, patch only when rc == 0.
      - name: Check for default registry
        command: >
          {{ oc_cmd }} get -n default dc/docker-registry
        register: _default_registry
        failed_when: false
        changed_when: false

      - name: Update registry image to current version
        command: >
          {{ oc_cmd }} patch dc/docker-registry -p
          '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}'
          --api-version=v1
        when: _default_registry.rc == 0
    roles:
      # Create the new templates shipped in 3.1, existing templates are left
      # unmodified. This prevents the subsequent role definition for
      # openshift_examples from failing when trying to replace templates that do
      # not already exist. We could have potentially done a replace --force to
      # create and update in one step.
      - openshift_examples
      # Update the existing templates
      - role: openshift_examples
        openshift_examples_import_command: replace