# upgrade.yml (20 KB)


---
  ###############################################################################
  # Evaluate host groups and gather facts
  ###############################################################################
  # Pull in the shared group-evaluation playbook (path is relative to this file).
  - name: Evaluate host groups
    include: ../../evaluate_groups.yml
  # Apply the openshift_facts role to masters, nodes, etcd hosts and load
  # balancers.
  - name: Load openshift_facts
    hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config
    roles:
      - openshift_facts
  # Populate the in-memory group etcd_hosts_to_backup: the etcd hosts when that
  # group is defined and non-empty, otherwise the first master.
  - name: Evaluate additional groups for upgrade
    hosts: localhost
    tasks:
      - name: Evaluate etcd_hosts_to_backup
        add_host:
          groups: etcd_hosts_to_backup
          name: "{{ item }}"
        with_items: >-
          groups.oo_etcd_to_config
          if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0
          else groups.oo_first_master
  ###############################################################################
  # Pre-upgrade checks
  ###############################################################################
  # Pre-flight checks evaluated on the first master.
  - name: Verify upgrade can proceed
    hosts: oo_first_master
    gather_facts: false
    vars:
      # True when the masters group holds more than one host.
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
    tasks:
      # Pacemaker is currently the only supported upgrade path for multiple masters
      - fail:
          msg: "openshift_master_cluster_method must be set to 'pacemaker'"
        when:
          - openshift_master_ha | bool
          - openshift_master_cluster_method is not defined or openshift_master_cluster_method != "pacemaker"

      # Only the two deployment types listed here are accepted.
      - fail:
          msg: >
            This upgrade is only supported for origin and openshift-enterprise
            deployment types
        when: deployment_type not in ['origin','openshift-enterprise']

      # Compares the part of openshift_pkg_version after the first '-' with
      # 3.0.2.900 and rejects anything older.
      - fail:
          msg: >
            openshift_pkg_version is {{ openshift_pkg_version }} which is not a
            valid version for a 3.1 upgrade
        when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare('3.0.2.900','<')

      # If this script errors out ansible will show the default stdout/stderr
      # which contains details for the user:
      - script: ../files/pre-upgrade-check
  # Per-host package checks on every master and node. A host that reaches the
  # final task records pre_upgrade_complete, which the following gate play reads.
  - name: Verify upgrade can proceed
    hosts: oo_masters_to_config:oo_nodes_to_config
    tasks:
      - name: Clean yum cache
        command: yum clean all

      # Package base name after the upgrade: 'origin' for origin deployments,
      # 'atomic-openshift' for everything else.
      - set_fact:
          g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}"

      - name: Determine available versions
        script: ../files/versions.sh {{ g_new_service_name }} openshift
        register: g_versions_result

      # The script's stdout is parsed as YAML; later tasks read curr_version
      # and avail_version from the result.
      - set_fact:
          g_aos_versions: "{{ g_versions_result.stdout | from_yaml }}"

      # Version part (before the first '-') of the available package, or of the
      # current package when nothing newer is available.
      - set_fact:
          g_new_version: "{{ g_aos_versions.curr_version.split('-', 1).0 if g_aos_versions.avail_version is none else g_aos_versions.avail_version.split('-', 1).0 }}"

      - fail:
          msg: This playbook requires Origin 1.0.6 or later
        when: deployment_type == 'origin' and g_aos_versions.curr_version | version_compare('1.0.6','<')

      # The 3.0.2.900 threshold is an Atomic OpenShift version number. Origin
      # uses a 1.x scheme (see the check above), so without the deployment_type
      # guard every origin host would fail here unconditionally.
      - fail:
          msg: Atomic OpenShift 3.1 packages not found
        when: >-
          deployment_type != 'origin'
          and g_aos_versions.curr_version | version_compare('3.0.2.900','<')
          and (g_aos_versions.avail_version is none or g_aos_versions.avail_version | version_compare('3.0.2.900','<'))

      - set_fact:
          pre_upgrade_complete: True
  ##############################################################################
  # Gate on pre-upgrade checks
  ##############################################################################
  # Stop the run unless every master and node recorded pre_upgrade_complete.
  - name: Gate on pre-upgrade checks
    hosts: localhost
    vars:
      pre_upgrade_hosts: >-
        {{ groups.oo_masters_to_config | union(groups.oo_nodes_to_config) }}
    tasks:
      # Inventory names of the hosts whose pre_upgrade_complete fact is true.
      - set_fact:
          pre_upgrade_completed: >-
            {{ hostvars
            | oo_select_keys(pre_upgrade_hosts)
            | oo_collect('inventory_hostname', {'pre_upgrade_complete': true}) }}
      # Expected hosts that are missing from the completed list.
      - set_fact:
          pre_upgrade_failed: >-
            {{ pre_upgrade_hosts | difference(pre_upgrade_completed) }}
      - fail:
          msg: "Upgrade cannot continue. The following hosts did not complete pre-upgrade checks: {{ pre_upgrade_failed | join(',') }}"
        when: pre_upgrade_failed | length > 0
  ###############################################################################
  # Backup etcd
  ###############################################################################
  # Take an etcdctl backup on each host in etcd_hosts_to_backup and record
  # etcd_backup_complete for the following gate play.
  - name: Backup etcd
    hosts: etcd_hosts_to_backup
    vars:
      embedded_etcd: "{{ openshift.master.embedded_etcd }}"
      # This is a lookup wrapped in a play var, so it is evaluated again each
      # time it is referenced. Use it only once (in the set_fact below).
      timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
    roles:
      - openshift_facts
    tasks:
      # Ensure we persist the etcd role for this host in openshift_facts
      - openshift_facts:
          role: etcd
          local_facts: {}
        when: "'etcd' not in openshift"

      - stat:
          path: /var/lib/openshift
        register: var_lib_openshift

      - stat:
          path: /var/lib/origin
        register: var_lib_origin

      # Link only when the old directory exists and the new path does not.
      - name: Create origin symlink if necessary
        file:
          src: /var/lib/openshift/
          dest: /var/lib/origin
          state: link
        when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False

      # TODO: replace shell module with command and update later checks
      # We assume to be using the data dir for all backups.
      - name: Check available disk space for etcd backup
        shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
        register: avail_disk

      # TODO: replace shell module with command and update later checks
      - name: Check current embedded etcd disk usage
        shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
        register: etcd_disk_usage
        when: embedded_etcd | bool

      # The size comparison is only made for embedded etcd, matching the
      # condition on the task that registers etcd_disk_usage.
      - name: Abort if insufficient disk space for etcd backup
        fail:
          msg: >
            {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
            {{ avail_disk.stdout }} Kb available.
        when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)

      - name: Install etcd (for etcdctl)
        yum:
          pkg: etcd
          state: latest

      # Resolve the backup path exactly once. Previously the backup command and
      # the debug message each expanded `timestamp` separately, so the reported
      # directory could differ from the one actually written.
      - name: Pin etcd backup directory
        set_fact:
          etcd_backup_dir: "{{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"

      - name: Generate etcd backup
        command: >
          etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
          --backup-dir={{ etcd_backup_dir }}

      - set_fact:
          etcd_backup_complete: True

      - name: Display location of etcd backup
        debug:
          msg: "Etcd backup created in {{ etcd_backup_dir }}"
  ##############################################################################
  # Gate on etcd backup
  ##############################################################################
  # Stop the run unless every host in etcd_hosts_to_backup recorded
  # etcd_backup_complete.
  - name: Gate on etcd backup
    hosts: localhost
    tasks:
      # Inventory names of the hosts whose etcd_backup_complete fact is true.
      - set_fact:
          etcd_backup_completed: >-
            {{ hostvars
            | oo_select_keys(groups.etcd_hosts_to_backup)
            | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}
      # Backup hosts that are missing from the completed list.
      - set_fact:
          etcd_backup_failed: >-
            {{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}
      - fail:
          msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
        when: etcd_backup_failed | length > 0
  ###############################################################################
  # Upgrade Masters
  ###############################################################################
  # Make a scratch directory on the control host; its path is registered as
  # g_master_mktemp and read by later plays through hostvars.localhost.
  - name: Create temp directory for syncing certs
    hosts: localhost
    gather_facts: false
    tasks:
      - name: Create local temp directory for syncing certs
        local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX
        changed_when: false
        register: g_master_mktemp
  # Store the current deployment_type as a local fact under the common role on
  # masters, nodes and etcd hosts.
  - name: Update deployment type
    hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config
    roles:
      - openshift_facts
    post_tasks:
      - openshift_facts:
          role: common
          local_facts:
            deployment_type: "{{ deployment_type }}"
  # Store the cluster method as a local fact under the master role; falls back
  # to None when openshift_master_cluster_method is not set.
  - name: Update master facts
    hosts: oo_masters_to_config
    roles:
      - openshift_facts
    post_tasks:
      - openshift_facts:
          role: master
          local_facts:
            cluster_method: "{{ openshift_master_cluster_method | default(None) }}"
  # Update kernel and master packages, migrate the master config from 3.0 to
  # 3.1, and set the facts the certificate plays read afterwards.
  - name: Upgrade master packages and configuration
    hosts: oo_masters_to_config
    vars:
      # Empty string when openshift_pkg_version is not provided.
      openshift_version: "{{ openshift_pkg_version | default('') }}"
    tasks:
      - name: Upgrade to latest available kernel
        yum:
          pkg: kernel
          state: latest

      # openshift_version is appended directly to the package name.
      - name: Upgrade master packages
        command: yum update -y {{ openshift.common.service_type }}-master{{ openshift_version }}

      - name: Ensure python-yaml present for config upgrade
        yum:
          pkg: PyYAML
          state: installed

      - name: Upgrade master configuration
        openshift_upgrade_config:
          role: master
          from_version: '3.0'
          to_version: '3.1'
          config_base: "{{ hostvars[inventory_hostname].openshift.common.config_base }}"

      # Facts read by the certificate generation and sync plays that follow.
      - set_fact:
          master_certs_missing: True
          master_cert_subdir: "master-{{ openshift.common.hostname }}"
          master_cert_config_dir: "{{ openshift.common.config_base }}/master"
  # On the first master: run the openshift_master_certificates role, then
  # package each other master's certs and fetch the tarballs into the local
  # sync directory.
  - name: Generate missing master certificates
    hosts: oo_first_master
    vars:
      master_hostnames: "{{ hostvars
                            | oo_select_keys(groups.oo_masters_to_config)
                            | oo_collect('openshift.common.all_hostnames')
                            | oo_flatten | unique }}"
      master_generated_certs_dir: "{{ openshift.common.config_base }}/generated-configs"
      # Host vars of every master except the first. The first master has to be
      # removed from the hostname list BEFORE oo_select_keys: afterwards the
      # items are host-var objects (tasks below read item.master_cert_subdir),
      # so a difference() against a hostname string would never match.
      # NOTE(review): assumes oo_select_keys accepts any list of hostnames -
      # confirm against the filter plugin.
      masters_needing_certs: "{{ hostvars
                                 | oo_select_keys(groups.oo_masters_to_config
                                                  | difference([groups.oo_first_master.0])) }}"
      sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
      openshift_deployment_type: "{{ deployment_type }}"
    roles:
      - openshift_master_certificates
    post_tasks:
      # Runs only when an external etcd group is defined and non-empty.
      - name: Remove generated etcd client certs when using external etcd
        file:
          path: "{{ master_generated_certs_dir }}/{{ item.0.master_cert_subdir }}/{{ item.1 }}"
          state: absent
        when: groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config
        with_nested:
          - masters_needing_certs
          - - master.etcd-client.crt
            - master.etcd-client.key

      - name: Create a tarball of the master certs
        command: >
          tar -czvf {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz
          -C {{ master_generated_certs_dir }}/{{ item.master_cert_subdir }} .
        with_items: masters_needing_certs

      # flat: yes places each tarball directly in sync_tmpdir.
      - name: Retrieve the master cert tarball from the master
        fetch:
          src: "{{ master_generated_certs_dir }}/{{ item.master_cert_subdir }}.tgz"
          dest: "{{ sync_tmpdir }}/"
          flat: yes
          fail_on_missing: yes
          validate_checksum: yes
        with_items: masters_needing_certs
  # Unpack the fetched certs on every master except the first, then either
  # restart the single master service or tear down the pacemaker cluster ahead
  # of the re-create play.
  - name: Sync generated certs, update service config and restart master services
    hosts: oo_masters_to_config
    vars:
      openshift_deployment_type: "{{ deployment_type }}"
      # True when the masters group holds more than one host.
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
      sync_tmpdir: "{{ hostvars.localhost.g_master_mktemp.stdout }}"
    tasks:
      - name: Unarchive the tarball on the master
        unarchive:
          src: "{{ sync_tmpdir }}/{{ master_cert_subdir }}.tgz"
          dest: "{{ master_cert_config_dir }}"
        when: inventory_hostname != groups.oo_first_master.0

      # Single-master path.
      - name: Restart master service
        service:
          name: "{{ openshift.common.service_type}}-master"
          state: restarted
        when: not openshift_master_ha | bool

      - name: Ensure the master service is enabled
        service:
          name: "{{ openshift.common.service_type}}-master"
          state: started
          enabled: true
        when: not openshift_master_ha | bool

      # Multi-master path.
      - name: Check for configured cluster
        stat:
          path: /etc/corosync/corosync.conf
        register: corosync_conf
        when: openshift_master_ha | bool

      - name: Destroy cluster
        command: pcs cluster destroy --all
        run_once: true
        when: openshift_master_ha | bool and corosync_conf.stat.exists == true

      - name: Start pcsd
        service:
          name: pcsd
          enabled: true
          state: started
        when: openshift_master_ha | bool
  # Apply the openshift_master_cluster role on the first master, only when
  # there is more than one master.
  - name: Re-create cluster
    hosts: oo_first_master
    vars:
      openshift_deployment_type: "{{ deployment_type }}"
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
      # Space-separated list of all master hostnames.
      omc_cluster_hosts: "{{ groups.oo_masters_to_config | join(' ') }}"
    roles:
      - role: openshift_master_cluster
        when: openshift_master_ha | bool
  # Remove the scratch directory registered earlier as g_master_mktemp; the
  # task never reports a change.
  - name: Delete temporary directory on localhost
    hosts: localhost
    gather_facts: false
    tasks:
      - file:
          name: "{{ g_master_mktemp.stdout }}"
          state: absent
        changed_when: false
  # Record master_update_complete on each master for the following gate play.
  - name: Set master update status to complete
    hosts: oo_masters_to_config
    tasks:
      - set_fact:
          master_update_complete: true
  ##############################################################################
  # Gate on master update complete
  ##############################################################################
  # Stop the run unless every master recorded master_update_complete.
  - name: Gate on master update
    hosts: localhost
    tasks:
      # Inventory names of the masters whose master_update_complete fact is true.
      - set_fact:
          master_update_completed: >-
            {{ hostvars
            | oo_select_keys(groups.oo_masters_to_config)
            | oo_collect('inventory_hostname', {'master_update_complete': true}) }}
      # Masters that are missing from the completed list.
      - set_fact:
          master_update_failed: >-
            {{ groups.oo_masters_to_config | difference(master_update_completed) }}
      - fail:
          msg: "Upgrade cannot continue. The following masters did not finish updating: {{ master_update_failed | join(',') }}"
        when: master_update_failed | length > 0
  ###############################################################################
  # Upgrade Nodes
  ###############################################################################
  # Update the node package on every node, restart and enable the node service,
  # and record node_update_complete for the following gate play.
  - name: Upgrade nodes
    hosts: oo_nodes_to_config
    vars:
      # Empty string when openshift_pkg_version is not provided.
      openshift_version: "{{ openshift_pkg_version | default('') }}"
    roles:
      - openshift_facts
    tasks:
      # openshift_version is appended directly to the package name.
      - name: Upgrade node packages
        command: yum update -y {{ openshift.common.service_type }}-node{{ openshift_version }}

      - name: Restart node service
        service:
          name: "{{ openshift.common.service_type }}-node"
          state: restarted

      - name: Ensure node service enabled
        service:
          name: "{{ openshift.common.service_type }}-node"
          state: started
          enabled: true

      - set_fact:
          node_update_complete: true
  ##############################################################################
  # Gate on nodes update
  ##############################################################################
  # Stop the run unless every node recorded node_update_complete.
  - name: Gate on nodes update
    hosts: localhost
    tasks:
      # Inventory names of the nodes whose node_update_complete fact is true.
      - set_fact:
          node_update_completed: >-
            {{ hostvars
            | oo_select_keys(groups.oo_nodes_to_config)
            | oo_collect('inventory_hostname', {'node_update_complete': true}) }}
      # Nodes that are missing from the completed list.
      - set_fact:
          node_update_failed: >-
            {{ groups.oo_nodes_to_config | difference(node_update_completed) }}
      - fail:
          msg: "Upgrade cannot continue. The following nodes did not finish updating: {{ node_update_failed | join(',') }}"
        when: node_update_failed | length > 0
  ###############################################################################
  # Post upgrade - Reconcile Cluster Roles and Cluster Role Bindings
  ###############################################################################
  # Reconcile cluster roles and role bindings once (run_once), restart the
  # master service or pacemaker resource, and record reconcile_complete on each
  # master for the following gate play.
  - name: Reconcile Cluster Roles and Cluster Role Bindings
    hosts: oo_masters_to_config
    vars:
      openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}"
      origin_reconcile_bindings: "{{ deployment_type == 'origin' and g_new_version | version_compare('1.0.6', '>') }}"
      # Always true, so the bindings task below runs for every deployment type.
      ent_reconcile_bindings: true
    tasks:
      - name: Reconcile Cluster Roles
        command: >
          {{ openshift.common.admin_binary}}
          --config={{ openshift.common.config_base }}/master/admin.kubeconfig
          policy reconcile-cluster-roles --confirm
        run_once: true

      - name: Reconcile Cluster Role Bindings
        command: >
          {{ openshift.common.admin_binary}}
          --config={{ openshift.common.config_base }}/master/admin.kubeconfig
          policy reconcile-cluster-role-bindings
          --exclude-groups=system:authenticated
          --exclude-groups=system:unauthenticated
          --exclude-users=system:anonymous
          --additive-only=true --confirm
        run_once: true
        when: origin_reconcile_bindings | bool or ent_reconcile_bindings | bool

      # Single-master path.
      - name: Restart master services
        service:
          name: "{{ openshift.common.service_type}}-master"
          state: restarted
        when: not openshift_master_ha | bool

      # Multi-master path: restart the pacemaker resource once, then wait for
      # the cluster VIP to accept connections on 8443.
      - name: Restart master cluster
        command: pcs resource restart master
        run_once: true
        when: openshift_master_ha | bool

      - name: Wait for the clustered master service to be available
        wait_for:
          host: "{{ openshift_master_cluster_vip }}"
          port: 8443
          state: started
          delay: 90
          timeout: 180
        run_once: true
        when: openshift_master_ha | bool

      - set_fact:
          reconcile_complete: true
  ##############################################################################
  # Gate on reconcile
  ##############################################################################
  # Stop the run unless every master recorded reconcile_complete.
  - name: Gate on reconcile
    hosts: localhost
    tasks:
      # Inventory names of the masters whose reconcile_complete fact is true.
      - set_fact:
          reconcile_completed: >-
            {{ hostvars
            | oo_select_keys(groups.oo_masters_to_config)
            | oo_collect('inventory_hostname', {'reconcile_complete': true}) }}
      # Masters that are missing from the completed list.
      - set_fact:
          reconcile_failed: >-
            {{ groups.oo_masters_to_config | difference(reconcile_completed) }}
      - fail:
          msg: "Upgrade cannot continue. The following masters did not finish reconciling: {{ reconcile_failed | join(',') }}"
        when: reconcile_failed | length > 0
  ###############################################################################
  # Post upgrade - Upgrade default router, default registry and examples
  ###############################################################################
  # On the first master: patch the default router and registry deployment
  # configs (if present) to the new image version, then import and update the
  # example templates through the openshift_examples role.
  - name: Upgrade default router and default registry
    hosts: oo_first_master
    vars:
      openshift_deployment_type: "{{ deployment_type }}"
      # registry_url contains ${component} and ${version} placeholders that are
      # substituted here; the version gets a 'v' prefix.
      registry_image: "{{ openshift.master.registry_url | replace( '${component}', 'docker-registry' ) | replace ( '${version}', 'v' + g_new_version ) }}"
      router_image: "{{ openshift.master.registry_url | replace( '${component}', 'haproxy-router' ) | replace ( '${version}', 'v' + g_new_version ) }}"
      oc_cmd: "{{ openshift.common.client_binary }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig"
    roles:
      # Create the new templates shipped in 3.1, existing templates are left
      # unmodified. This prevents the subsequent role definition for
      # openshift_examples from failing when trying to replace templates that do
      # not already exist. We could have potentially done a replace --force to
      # create and update in one step.
      - openshift_examples
      # Update the existing templates
      - role: openshift_examples
        openshift_examples_import_command: replace
    pre_tasks:
      # rc is inspected by the router tasks below; a missing router is not an
      # error (failed_when: false).
      - name: Check for default router
        command: >
          {{ oc_cmd }} get -n default dc/router
        register: _default_router
        failed_when: false
        changed_when: false

      - name: Check for allowHostNetwork and allowHostPorts
        when: _default_router.rc == 0
        shell: >
          {{ oc_cmd }} get -o yaml scc/privileged | /usr/bin/grep -e allowHostPorts -e allowHostNetwork
        register: _scc

      # Patch only when at least one of the two settings is currently false.
      - name: Grant allowHostNetwork and allowHostPorts
        when:
          - _default_router.rc == 0
          - "'false' in _scc.stdout"
        command: >
          {{ oc_cmd }} patch scc/privileged -p
          '{"allowHostPorts":true,"allowHostNetwork":true}' --api-version=v1

      # The dc patches below name the namespace explicitly so that they act on
      # the same object the existence check found, instead of depending on the
      # kubeconfig's current namespace.
      - name: Update deployment config to 1.0.4/3.0.1 spec
        when: _default_router.rc == 0
        command: >
          {{ oc_cmd }} patch -n default dc/router -p
          '{"spec":{"strategy":{"rollingParams":{"updatePercent":-10},"spec":{"serviceAccount":"router","serviceAccountName":"router"}}}}'
          --api-version=v1

      - name: Switch to hostNetwork=true
        when: _default_router.rc == 0
        command: >
          {{ oc_cmd }} patch -n default dc/router -p '{"spec":{"template":{"spec":{"hostNetwork":true}}}}'
          --api-version=v1

      - name: Update router image to current version
        when: _default_router.rc == 0
        command: >
          {{ oc_cmd }} patch -n default dc/router -p
          '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}"}]}}}}'
          --api-version=v1

      # Same pattern for the registry: probe, then patch only if it exists.
      - name: Check for default registry
        command: >
          {{ oc_cmd }} get -n default dc/docker-registry
        register: _default_registry
        failed_when: false
        changed_when: false

      - name: Update registry image to current version
        when: _default_registry.rc == 0
        command: >
          {{ oc_cmd }} patch -n default dc/docker-registry -p
          '{"spec":{"template":{"spec":{"containers":[{"name":"registry","image":"{{ registry_image }}"}]}}}}'
          --api-version=v1
  466. --api-version=v1