config.yml 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. ---
  # ---- Disable swap ----
  # Background:
  # https://docs.openshift.com/container-platform/3.4/admin_guide/overcommit.html#disabling-swap-memory
  # `swapoff` is a custom module (not a stock Ansible one). Per the original
  # note, it comments out the swap entries in /etc/fstab and runs
  # `swapoff -a` when necessary.
  - name: Disable swap
    swapoff: {}
  # The atomic-openshift-node service sets this parameter when it starts, but
  # the setting is lost whenever the network service is restarted, so it is
  # persisted in a dedicated sysctl drop-in file and reloaded right away.
  # Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1372388
  - name: Enable IP Forwarding
    sysctl:
      sysctl_file: "/etc/sysctl.d/99-openshift.conf"
      name: net.ipv4.ip_forward
      value: "1"
      reload: true
  # A RHEL base OS installed with the "Minimal" option has the firewalld
  # service enabled by default, and it denies unexpected traffic on port 10250.
  # Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1740439
  - name: Disable firewalld service
    systemd:
      name: "firewalld.service"
      enabled: false
    register: service_status
    # Both conditions must hold for the task to fail: the module reported a
    # failure AND the failure is not merely "the unit does not exist".
    failed_when:
      - service_status is failed
      - "'Could not find the requested service' not in service_status.msg"
  # Switch the container_manage_cgroup SELinux boolean on and make the
  # setting persistent.
  - name: Setting sebool container_manage_cgroup
    seboolean:
      name: container_manage_cgroup
      persistent: true
      state: true
  # Scratch directory for this run; later tasks refer to it as temp_dir.path
  # (bootstrap.ign and pull-secret.json are written there).
  - name: Create temp directory
    register: temp_dir
    tempfile:
      state: directory
  # Poll the bootstrap endpoint until it answers with HTTP 200:
  # up to 60 retries, 10 seconds apart. Certificate validation is disabled.
  - name: Wait for bootstrap endpoint to show up
    uri:
      url: "{{ openshift_node_bootstrap_endpoint }}"
      validate_certs: false
    register: result
    until: result.status is defined and result.status == 200
    retries: 60
    delay: 10
  # Save the ignition config as bootstrap.ign inside the temp directory.
  # The registered result is also read further down via
  # bootstrap_ignition.json.
  - name: Fetch bootstrap ignition file locally
    uri:
      url: "{{ openshift_node_bootstrap_endpoint }}"
      validate_certs: false
      dest: "{{ temp_dir.path }}/bootstrap.ign"
    register: bootstrap_ignition
  # registries.conf appears twice in the ignition config; the second entry is
  # the correct one, which is why the filtered list is reduced with `last`.
  - name: Extract the last registries.conf file from bootstrap.ign
    set_fact:
      registries_conf: >
        {{ bootstrap_ignition.json.storage.files
           | selectattr('path', 'match', '/etc/containers/registries.conf')
           | list
           | last }}
  # `contents.source` is a data URL: "data:[<mediatype>][;base64],<data>".
  # Split on the FIRST comma only. A percent-encoded payload may itself
  # contain literal commas, and an unbounded split would silently truncate
  # the data at the first of them.
  #   base64encoded - true when the header part ends with "base64"
  #   source_data   - everything after the first comma (still encoded)
  - name: Check data URL encoding and extract source data
    set_fact:
      base64encoded: "{{ registries_conf.contents.source.split(',', 1)[0].endswith('base64') }}"
      source_data: "{{ registries_conf.contents.source.split(',', 1)[1] }}"
  # Decode the payload (base64 or percent-encoding, as detected earlier) and
  # write it to the path recorded in the ignition entry.
  #
  # Ignition stores a file's mode as a DECIMAL integer (e.g. 420 for 0644).
  # Prefixing that number with "0" would produce the bogus mode "0420", so
  # the value is rendered in octal before it is handed to the copy module.
  # The registered result tells the next task whether the file changed.
  - name: Write /etc/containers/registries.conf
    copy:
      content: "{{ (source_data | b64decode) if base64encoded else (source_data | urldecode) }}"
      mode: "{{ '0%o' | format(registries_conf.mode | int) }}"
      dest: "{{ registries_conf.path }}"
    register: update_registries
  # Restart CRI-O only when the registries.conf write above reported a change.
  - name: Restart the CRI-O service
    when: update_registries is changed
    systemd:
      name: "crio"
      state: restarted
  # Runs `oc` on the control host (delegate_to: localhost) and keeps retrying
  # until it prints a non-empty value: up to 36 retries, 5 seconds apart.
  # The value is base64-encoded; it is decoded by the task that writes it out.
  - name: Get cluster pull-secret
    delegate_to: localhost
    command: >
      oc get secret pull-secret
      --kubeconfig={{ openshift_node_kubeconfig_path }}
      --namespace=openshift-config
      --output=jsonpath='{.data.\.dockerconfigjson}'
    register: oc_get
    until: oc_get.stdout != ''
    retries: 36
    delay: 5
  # Decode the pull secret fetched by the previous task and store it in the
  # temp directory for the later `podman pull --authfile` calls.
  # The file holds registry credentials, so it is created owner-only (0600)
  # and the task is excluded from logging so the decoded secret cannot show
  # up in verbose or diff output.
  - name: Write pull-secret to file
    copy:
      content: "{{ oc_get.stdout | b64decode }}"
      dest: "{{ temp_dir.path }}/pull-secret.json"
      mode: "0600"
    no_log: true
  # Ask the cluster (from the control host) for the desired release image of
  # the first clusterversion item; retried up to 36 times, 5 seconds apart,
  # until the output is non-empty.
  - name: Get cluster release image
    delegate_to: localhost
    command: >
      oc get clusterversion
      --kubeconfig={{ openshift_node_kubeconfig_path }}
      --output=jsonpath='{.items[0].status.desired.image}'
    register: oc_get
    until: oc_get.stdout != ''
    retries: 36
    delay: 5
  # Keep the release image in its own fact: the `oc_get` register name is
  # reused by later tasks in this file.
  - name: Set l_release_image fact
    set_fact:
      l_release_image: "{{ oc_get.stdout }}"
  # NOTE(review): proxy.yml is not visible here; presumably it prepares the
  # http_proxy / https_proxy / no_proxy variables used below - confirm.
  - import_tasks: proxy.yml
  # Pull the release image, then run it to obtain the machine-config-operator
  # image reference (registered as release_image_mcd).
  # NOTE(review): `environment` is attached to the block here; the flattened
  # source does not show its original nesting - confirm.
  - block:
      # No retries/delay are given, so Ansible's defaults apply to `until`.
      - name: Pull release image
        command: >-
          podman pull
          --tls-verify={{ openshift_node_tls_verify }}
          --authfile {{ temp_dir.path }}/pull-secret.json
          {{ l_release_image }}
        register: podman_pull
        until: podman_pull.stdout != ''

      - name: Get machine controller daemon image from release image
        command: >-
          podman run --rm {{ l_release_image }}
          image machine-config-operator
        register: release_image_mcd
    # Proxy settings fall back to empty strings when the variables are unset.
    environment:
      http_proxy: "{{ http_proxy | default('') }}"
      https_proxy: "{{ https_proxy | default('') }}"
      no_proxy: "{{ no_proxy | default('') }}"
  # Pull the machine-config-daemon image, apply the bootstrap ignition config
  # with it, clean up the temp directory and reboot. Any failure inside the
  # block is turned into a single "Ignition apply failed" error by `rescue`.
  # NOTE(review): `environment` is attached to the block here; the flattened
  # source does not show its original nesting - confirm.
  - block:
      # No retries/delay are given, so Ansible's defaults apply to `until`.
      - name: Pull MCD image
        command: >-
          podman pull
          --tls-verify={{ openshift_node_tls_verify }}
          --authfile {{ temp_dir.path }}/pull-secret.json
          {{ release_image_mcd.stdout }}
        register: podman_pull
        until: podman_pull.stdout != ''

      # The command line is assembled from three task-local vars:
      # mounts, then flags + image, then the daemon's own arguments.
      # --skip-reboot is passed because the reboot is a separate task below.
      - name: Apply ignition manifest
        vars:
          podman_flags: "--privileged --rm --entrypoint=/usr/bin/machine-config-daemon -ti {{ release_image_mcd.stdout }}"
          podman_mounts: "-v /:/rootfs -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd"
          mcd_command: "start --node-name {{ ansible_nodename | lower }} --once-from {{ temp_dir.path }}/bootstrap.ign --skip-reboot"
        command: "podman run {{ podman_mounts }} {{ podman_flags }} {{ mcd_command }}"

      - name: Remove temp directory
        file:
          state: absent
          path: "{{ temp_dir.path }}"

      - name: Reboot the host and wait for it to come back
        reboot:
        #  reboot_timeout: 600  # default, 10 minutes
    # Proxy settings fall back to empty strings when the variables are unset.
    environment:
      http_proxy: "{{ http_proxy | default('') }}"
      https_proxy: "{{ https_proxy | default('') }}"
      no_proxy: "{{ no_proxy | default('') }}"
    rescue:
      - fail:
          msg: "Ignition apply failed"
  # For every host in the current batch, approve the CSRs that were requested
  # by the node-bootstrapper service account AND whose description mentions
  # system:node:<nodename>. The script exits 0 only when at least one CSR was
  # approved (exit $((!count))), so `until` retries (6 times, 5 s apart) while
  # nothing has been approved yet. Runs once, on the control host.
  - block:
      - name: Approve node-bootstrapper CSR
        shell: |
          count=0
          for csr in $(oc --kubeconfig={{ openshift_node_kubeconfig_path }} get csr --no-headers \
              | grep " system:serviceaccount:openshift-machine-config-operator:node-bootstrapper " \
              | cut -d " " -f1)
          do
            if oc --kubeconfig={{ openshift_node_kubeconfig_path }} describe csr/$csr \
                | grep " system:node:{{ hostvars[item].ansible_nodename | lower }}$"
            then
              if oc --kubeconfig={{ openshift_node_kubeconfig_path }} adm certificate approve ${csr}
              then
                count=$((count+1))
              fi
            fi
          done
          exit $((!count))
        loop: "{{ ansible_play_batch }}"
        run_once: true
        delegate_to: localhost
        register: oc_get
        until: oc_get is success
        retries: 6
        delay: 5
    rescue:
      # Collect debug information first, then fail with a clear message.
      - import_tasks: gather_debug.yml

      - name: DEBUG - Failed to approve node-bootstrapper CSR
        fail:
          msg: "Failed to approve node-bootstrapper CSR"
        delegate_to: localhost
  # For every host in the current batch, approve the CSRs whose listing line
  # contains " system:node:<nodename> ". The script exits 0 only when at least
  # one approval succeeded (exit $((!count))); otherwise `until` retries
  # (6 times, 5 s apart). Runs once, on the control host.
  - block:
      - name: Approve node CSR
        shell: |
          count=0
          for csr in $(oc --kubeconfig={{ openshift_node_kubeconfig_path }} get csr --no-headers \
              | grep " system:node:{{ hostvars[item].ansible_nodename | lower }} " \
              | cut -d " " -f1)
          do
            if oc --kubeconfig={{ openshift_node_kubeconfig_path }} adm certificate approve ${csr}
            then
              count=$((count+1))
            fi
          done
          exit $((!count))
        loop: "{{ ansible_play_batch }}"
        run_once: true
        delegate_to: localhost
        register: oc_get
        until: oc_get is success
        retries: 6
        delay: 5
    rescue:
      # Collect debug information first, then fail with a clear message.
      - import_tasks: gather_debug.yml

      - name: DEBUG - Failed to approve node CSR
        fail:
          msg: "Failed to approve node CSR"
        delegate_to: localhost
  # For every host in the current batch, poll the node's "Ready" condition
  # from the control host until it reads "True" (up to 36 retries, 5 seconds
  # apart). The query is read-only, hence changed_when: false.
  - block:
      - name: Wait for nodes to report ready
        command: >
          oc get node {{ hostvars[item].ansible_nodename | lower }}
          --kubeconfig={{ openshift_node_kubeconfig_path }}
          --output=jsonpath='{.status.conditions[?(@.type=="Ready")].status}'
        loop: "{{ ansible_play_batch }}"
        run_once: true
        delegate_to: localhost
        register: oc_get
        until: oc_get.stdout == "True"
        retries: 36
        delay: 5
        changed_when: false
    rescue:
      # Collect debug information first, then fail with a clear message.
      - import_tasks: gather_debug.yml

      - name: DEBUG - Node failed to report ready
        fail:
          msg: "Node failed to report ready"
        delegate_to: localhost