Browse Source

attach leases via the first master only and only once

- move openshift-etcd/migrate.yml from byo to common and keep just the entry point
- replace std_include with essential plays (e.g. no need to detect openshift version)
- delegate the ttl re-attaching to the first master
Jan Chaloupka 7 years ago
parent
commit
2b521c8ae6

+ 1 - 117
playbooks/byo/openshift-etcd/migrate.yml

@@ -3,122 +3,6 @@
   tags:
   - always
 
-- include: ../../common/openshift-cluster/evaluate_groups.yml
+- include: ../../common/openshift-etcd/migrate.yml
   tags:
   - always
-
-- name: Run pre-checks
-  hosts: oo_etcd_to_config
-  tags:
-  - always
-  roles:
-  - role: etcd_migrate
-    r_etcd_migrate_action: check
-    etcd_peer: "{{ ansible_default_ipv4.address }}"
-
-# TODO(jchaloup): replace the std_include with something minimal so the entire playbook is faster
-# e.g. I don't need to detect the OCP version, install deps, etc.
-- include: ../../common/openshift-cluster/std_include.yml
-  tags:
-  - always
-
-- name: Backup v2 data
-  hosts: oo_etcd_to_config
-  gather_facts: no
-  tags:
-  - always
-  roles:
-  - role: openshift_facts
-  - role: etcd_common
-    r_etcd_common_action: backup
-    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
-    r_etcd_common_backup_tag: pre-migration
-    r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
-    r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
-
-- name: Gate on etcd backup
-  hosts: localhost
-  connection: local
-  become: no
-  tasks:
-  - set_fact:
-      etcd_backup_completed: "{{ hostvars
-                                 | oo_select_keys(groups.oo_etcd_to_config)
-                                 | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}"
-  - set_fact:
-      etcd_backup_failed: "{{ groups.oo_etcd_to_config | difference(etcd_backup_completed) }}"
-  - fail:
-      msg: "Migration cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
-    when:
-    - etcd_backup_failed | length > 0
-
-- name: Prepare masters for etcd data migration
-  hosts: oo_masters_to_config
-  tasks:
-  - set_fact:
-      master_services:
-      - "{{ openshift.common.service_type + '-master' }}"
-  - set_fact:
-      master_services:
-      - "{{ openshift.common.service_type + '-master-controllers' }}"
-      - "{{ openshift.common.service_type + '-master-api' }}"
-    when:
-    - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool
-  - debug:
-      msg: "master service name: {{ master_services }}"
-  - name: Stop masters
-    service:
-      name: "{{ item }}"
-      state: stopped
-    with_items: "{{ master_services }}"
-
-- name: Migrate etcd data from v2 to v3
-  hosts: oo_etcd_to_config
-  gather_facts: no
-  tags:
-  - always
-  roles:
-  - role: etcd_migrate
-    r_etcd_migrate_action: migrate
-    etcd_peer: "{{ ansible_default_ipv4.address }}"
-
-- name: Gate on etcd migration
-  hosts: oo_masters_to_config
-  gather_facts: no
-  tasks:
-  - set_fact:
-      etcd_migration_completed: "{{ hostvars
-                                 | oo_select_keys(groups.oo_etcd_to_config)
-                                 | oo_collect('inventory_hostname', {'r_etcd_migrate_success': true}) }}"
-  - set_fact:
-      etcd_migration_failed: "{{ groups.oo_etcd_to_config | difference(etcd_migration_completed) }}"
-
-- name: Configure masters if etcd data migration is succesfull
-  hosts: oo_masters_to_config
-  roles:
-  - role: etcd_migrate
-    r_etcd_migrate_action: configure
-    when: etcd_migration_failed | length == 0
-  tasks:
-  - debug:
-      msg: "Skipping master re-configuration since migration failed."
-    when:
-    - etcd_migration_failed | length > 0
-
-- name: Start masters after etcd data migration
-  hosts: oo_masters_to_config
-  tasks:
-  - name: Start master services
-    service:
-      name: "{{ item }}"
-      state: started
-    register: service_status
-    # Sometimes the master-api, resp. master-controllers fails to start for the first time
-    until: service_status.state is defined and service_status.state == "started"
-    retries: 5
-    delay: 10
-    with_items: "{{ master_services[::-1] }}"
-  - fail:
-      msg: "Migration failed. The following hosts were not properly migrated: {{ etcd_migration_failed | join(',') }}"
-    when:
-    - etcd_migration_failed | length > 0

+ 9 - 0
playbooks/common/openshift-cluster/evaluate_groups.yml

@@ -157,3 +157,12 @@
       ansible_become: "{{ g_sudo | default(omit) }}"
     with_items: "{{ g_glusterfs_hosts | union(g_glusterfs_registry_hosts | default([])) }}"
     changed_when: no
+
+  - name: Evaluate oo_etcd_to_migrate
+    add_host:
+      name: "{{ item }}"
+      groups: oo_etcd_to_migrate
+      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}"
+      ansible_become: "{{ g_sudo | default(omit) }}"
+    with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config | default([]) | length != 0 else groups.oo_first_master }}"
+    changed_when: no

+ 120 - 0
playbooks/common/openshift-etcd/migrate.yml

@@ -0,0 +1,120 @@
+---
+- include: ../openshift-cluster/evaluate_groups.yml
+  tags:
+  - always
+
+- name: Run pre-checks
+  hosts: oo_etcd_to_migrate
+  tags:
+  - always
+  roles:
+  - role: etcd_migrate
+    r_etcd_migrate_action: check
+    r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
+    etcd_peer: "{{ ansible_default_ipv4.address }}"
+
+- include: ../openshift-cluster/initialize_facts.yml
+  tags:
+  - always
+
+- name: Backup v2 data
+  hosts: oo_etcd_to_migrate
+  gather_facts: no
+  tags:
+  - always
+  roles:
+  - role: openshift_facts
+  - role: etcd_common
+    r_etcd_common_action: backup
+    r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"
+    r_etcd_common_backup_tag: pre-migration
+    r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
+    r_etcd_common_backup_sufix_name: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
+
+- name: Gate on etcd backup
+  hosts: localhost
+  connection: local
+  become: no
+  tasks:
+  - set_fact:
+      etcd_backup_completed: "{{ hostvars
+                                 | oo_select_keys(groups.oo_etcd_to_migrate)
+                                 | oo_collect('inventory_hostname', {'r_etcd_common_backup_complete': true}) }}"
+  - set_fact:
+      etcd_backup_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_backup_completed) }}"
+  - fail:
+      msg: "Migration cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
+    when:
+    - etcd_backup_failed | length > 0
+
+- name: Prepare masters for etcd data migration
+  hosts: oo_masters_to_config
+  tasks:
+  - set_fact:
+      master_services:
+      - "{{ openshift.common.service_type + '-master' }}"
+  - set_fact:
+      master_services:
+      - "{{ openshift.common.service_type + '-master-controllers' }}"
+      - "{{ openshift.common.service_type + '-master-api' }}"
+    when:
+    - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool
+  - debug:
+      msg: "master service name: {{ master_services }}"
+  - name: Stop masters
+    service:
+      name: "{{ item }}"
+      state: stopped
+    with_items: "{{ master_services }}"
+
+- name: Migrate etcd data from v2 to v3
+  hosts: oo_etcd_to_migrate
+  gather_facts: no
+  tags:
+  - always
+  roles:
+  - role: etcd_migrate
+    r_etcd_migrate_action: migrate
+    r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
+    etcd_peer: "{{ ansible_default_ipv4.address }}"
+
+- name: Gate on etcd migration
+  hosts: oo_masters_to_config
+  gather_facts: no
+  tasks:
+  - set_fact:
+      etcd_migration_completed: "{{ hostvars
+                                 | oo_select_keys(groups.oo_etcd_to_migrate)
+                                 | oo_collect('inventory_hostname', {'r_etcd_migrate_success': true}) }}"
+  - set_fact:
+      etcd_migration_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_migration_completed) }}"
+
+- name: Configure masters if etcd data migration is succesfull
+  hosts: oo_masters_to_config
+  roles:
+  - role: etcd_migrate
+    r_etcd_migrate_action: configure
+    when: etcd_migration_failed | length == 0
+  tasks:
+  - debug:
+      msg: "Skipping master re-configuration since migration failed."
+    when:
+    - etcd_migration_failed | length > 0
+
+- name: Start masters after etcd data migration
+  hosts: oo_masters_to_config
+  tasks:
+  - name: Start master services
+    service:
+      name: "{{ item }}"
+      state: started
+    register: service_status
+    # The master-api or master-controllers service sometimes fails to start on the first attempt, so retry until it reports "started"
+    until: service_status.state is defined and service_status.state == "started"
+    retries: 5
+    delay: 10
+    with_items: "{{ master_services[::-1] }}"
+  - fail:
+      msg: "Migration failed. The following hosts were not properly migrated: {{ etcd_migration_failed | join(',') }}"
+    when:
+    - etcd_migration_failed | length > 0

+ 1 - 1
roles/etcd_migrate/tasks/check.yml

@@ -6,7 +6,7 @@
 # Run the migration only if the data are v2
 - name: Check if there are any v3 data
   command: >
-    etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:2379' get "" --from-key --keys-only -w json --limit 1
+    etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' get "" --from-key --keys-only -w json --limit 1
   environment:
     ETCDCTL_API: 3
   register: l_etcdctl_output

+ 1 - 1
roles/etcd_migrate/tasks/check_cluster_health.yml

@@ -1,7 +1,7 @@
 ---
 - name: Check cluster health
   command: >
-    etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt --endpoint https://{{ etcd_peer }}:2379 cluster-health
+    etcdctl --cert-file {{ etcd_peer_cert_file }} --key-file {{ etcd_peer_key_file }} --ca-file {{ etcd_peer_ca_file }} --endpoint https://{{ etcd_peer }}:{{ etcd_client_port }} cluster-health
   register: etcd_cluster_health
   changed_when: false
   failed_when: false

+ 4 - 4
roles/etcd_migrate/tasks/check_cluster_status.yml

@@ -2,7 +2,7 @@
 # etcd_ip originates from etcd_common role
 - name: Check cluster status
   command: >
-    etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints 'https://{{ etcd_peer }}:2379' -w json endpoint status
+    etcdctl --cert {{ etcd_peer_cert_file }} --key {{ etcd_peer_key_file }} --cacert {{ etcd_peer_ca_file }} --endpoints 'https://{{ etcd_peer }}:{{ etcd_client_port }}' -w json endpoint status
   environment:
     ETCDCTL_API: 3
   register: l_etcd_cluster_status
@@ -15,7 +15,7 @@
   # http://docs.ansible.com/ansible/playbooks_filters.html#extracting-values-from-containers
   - name: Group all raftIndices into a list
     set_fact:
-      etcd_members_raft_indices: "{{ groups['oo_etcd_to_config'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}"
+      etcd_members_raft_indices: "{{ groups['oo_etcd_to_migrate'] | map('extract', hostvars, 'etcd_member_raft_index') | list | unique }}"
 
   - name: Check the minimum and the maximum of raftIndices is at most 1
     set_fact:
@@ -24,9 +24,9 @@
   - debug:
       msg: "Raft indices difference: {{ etcd_members_raft_indices_diff }}"
 
-  when: inventory_hostname in groups.oo_etcd_to_config[0]
+  when: inventory_hostname == groups.oo_etcd_to_migrate[0]  # exact match (not substring): aggregate raft indices only on the first host of oo_etcd_to_migrate
 
 # The cluster raft status is ok if the difference of the max and min raft index is at most 1
 - name: capture the status
   set_fact:
-    l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_config[0]]['etcd_members_raft_indices_diff'] | int < 2 }}"
+    l_etcd_cluster_status_ok: "{{ hostvars[groups.oo_etcd_to_migrate[0]]['etcd_members_raft_indices_diff'] | int < 2 }}"

+ 8 - 9
roles/etcd_migrate/tasks/migrate.yml

@@ -20,10 +20,12 @@
 - name: Check the etcd v2 data are correctly migrated
   fail:
     msg: "Failed to migrate a member"
-  when: "'finished transforming keys' not in l_etcdctl_migrate.stdout"
+  when: "'finished transforming keys' not in l_etcdctl_migrate.stdout and 'no v2 keys to migrate' not in l_etcdctl_migrate.stdout"
+
+- name: Migration message
+  debug:
+    msg: "Etcd migration finished with: {{ l_etcdctl_migrate.stdout }}"
 
-# TODO(jchaloup): start the etcd on a different port so noone can access it
-# Once the validation is done
 - name: Enable etcd member
   service:
     name: "{{ l_etcd_service }}"
@@ -35,7 +37,7 @@
     --cert {{ etcd_peer_cert_file }} \
     --key {{ etcd_peer_key_file }} \
     --cacert {{ etcd_peer_ca_file }} \
-    --etcd-address 'https://{{ etcd_peer }}:2379' \
+    --etcd-address 'https://{{ etcd_peer }}:{{ etcd_client_port }}' \
     --ttl-keys-prefix {{ item }} \
     --lease-duration 1h
   environment:
@@ -43,11 +45,8 @@
   with_items:
   - "/kubernetes.io/events"
   - "/kubernetes.io/masterleases"
+  delegate_to: "{{ groups.oo_first_master[0] }}"
+  run_once: true
 
 - set_fact:
     r_etcd_migrate_success: true
-
-- name: Enable etcd member
-  service:
-    name: "{{ l_etcd_service }}"
-    state: started