Browse Source

Merge pull request #7802 from vrutkovs/papr-master-ha

PAPR: check 3 masters install and minor update
OpenShift Merge Robot 7 years ago
parent
commit
1f95e3d9ab

+ 39 - 0
.papr-master-ha.inventory

@@ -0,0 +1,39 @@
+[OSEv3:children]
+masters
+nodes
+etcd
+
+[OSEv3:vars]
+ansible_ssh_user=root
+ansible_python_interpreter=/usr/bin/python3
+openshift_deployment_type=origin
+openshift_release="{{ lookup('env', 'target_branch') }}"
+openshift_master_default_subdomain="{{ lookup('env', 'RHCI_ocp_master1_IP') }}.xip.io"
+openshift_check_min_host_disk_gb=1.5
+openshift_check_min_host_memory_gb=1.9
+osm_cluster_network_cidr=10.128.0.0/14
+openshift_portal_net=172.30.0.0/16
+osm_host_subnet_length=9
+openshift_enable_service_catalog=false
+
+[all:vars]
+# bootstrap configs
+openshift_master_bootstrap_auto_approve=true
+openshift_master_bootstrap_auto_approver_node_selector={"node-role.kubernetes.io/master":"true"}
+osm_controller_args={"experimental-cluster-signing-duration": ["20m"]}
+osm_default_node_selector="node-role.kubernetes.io/compute=true"
+
+[masters]
+ocp-master1
+ocp-master2
+ocp-master3
+
+[etcd]
+ocp-master1
+ocp-master2
+ocp-master3
+
+[nodes]
+ocp-master1 openshift_schedulable=true openshift_node_labels="{'node-role.kubernetes.io/infra':'true'}"
+ocp-master2
+ocp-master3

+ 33 - 0
.papr.all-in-one.inventory

@@ -0,0 +1,33 @@
+[OSEv3:children]
+masters
+nodes
+etcd
+
+[OSEv3:vars]
+ansible_ssh_user=root
+ansible_python_interpreter=/usr/bin/python3
+openshift_deployment_type=origin
+openshift_release="{{ lookup('env', 'target_branch') }}"
+openshift_master_default_subdomain="{{ lookup('env', 'RHCI_ocp_master_IP') }}.xip.io"
+openshift_check_min_host_disk_gb=1.5
+openshift_check_min_host_memory_gb=1.9
+osm_cluster_network_cidr=10.128.0.0/14
+openshift_portal_net=172.30.0.0/16
+osm_host_subnet_length=9
+openshift_enable_service_catalog=false
+
+[all:vars]
+# bootstrap configs
+openshift_master_bootstrap_auto_approve=true
+openshift_master_bootstrap_auto_approver_node_selector={"node-role.kubernetes.io/master":"true"}
+osm_controller_args={"experimental-cluster-signing-duration": ["20m"]}
+osm_default_node_selector="node-role.kubernetes.io/compute=true"
+
+[masters]
+ocp-master
+
+[etcd]
+ocp-master
+
+[nodes]
+ocp-master openshift_schedulable=true openshift_node_labels="{'node-role.kubernetes.io/infra':'true'}"

+ 15 - 5
.papr.sh

@@ -20,14 +20,18 @@ export target_branch
 
 pip install -r requirements.txt
 
+# Inventory file passed to the ansible invocations in this script; override via the environment.
+PAPR_INVENTORY=${PAPR_INVENTORY:-.papr.inventory}
+# Any value other than "0" makes the script run the upgrade playbook after the install.
+PAPR_RUN_UPDATE=${PAPR_RUN_UPDATE:-0}
+
 # ping the nodes to check they're responding and register their ostree versions
-ansible -vvv -i .papr.inventory nodes -a 'rpm-ostree status'
+ansible -vvv -i "$PAPR_INVENTORY" nodes -a 'rpm-ostree status'
 
 upload_journals() {
-  mkdir journals
-  for node in master node1 node2; do
-    ssh ocp-$node 'journalctl --no-pager || true' > journals/ocp-$node.log
-  done
+  # Collect the journal of every inventory host into journals/<host>.log.
+  # -p: do not fail if the directory already exists.
+  mkdir -p journals
+  # Dump the journal on each host; `|| true` keeps collection best-effort
+  # (as the previous ssh loop did) so a journalctl failure does not fail the task.
+  ansible -vvv -i "$PAPR_INVENTORY" all \
+    -m shell -a 'journalctl --no-pager > /tmp/journal || true'
+  # Pull each dump back to the controller, one flat file per host.
+  ansible -vvv -i "$PAPR_INVENTORY" all \
+    -m fetch -a "src=/tmp/journal dest=journals/{{ inventory_hostname }}.log flat=yes"
 }
 
 trap upload_journals ERR
@@ -36,7 +40,13 @@ trap upload_journals ERR
 export ANSIBLE_LOG_PATH=ansible.log
 
 # run the actual installer
-ansible-playbook -v -i .papr.inventory playbooks/deploy_cluster.yml
+ansible-playbook -v -i "$PAPR_INVENTORY" playbooks/deploy_cluster.yml
+
+# Run upgrade playbook (to a minor version)
+if [[ "${PAPR_RUN_UPDATE:-0}" != "0" ]]; then
+  # The upgrade directory name uses '_' where the branch version has its
+  # first '.', so replace that one dot (same result as sed 's/\./_/').
+  update_version="${target_branch/./_}"
+  ansible-playbook -v -i "$PAPR_INVENTORY" "playbooks/byo/openshift-cluster/upgrades/v${update_version}/upgrade.yml"
+fi
 
 ### DISABLING TESTS FOR NOW, SEE:
 ### https://github.com/openshift/openshift-ansible/pull/6132

+ 36 - 0
.papr.yml

@@ -41,3 +41,39 @@ tests:
 artifacts:
   - journals/
   - ansible.log
+
+---
+inherit: true
+context: 'fedora/27/atomic/update'
+
+cluster:
+  hosts:
+    - name: ocp-master
+      distro: fedora/27/atomic
+      specs:
+        ram: 4096
+  container:
+    image: registry.fedoraproject.org/fedora:27
+env:
+  PAPR_INVENTORY: .papr.all-in-one.inventory
+  PAPR_RUN_UPDATE: "yes"
+
+---
+inherit: true
+context: 'fedora/27/atomic/master-ha'
+
+cluster:
+  hosts:
+    - name: ocp-master1
+      distro: fedora/27/atomic
+      specs:
+        ram: 4096
+    - name: ocp-master2
+      distro: fedora/27/atomic
+    - name: ocp-master3
+      distro: fedora/27/atomic
+  container:
+    image: registry.fedoraproject.org/fedora:27
+
+env:
+  PAPR_INVENTORY: .papr-master-ha.inventory

+ 3 - 1
playbooks/common/openshift-cluster/upgrades/pre/verify_cluster.yml

@@ -68,7 +68,9 @@
       local_facts:
         ha: "{{ groups.oo_masters_to_config | length > 1 }}"
 
-  - when: openshift_is_containerized | bool
+  - when:
+    - openshift_is_containerized | bool
+    - not openshift_is_bootstrapped | bool
     block:
     - set_fact:
         master_services:

+ 14 - 0
playbooks/openshift-master/private/upgrade.yml

@@ -13,6 +13,20 @@
       path: "{{ openshift.common.config_base }}/master/service-signer.crt"
     register: service_signer_cert_stat
     changed_when: false
+  - name: verify api server
+    command: >
+      curl --silent --tlsv1.2
+      --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt
+      {{ openshift.master.api_url }}/healthz/ready
+    args:
+      # Disables the following warning:
+      # Consider using get_url or uri module rather than running curl
+      warn: no
+    register: api_available_output
+    until: api_available_output.stdout == 'ok'
+    retries: 120
+    delay: 1
+    changed_when: false
 
 - import_playbook: create_service_signer_cert.yml
 

+ 1 - 1
playbooks/openshift-node/private/restart.yml

@@ -49,7 +49,7 @@
     oc_obj:
       state: list
       kind: node
-      name: "{{ openshift.common.hostname | lower }}"
+      name: "{{ openshift.node.nodename | lower }}"
     register: node_output
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_config

+ 9 - 2
roles/etcd/defaults/main.yaml

@@ -4,8 +4,10 @@ r_etcd_common_backup_sufix_name: ''
 
 l_is_etcd_system_container: "{{ (openshift_use_etcd_system_container | default(openshift_use_system_containers | default(false)) | bool) }}"
 
+l_etcd_static_pod: "{{ not (r_etcd_common_skip_command_shim is defined and r_etcd_common_skip_command_shim) or openshift.node.bootstrapped }}"
+
 # runc, docker, host
-r_etcd_common_etcd_runtime: "{{ 'runc' if l_is_etcd_system_container else ('docker' if openshift_is_containerized else 'host') }}"
+r_etcd_common_etcd_runtime: "{{ 'static_pod' if l_etcd_static_pod else ('runc' if l_is_etcd_system_container else ('docker' if openshift_is_containerized else 'host')) }}"
 
 r_etcd_default_version: "3.2.15"
 osm_etcd_image: "registry.access.redhat.com/rhel7/etcd:{{ r_etcd_upgrade_version | default(r_etcd_default_version) }}"
@@ -17,7 +19,12 @@ etcd_image: "{{ etcd_image_dict[openshift_deployment_type | default('origin')] }
 # etcd run on a host => use etcdctl command directly
 # etcd run as a docker container => use docker exec
 # etcd run as a runc container => use runc exec
-r_etcd_common_etcdctl_command: "{{ 'etcdctl' if (r_etcd_common_etcd_runtime == 'host') else ('docker exec etcd_container etcdctl' if (r_etcd_common_etcd_runtime == 'docker') else 'runc exec etcd etcdctl') }}"
+etcdctl_dict:
+  host: 'etcdctl'
+  docker: 'docker exec etcd_container etcdctl'
+  static_pod: '/usr/local/bin/master-exec etcd etcd etcdctl'
+  runc: 'runc exec etcd etcdctl'
+r_etcd_common_etcdctl_command: "{{ etcdctl_dict[r_etcd_common_etcd_runtime | default('runc')] }}"
 
 # etcd server vars
 etcd_conf_dir: '/etc/etcd'

+ 1 - 8
roles/etcd/tasks/backup/backup.yml

@@ -49,17 +49,10 @@
   - l_etcd_selinux_labels.rc == 0
   - "'svirt_sandbox_file_t' not in l_etcd_selinux_labels.stdout"
 
-- name: Generate etcd backup (legacy)
+- name: Generate etcd backup
   command: >
     {{ r_etcd_common_etcdctl_command }} backup --data-dir={{ l_etcd_incontainer_data_dir }}
     --backup-dir={{ l_etcd_incontainer_backup_dir }}
-  when: r_etcd_common_skip_command_shim | default(False) | bool
-
-- name: Generate etcd backup (static pod)
-  command: >
-    /usr/local/bin/master-exec etcd etcd etcdctl backup --data-dir={{ l_etcd_incontainer_data_dir }}
-    --backup-dir={{ l_etcd_incontainer_backup_dir }}
-  when: not (r_etcd_common_skip_command_shim | default(False) | bool)
 
 # According to the docs change you can simply copy snap/db
 # https://github.com/openshift/openshift-docs/commit/b38042de02d9780842dce95cfa0ef45d53b58bc6

+ 1 - 0
roles/etcd/tasks/upgrade/upgrade_static.yml

@@ -9,6 +9,7 @@
   systemd:
     name: "{{ etcd_service }}"
     state: stopped
+  when: not openshift.node.bootstrapped
 
 - name: Remove old service file
   file:

+ 21 - 4
roles/etcd/tasks/version_detect.yml

@@ -23,14 +23,14 @@
     # state, not manipulating anything
     changed_when: false
     when:
-    - not l_is_etcd_system_container | bool
+    - not l_is_etcd_system_container | bool and not l_etcd_static_pod | bool
 
     # Given that a registered variable is set even if the when condition
     # is false, we need to set etcd_container_version separately
   - set_fact:
       etcd_container_version: "{{ etcd_container_version_docker.stdout }}"
     when:
-    - not l_is_etcd_system_container | bool
+    - not l_is_etcd_system_container | bool and not l_etcd_static_pod | bool
 
   - name: Record containerized etcd version (runc)
     command: runc exec etcd rpm -qa --qf '%{version}' etcd\*
@@ -40,14 +40,31 @@
     # state, not manipulating anything
     changed_when: false
     when:
-    - l_is_etcd_system_container | bool
+    - l_is_etcd_system_container | bool and not l_etcd_static_pod | bool
 
     # Given that a registered variable is set even if the when condition
     # is false, we need to set etcd_container_version separately
   - set_fact:
       etcd_container_version: "{{ etcd_container_version_runc.stdout }}"
     when:
-    - l_is_etcd_system_container | bool
+    # Cast both flags with `| bool`, matching the condition on the runc
+    # version-recording task, so the two tasks are skipped or run together.
+    - l_is_etcd_system_container | bool and not l_etcd_static_pod | bool
+
+  - name: Record etcd version (static pod)
+    command: /usr/local/bin/master-exec etcd etcd rpm -qa --qf '%{version}' etcd\*
+    register: etcd_container_version_static_pod
+    failed_when: false
+    # AUDIT:changed_when: `false` because we are only inspecting
+    # state, not manipulating anything
+    changed_when: false
+    when:
+    - l_etcd_static_pod | bool
+
+    # Given that a registered variable is set even if the when condition
+    # is false, we need to set etcd_container_version separately
+  - set_fact:
+      etcd_container_version: "{{ etcd_container_version_static_pod.stdout }}"
+    when:
+    - l_etcd_static_pod | bool
 
   - debug:
       msg: "Etcd containerized version {{ etcd_container_version }} detected"

+ 1 - 0
roles/openshift_control_plane/tasks/static_shim.yml

@@ -6,5 +6,6 @@
     dest: "/usr/local/bin/"
     mode: 0500
   with_items:
+  - scripts/docker/master-exec
   - scripts/docker/master-logs
   - scripts/docker/master-restart

+ 2 - 2
roles/openshift_node/tasks/upgrade.yml

@@ -40,7 +40,7 @@
 
 - name: Approve node certificates when bootstrapping
   oc_adm_csr:
-    nodes: "{{ openshift.common.hostname | lower }}"
+    nodes: "{{ openshift.node.nodename | lower }}"
     timeout: 180
     fail_on_timeout: true
   delegate_to: "{{ groups.oo_first_master.0 }}"
@@ -51,7 +51,7 @@
   oc_obj:
     state: list
     kind: node
-    name: "{{ openshift.common.hostname | lower }}"
+    name: "{{ openshift.node.nodename | lower }}"
   register: node_output
   delegate_to: "{{ groups.oo_first_master.0 }}"
   until: node_output.results.returncode == 0 and node_output.results.results[0].status.conditions | selectattr('type', 'match', '^Ready$') | map(attribute='status') | join | bool == True

+ 2 - 2
setup.py

@@ -79,7 +79,7 @@ def find_playbooks():
             os.path.join(os.getcwd(), 'playbooks'),
             exclude_dirs, None, r'\.ya?ml$'):
         with open(yaml_file, 'r') as contents:
-            for task in yaml.safe_load(contents) or {}:
+            for task in yaml.safe_load_all(contents) or {}:
                 if not isinstance(task, dict):
                     # Skip yaml files which are not a dictionary of tasks
                     continue
@@ -323,7 +323,7 @@ class OpenShiftAnsibleSyntaxCheck(Command):
         for yaml_file in find_files(
                 os.getcwd(), exclude_dirs, None, r'\.ya?ml$'):
             with open(yaml_file, 'r') as contents:
-                yaml_contents = yaml.safe_load(contents)
+                yaml_contents = yaml.safe_load_all(contents)
                 if not isinstance(yaml_contents, list):
                     continue