
Merge pull request #6335 from kwoodson/node_groups_refactor

Node group management update.
Scott Dodson 7 years ago
parent
commit
01ae634a0b

+ 2 - 2
playbooks/aws/openshift-cluster/accept.yml

@@ -18,7 +18,7 @@
       name: lib_openshift
 
   - name: fetch masters
-    ec2_remote_facts:
+    ec2_instance_facts:
       region: "{{ openshift_aws_region | default('us-east-1') }}"
       filters:
         "tag:clusterid": "{{ openshift_aws_clusterid | default('default') }}"
@@ -30,7 +30,7 @@
     until: "'instances' in mastersout and mastersout.instances|length > 0"
 
   - name: fetch new node instances
-    ec2_remote_facts:
+    ec2_instance_facts:
       region: "{{ openshift_aws_region | default('us-east-1') }}"
       filters:
         "tag:clusterid": "{{ openshift_aws_clusterid | default('default') }}"

+ 21 - 14
playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml

@@ -11,25 +11,19 @@
       msg: "Ensure that new scale groups were provisioned before proceeding to update."
     when:
     - "'oo_sg_new_nodes' not in groups or groups.oo_sg_new_nodes|length == 0"
+    - "'oo_sg_current_nodes' not in groups or groups.oo_sg_current_nodes|length == 0"
+    - groups.oo_sg_current_nodes == groups.oo_sg_new_nodes
 
 - name: initialize upgrade bits
   import_playbook: init.yml
 
-- name: Drain and upgrade nodes
+- name: unschedule nodes
   hosts: oo_sg_current_nodes
-  # This var must be set with -e on invocation, as it is not a per-host inventory var
-  # and is evaluated early. Values such as "20%" can also be used.
-  serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
-  max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}"
-
-  pre_tasks:
+  tasks:
   - name: Load lib_openshift modules
-    include_role:
+    import_role:
       name: ../roles/lib_openshift
 
-  # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node
-  # or docker actually needs an upgrade before proceeding. Perhaps best to save this until
-  # we merge upgrade functionality into the base roles and a normal config.yml playbook run.
   - name: Mark node unschedulable
     oc_adm_manage_node:
       node: "{{ openshift.node.nodename | lower }}"
@@ -40,14 +34,27 @@
     register: node_unschedulable
     until: node_unschedulable|succeeded
 
+- name: Drain nodes
+  hosts: oo_sg_current_nodes
+  # This var must be set with -e on invocation, as it is not a per-host inventory var
+  # and is evaluated early. Values such as "20%" can also be used.
+  serial: "{{ openshift_upgrade_nodes_serial | default(1) }}"
+  max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}"
+  tasks:
   - name: Drain Node for Kubelet upgrade
     command: >
-      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+      {{ hostvars[groups.oo_first_master.0].openshift.common.client_binary }} adm drain {{ openshift.node.nodename | lower }}
+      --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+      --force --delete-local-data --ignore-daemonsets
+      --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
     delegate_to: "{{ groups.oo_first_master.0 }}"
     register: l_upgrade_nodes_drain_result
     until: not l_upgrade_nodes_drain_result | failed
-    retries: 60
-    delay: 60
+    retries: "{{ 1 if openshift_upgrade_nodes_drain_timeout | default(0) == '0' else 0  | int }}"
+    delay: 5
+    failed_when:
+    - l_upgrade_nodes_drain_result | failed
+    - openshift_upgrade_nodes_drain_timeout | default(0) == '0'
 
 # Alright, let's clean up!
 - name: clean up the old scale group

+ 15 - 26
roles/openshift_aws/defaults/main.yml

@@ -6,9 +6,7 @@ openshift_aws_create_security_groups: True
 openshift_aws_create_launch_config: True
 openshift_aws_create_scale_group: True
 
-openshift_aws_current_version: ''
-openshift_aws_new_version: ''
-
+openshift_aws_node_group_upgrade: False
 openshift_aws_wait_for_ssh: True
 
 openshift_aws_clusterid: default
@@ -19,7 +17,6 @@ openshift_aws_build_ami_group: "{{ openshift_aws_clusterid }}"
 openshift_aws_iam_cert_name: "{{ openshift_aws_clusterid }}-master-external"
 openshift_aws_iam_cert_path: ''
 openshift_aws_iam_cert_key_path: ''
-openshift_aws_scale_group_basename: "{{ openshift_aws_clusterid }} openshift"
 
 openshift_aws_iam_role_name: openshift_node_describe_instances
 openshift_aws_iam_role_policy_json: "{{ lookup('file', 'describeinstances.json') }}"
@@ -34,14 +31,12 @@ openshift_aws_ami_name: openshift-gi
 openshift_aws_base_ami_name: ami_base
 
 openshift_aws_launch_config_bootstrap_token: ''
-openshift_aws_launch_config_basename: "{{ openshift_aws_clusterid }}"
 
 openshift_aws_users: []
 
 openshift_aws_ami_tags:
   bootstrap: "true"
   openshift-created: "true"
-  clusterid: "{{ openshift_aws_clusterid }}"
   parent: "{{ openshift_aws_base_ami | default('unknown') }}"
 
 openshift_aws_s3_mode: create
@@ -124,6 +119,20 @@ openshift_aws_ami_map:
   infra: "{{ openshift_aws_ami }}"
   compute: "{{ openshift_aws_ami }}"
 
+openshift_aws_master_group:
+- name: "{{ openshift_aws_clusterid }} master group"
+  group: master
+
+openshift_aws_node_groups:
+- name: "{{ openshift_aws_clusterid }} compute group"
+  group: compute
+- name: "{{ openshift_aws_clusterid }} infra group"
+  group: infra
+
+openshift_aws_created_asgs: []
+openshift_aws_current_asgs: []
+
+# these will be used during upgrade
 openshift_aws_master_group_config:
   # The 'master' key is always required here.
   master:
@@ -139,7 +148,6 @@ openshift_aws_master_group_config:
       host-type: master
       sub-host-type: default
       runtime: docker
-      version: "{{ openshift_aws_new_version }}"
     wait_for_instances: True
     termination_policy: "{{ openshift_aws_node_group_termination_policy }}"
     replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}"
@@ -163,7 +171,6 @@ openshift_aws_node_group_config:
       host-type: node
       sub-host-type: compute
       runtime: docker
-      version: "{{ openshift_aws_new_version }}"
     termination_policy: "{{ openshift_aws_node_group_termination_policy }}"
     replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}"
     iam_role: "{{ openshift_aws_iam_role_name }}"
@@ -183,7 +190,6 @@ openshift_aws_node_group_config:
       host-type: node
       sub-host-type: infra
       runtime: docker
-      version: "{{ openshift_aws_new_version }}"
     termination_policy: "{{ openshift_aws_node_group_termination_policy }}"
     replace_all_instances: "{{ openshift_aws_node_group_replace_all_instances }}"
     iam_role: "{{ openshift_aws_iam_role_name }}"
@@ -283,21 +289,4 @@ openshift_aws_node_run_bootstrap_startup: True
 openshift_aws_node_user_data: ''
 openshift_aws_node_config_namespace: openshift-node
 
-openshift_aws_node_groups: nodes
-
 openshift_aws_masters_groups: masters,etcd,nodes
-
-# If creating extra node groups, you'll need to define all of the following
-
-# The format is the same as openshift_aws_node_group_config, but the top-level
-# key names should be different (ie, not == master or infra).
-# openshift_aws_node_group_config_extra: {}
-
-# This variable should look like openshift_aws_launch_config_security_groups
-# and contain a one-to-one mapping of top level keys that are defined in
-# openshift_aws_node_group_config_extra.
-# openshift_aws_launch_config_security_groups_extra: {}
-
-# openshift_aws_node_security_groups_extra: {}
-
-# openshift_aws_ami_map_extra: {}

+ 34 - 1
roles/openshift_aws/filter_plugins/openshift_aws_filters.py

@@ -4,11 +4,43 @@
 Custom filters for use in openshift_aws
 '''
 
+from ansible import errors
+
 
 class FilterModule(object):
     ''' Custom ansible filters for use by openshift_aws role'''
 
     @staticmethod
+    def scale_groups_serial(scale_group_info, upgrade=False):
+        ''' This function will determine what the deployment serial should be and return it
+
+          Search through the tags and find the deployment_serial tag. Once found,
+          determine if an increment is needed during an upgrade.
+          if upgrade is true then increment the serial and return it
+          else return the serial
+        '''
+        if scale_group_info == []:
+            return 1
+
+        scale_group_info = scale_group_info[0]
+
+        if not isinstance(scale_group_info, dict):
+            raise errors.AnsibleFilterError("|filter plugin failed: Expected scale_group_info to be a dict")
+
+        serial = None
+
+        for tag in scale_group_info['tags']:
+            if tag['key'] == 'deployment_serial':
+                serial = int(tag['value'])
+                if upgrade:
+                    serial += 1
+                break
+        else:
+            raise errors.AnsibleFilterError("|filter plugin failed: deployment_serial tag was not found")
+
+        return serial
+
+    @staticmethod
     def scale_groups_match_capacity(scale_group_info):
         ''' This function will verify that the scale group instance count matches
             the scale group desired capacity
@@ -38,4 +70,5 @@ class FilterModule(object):
     def filters(self):
         ''' returns a mapping of filters to methods '''
         return {'build_instance_tags': self.build_instance_tags,
-                'scale_groups_match_capacity': self.scale_groups_match_capacity}
+                'scale_groups_match_capacity': self.scale_groups_match_capacity,
+                'scale_groups_serial': self.scale_groups_serial}
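
Note: below is a minimal, standalone sketch of how the new scale_groups_serial filter behaves when exercised outside Ansible. The import path and the sample tag data are assumptions for illustration only; the real input is the ec2_asg_facts result list queried in build_node_group.yml.

# Hypothetical usage sketch of the scale_groups_serial filter (not part of the commit).
from openshift_aws_filters import FilterModule  # assumes the filter_plugins dir is on sys.path

filters = FilterModule().filters()

# Shape mimics ec2_asg_facts output: a list of ASG dicts carrying a 'tags' list.
asg_results = [{'tags': [{'key': 'deployment_serial', 'value': '2'}]}]

print(filters['scale_groups_serial']([]))                 # 1  -> no existing scale group yet
print(filters['scale_groups_serial'](asg_results))        # 2  -> plain provision keeps the serial
print(filters['scale_groups_serial'](asg_results, True))  # 3  -> upgrade=True increments it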

+ 8 - 3
roles/openshift_aws/tasks/accept_nodes.yml

@@ -1,6 +1,6 @@
 ---
 - name: fetch masters
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region | default('us-east-1') }}"
     filters:
       "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid,
@@ -11,7 +11,7 @@
   until: "'instances' in mastersout and mastersout.instances|length > 0"
 
 - name: fetch new node instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid,
@@ -22,9 +22,14 @@
   delay: 3
   until: "'instances' in instancesout and instancesout.instances|length > 0"
 
-- debug:
+- name: Dump the private dns names
+  debug:
     msg: "{{ instancesout.instances|map(attribute='private_dns_name') | list }}"
 
+- name: Dump the master public ip address
+  debug:
+    msg: "{{ mastersout.instances[0].public_ip_address }}"
+
 - name: approve nodes
   oc_adm_csr:
     #approve_all: True

+ 29 - 2
roles/openshift_aws/tasks/build_node_group.yml

@@ -1,6 +1,4 @@
 ---
-# This task file expects l_nodes_to_build to be passed in.
-
 # When openshift_aws_use_custom_ami is '' then
 # we retrieve the latest build AMI.
 # Then set openshift_aws_ami to the ami.
@@ -26,6 +24,35 @@
 # Need to set epoch time in one place to use for launch_config and scale_group
 - set_fact:
     l_epoch_time: "{{ ansible_date_time.epoch }}"
+#
+# query asg's and determine if we need to create the others.
+# if we find more than 1 for each type, then exit
+- name: query all asg's for this cluster
+  ec2_asg_facts:
+    region: "{{ openshift_aws_region }}"
+    tags: "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid} | combine(l_node_group_config[openshift_aws_node_group.group].tags) }}"
+  register: asgs
+
+- fail:
+    msg: "Found more than 1 auto scaling group that matches the query for group: {{ openshift_aws_node_group }}"
+  when:
+  - asgs.results|length > 1
+
+- debug:
+    msg: "{{ asgs }}"
+
+- name: set the value for the deployment_serial and the current asgs
+  set_fact:
+    l_deployment_serial: "{{  openshift_aws_node_group_deployment_serial if openshift_aws_node_group_deployment_serial is defined else asgs.results | scale_groups_serial(openshift_aws_node_group_upgrade) }}"
+    openshift_aws_current_asgs: "{{ asgs.results | map(attribute='auto_scaling_group_name') | list | union(openshift_aws_current_asgs) }}"
+
+- name: dump deployment serial
+  debug:
+    msg: "Deployment serial: {{ l_deployment_serial }}"
+
+- name: dump current_asgs
+  debug:
+    msg: "openshift_aws_current_asgs: {{ openshift_aws_current_asgs }}"
 
 - when: openshift_aws_create_iam_role
   include_tasks: iam_role.yml

+ 6 - 8
roles/openshift_aws/tasks/iam_role.yml

@@ -13,11 +13,10 @@
 #####
 - name: Create an iam role
   iam_role:
-    name: "{{ item.value.iam_role }}"
+    name: "{{ l_node_group_config[openshift_aws_node_group.group].iam_role }}"
     assume_role_policy_document: "{{ lookup('file','trustpolicy.json') }}"
     state: "{{ openshift_aws_iam_role_state | default('present') }}"
-  when: item.value.iam_role is defined
-  with_dict: "{{ l_nodes_to_build }}"
+  when: l_node_group_config[openshift_aws_node_group.group].iam_role is defined
 
 #####
 # The second part of this task file is linking the role to a policy
@@ -28,9 +27,8 @@
 - name: create an iam policy
   iam_policy:
     iam_type: role
-    iam_name: "{{ item.value.iam_role }}"
-    policy_json: "{{ item.value.policy_json }}"
-    policy_name: "{{ item.value.policy_name }}"
+    iam_name: "{{ l_node_group_config[openshift_aws_node_group.group].iam_role }}"
+    policy_json: "{{ l_node_group_config[openshift_aws_node_group.group].policy_json }}"
+    policy_name: "{{ l_node_group_config[openshift_aws_node_group.group].policy_name }}"
     state: "{{ openshift_aws_iam_role_state | default('present') }}"
-  when: item.value.iam_role is defined
-  with_dict: "{{ l_nodes_to_build }}"
+  when: "'iam_role' in l_node_group_config[openshift_aws_node_group.group]"

+ 24 - 13
roles/openshift_aws/tasks/launch_config.yml

@@ -1,15 +1,26 @@
 ---
-- fail:
-    msg: "Ensure that an AMI value is defined for openshift_aws_ami or openshift_aws_launch_config_custom_image."
-  when:
-  - openshift_aws_ami is undefined
+- name: fetch the security groups for launch config
+  ec2_group_facts:
+    filters:
+      group-name: "{{ openshift_aws_launch_config_security_groups[openshift_aws_node_group.group] }}"
+      vpc-id: "{{ vpcout.vpcs[0].id }}"
+    region: "{{ openshift_aws_region }}"
+  register: ec2sgs
 
-- fail:
-    msg: "Ensure that openshift_deployment_type is defined."
-  when:
-  - openshift_deployment_type is undefined
-
-- include_tasks: launch_config_create.yml
-  with_dict: "{{ l_nodes_to_build }}"
-  loop_control:
-    loop_var: launch_config_item
+# Create the scale group config
+- name: Create the node scale group launch config
+  ec2_lc:
+    name: "{{ openshift_aws_node_group.name }}-{{ openshift_aws_ami_map[openshift_aws_node_group.group] | default(openshift_aws_ami) }}-{{ l_epoch_time }}"
+    region: "{{ openshift_aws_region }}"
+    image_id: "{{ openshift_aws_ami_map[openshift_aws_node_group.group] | default(openshift_aws_ami) }}"
+    instance_type: "{{ l_node_group_config[openshift_aws_node_group.group].instance_type }}"
+    security_groups: "{{ openshift_aws_launch_config_security_group_id  | default(ec2sgs.security_groups | map(attribute='group_id')| list) }}"
+    instance_profile_name: "{{ l_node_group_config[openshift_aws_node_group.group].iam_role if l_node_group_config[openshift_aws_node_group.group].iam_role is defined and
+                                                                           l_node_group_config[openshift_aws_node_group.group].iam_role != '' and
+                                                                           openshift_aws_create_iam_role
+                                                                        else omit }}"
+    user_data: "{{ lookup('template', 'user_data.j2') }}"
+    key_name: "{{ openshift_aws_ssh_key_name }}"
+    ebs_optimized: False
+    volumes: "{{ l_node_group_config[openshift_aws_node_group.group].volumes }}"
+    assign_public_ip: True

+ 0 - 26
roles/openshift_aws/tasks/launch_config_create.yml

@@ -1,26 +0,0 @@
----
-- name: fetch the security groups for launch config
-  ec2_group_facts:
-    filters:
-      group-name: "{{ l_launch_config_security_groups[launch_config_item.key] }}"
-      vpc-id: "{{ vpcout.vpcs[0].id }}"
-    region: "{{ openshift_aws_region }}"
-  register: ec2sgs
-
-# Create the scale group config
-- name: Create the node scale group launch config
-  ec2_lc:
-    name: "{{ openshift_aws_launch_config_basename }}-{{ launch_config_item.key }}{{'-' ~ openshift_aws_new_version if openshift_aws_new_version != '' else '' }}"
-    region: "{{ openshift_aws_region }}"
-    image_id: "{{ l_aws_ami_map[launch_config_item.key] | default(openshift_aws_ami) }}"
-    instance_type: "{{ launch_config_item.value.instance_type }}"
-    security_groups: "{{ openshift_aws_launch_config_security_group_id  | default(ec2sgs.security_groups | map(attribute='group_id')| list) }}"
-    instance_profile_name: "{{ launch_config_item.value.iam_role if launch_config_item.value.iam_role is defined and
-                                                                    launch_config_item.value.iam_role != '' and
-                                                                    openshift_aws_create_iam_role
-                                                                 else omit }}"
-    user_data: "{{ lookup('template', 'user_data.j2') }}"
-    key_name: "{{ openshift_aws_ssh_key_name }}"
-    ebs_optimized: False
-    volumes: "{{ launch_config_item.value.volumes }}"
-    assign_public_ip: True

+ 5 - 4
roles/openshift_aws/tasks/provision.yml

@@ -20,13 +20,14 @@
 
 - name: include scale group creation for master
   include_tasks: build_node_group.yml
+  with_items: "{{ openshift_aws_master_group }}"
   vars:
-    l_nodes_to_build: "{{ openshift_aws_master_group_config }}"
-    l_launch_config_security_groups: "{{ openshift_aws_launch_config_security_groups }}"
-    l_aws_ami_map: "{{ openshift_aws_ami_map }}"
+    l_node_group_config: "{{ openshift_aws_master_group_config }}"
+  loop_control:
+    loop_var: openshift_aws_node_group
 
 - name: fetch newly created instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "tag:clusterid": "{{ openshift_aws_clusterid }}"

+ 1 - 1
roles/openshift_aws/tasks/provision_instance.yml

@@ -27,7 +27,7 @@
       Name: "{{ openshift_aws_base_ami_name }}"
 
 - name: fetch newly created instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "tag:Name": "{{ openshift_aws_base_ami_name }}"

+ 7 - 12
roles/openshift_aws/tasks/provision_nodes.yml

@@ -3,7 +3,7 @@
 # bootstrap should be created on first master
 # need to fetch it and shove it into cloud data
 - name: fetch master instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "tag:clusterid": "{{ openshift_aws_clusterid }}"
@@ -31,20 +31,15 @@
 
 - name: include build compute and infra node groups
   include_tasks: build_node_group.yml
+  with_items: "{{ openshift_aws_node_groups }}"
   vars:
-    l_nodes_to_build: "{{ openshift_aws_node_group_config }}"
-    l_launch_config_security_groups: "{{ openshift_aws_launch_config_security_groups }}"
-    l_aws_ami_map: "{{ openshift_aws_ami_map }}"
-
-- name: include build node group for extra nodes
-  include_tasks: build_node_group.yml
-  when: openshift_aws_node_group_config_extra is defined
-  vars:
-    l_nodes_to_build: "{{ openshift_aws_node_group_config_extra | default({}) }}"
-    l_launch_config_security_groups: "{{ openshift_aws_launch_config_security_groups_extra }}"
-    l_aws_ami_map: "{{ openshift_aws_ami_map_extra }}"
+    l_node_group_config: "{{ openshift_aws_node_group_config }}"
+  loop_control:
+    loop_var: openshift_aws_node_group
 
 # instances aren't scaling fast enough here, we need to wait for them
 - when: openshift_aws_wait_for_ssh | bool
   name: wait for our new nodes to come up
   include_tasks: wait_for_groups.yml
+  vars:
+    created_asgs: "{{ openshift_aws_created_asgs }}"

+ 6 - 3
roles/openshift_aws/tasks/remove_scale_group.yml

@@ -1,10 +1,13 @@
 ---
+# FIGURE OUT HOW TO REMOVE SCALE GROUPS
+# use openshift_aws_current_asgs??
 - name: fetch the scale groups
   ec2_asg_facts:
     region: "{{ openshift_aws_region }}"
+    name: "^{{ item }}$"
     tags:
-      "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid,
-           'version': openshift_aws_current_version} }}"
+      "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid} }}"
+  with_items: "{{ openshift_aws_current_asgs if openshift_aws_current_asgs != [] else openshift_aws_asgs_to_remove }}"
   register: qasg
 
 - name: remove non-master scale groups
@@ -14,7 +17,7 @@
     name: "{{ item.auto_scaling_group_name }}"
   when: "'master'  not in item.auto_scaling_group_name"
   register: asg_results
-  with_items: "{{ qasg.results }}"
+  with_items: "{{ qasg | json_query('results[*]') | sum(attribute='results', start=[]) }}"
   async: 600
   poll: 0

+ 1 - 1
roles/openshift_aws/tasks/s3.yml

@@ -1,6 +1,6 @@
 ---
 - name: Create an s3 bucket
-  s3:
+  aws_s3:
     bucket: "{{ openshift_aws_s3_bucket_name }}"
     mode: "{{ openshift_aws_s3_mode }}"
     region: "{{ openshift_aws_region }}"

+ 23 - 13
roles/openshift_aws/tasks/scale_group.yml

@@ -1,20 +1,30 @@
 ---
+- name: set node group name
+  set_fact:
+    l_node_group_name: "{{ openshift_aws_node_group.name }} {{ l_deployment_serial }}"
+
 - name: Create the scale group
   ec2_asg:
-    name: "{{ openshift_aws_scale_group_basename }} {{ item.key }}"
-    launch_config_name: "{{ openshift_aws_launch_config_basename }}-{{ item.key }}{{ '-' ~ openshift_aws_new_version if openshift_aws_new_version != '' else '' }}"
-    health_check_period: "{{ item.value.health_check.period }}"
-    health_check_type: "{{ item.value.health_check.type }}"
-    min_size: "{{ item.value.min_size }}"
-    max_size: "{{ item.value.max_size }}"
-    desired_capacity: "{{ item.value.desired_size }}"
+    name: "{{ l_node_group_name }}"
+    launch_config_name: "{{ openshift_aws_node_group.name }}-{{ openshift_aws_ami_map[openshift_aws_node_group.group] | default(openshift_aws_ami) }}-{{ l_epoch_time }}"
+    health_check_period: "{{ l_node_group_config[openshift_aws_node_group.group].health_check.period }}"
+    health_check_type: "{{ l_node_group_config[openshift_aws_node_group.group].health_check.type }}"
+    min_size: "{{ l_node_group_config[openshift_aws_node_group.group].min_size }}"
+    max_size: "{{ l_node_group_config[openshift_aws_node_group.group].max_size }}"
+    desired_capacity: "{{ l_node_group_config[openshift_aws_node_group.group].desired_size }}"
     region: "{{ openshift_aws_region }}"
-    termination_policies: "{{ item.value.termination_policy if 'termination_policy' in  item.value else omit }}"
-    load_balancers: "{{ item.value.elbs if 'elbs' in item.value else omit }}"
-    wait_for_instances: "{{ item.value.wait_for_instances | default(False)}}"
+    termination_policies: "{{ l_node_group_config[openshift_aws_node_group.group].termination_policy if 'termination_policy' in  l_node_group_config[openshift_aws_node_group.group] else omit }}"
+    load_balancers: "{{ l_node_group_config[openshift_aws_node_group.group].elbs if 'elbs' in l_node_group_config[openshift_aws_node_group.group] else omit }}"
+    wait_for_instances: "{{ l_node_group_config[openshift_aws_node_group.group].wait_for_instances | default(False)}}"
     vpc_zone_identifier: "{{ subnetout.subnets[0].id }}"
     replace_instances: "{{ openshift_aws_node_group_replace_instances if openshift_aws_node_group_replace_instances != [] else omit }}"
-    replace_all_instances: "{{ omit if openshift_aws_node_group_replace_instances != [] else (item.value.replace_all_instances | default(omit)) }}"
+    replace_all_instances: "{{ omit if openshift_aws_node_group_replace_instances != []
+                                    else (l_node_group_config[openshift_aws_node_group.group].replace_all_instances | default(omit)) }}"
     tags:
-    - "{{ openshift_aws_node_group_config_tags | combine(item.value.tags) }}"
-  with_dict: "{{ l_nodes_to_build }}"
+    - "{{ openshift_aws_node_group_config_tags
+          | combine(l_node_group_config[openshift_aws_node_group.group].tags)
+          | combine({'deployment_serial': l_deployment_serial, 'ami': openshift_aws_ami_map[openshift_aws_node_group.group] | default(openshift_aws_ami)}) }}"
+
+- name: append the asg name to the openshift_aws_created_asgs fact
+  set_fact:
+    openshift_aws_created_asgs: "{{ [l_node_group_name] | union(openshift_aws_created_asgs) | list }}"
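
Side note on the naming scheme the launch_config.yml and scale_group.yml changes above converge on: launch configurations are now keyed by node group name, AMI and epoch, while scale groups are keyed by node group name plus deployment serial (the serial and AMI also land in the ASG tags). A rough sketch with hypothetical values:

# Hypothetical illustration of the new naming convention (sample values are made up).
node_group_name = "default compute group"   # openshift_aws_node_group.name
ami = "ami-0a1b2c3d"                        # resolved via openshift_aws_ami_map / openshift_aws_ami
epoch_time = "1513000000"                   # l_epoch_time, set once per run
deployment_serial = 2                       # from the scale_groups_serial filter

launch_config_name = "{}-{}-{}".format(node_group_name, ami, epoch_time)
scale_group_name = "{} {}".format(node_group_name, deployment_serial)

print(launch_config_name)  # default compute group-ami-0a1b2c3d-1513000000
print(scale_group_name)    # default compute group 2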

+ 3 - 3
roles/openshift_aws/tasks/seal_ami.yml

@@ -1,6 +1,6 @@
 ---
 - name: fetch newly created instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "tag:Name": "{{ openshift_aws_base_ami_name }}"
@@ -12,7 +12,7 @@
 
 - name: bundle ami
   ec2_ami:
-    instance_id: "{{ instancesout.instances.0.id }}"
+    instance_id: "{{ instancesout.instances.0.instance_id }}"
     region: "{{ openshift_aws_region }}"
     state: present
     description: "This was provisioned {{ ansible_date_time.iso8601 }}"
@@ -46,4 +46,4 @@
   ec2:
     state: absent
     region: "{{ openshift_aws_region }}"
-    instance_ids: "{{ instancesout.instances.0.id }}"
+    instance_ids: "{{ instancesout.instances.0.instance_id }}"

+ 23 - 7
roles/openshift_aws/tasks/security_group.yml

@@ -6,11 +6,27 @@
       "tag:Name": "{{ openshift_aws_clusterid }}"
   register: vpcout
 
-- include_tasks: security_group_create.yml
-  vars:
-    l_security_groups: "{{ openshift_aws_node_security_groups }}"
+- name: create the node group sgs
+  oo_ec2_group:
+    name: "{{ item.value.name}}"
+    description: "{{ item.value.desc }}"
+    rules: "{{ item.value.rules if 'rules' in item.value else [] }}"
+    region: "{{ openshift_aws_region }}"
+    vpc_id: "{{ vpcout.vpcs[0].id }}"
+  with_dict: "{{ openshift_aws_node_security_groups }}"
+
+- name: create the k8s sgs for the node group
+  oo_ec2_group:
+    name: "{{ item.value.name }}_k8s"
+    description: "{{ item.value.desc }} for k8s"
+    region: "{{ openshift_aws_region }}"
+    vpc_id: "{{ vpcout.vpcs[0].id }}"
+  with_dict: "{{ openshift_aws_node_security_groups }}"
+  register: k8s_sg_create
 
-- include_tasks: security_group_create.yml
-  when: openshift_aws_node_security_groups_extra is defined
-  vars:
-    l_security_groups: "{{ openshift_aws_node_security_groups_extra | default({}) }}"
+- name: tag sg groups with proper tags
+  ec2_tag:
+    tags: "{{ openshift_aws_security_groups_tags }}"
+    resource: "{{ item.group_id }}"
+    region: "{{ openshift_aws_region }}"
+  with_items: "{{ k8s_sg_create.results }}"

+ 0 - 25
roles/openshift_aws/tasks/security_group_create.yml

@@ -1,25 +0,0 @@
----
-- name: create the node group sgs
-  oo_ec2_group:
-    name: "{{ item.value.name}}"
-    description: "{{ item.value.desc }}"
-    rules: "{{ item.value.rules if 'rules' in item.value else [] }}"
-    region: "{{ openshift_aws_region }}"
-    vpc_id: "{{ vpcout.vpcs[0].id }}"
-  with_dict: "{{ l_security_groups }}"
-
-- name: create the k8s sgs for the node group
-  oo_ec2_group:
-    name: "{{ item.value.name }}_k8s"
-    description: "{{ item.value.desc }} for k8s"
-    region: "{{ openshift_aws_region }}"
-    vpc_id: "{{ vpcout.vpcs[0].id }}"
-  with_dict: "{{ l_security_groups }}"
-  register: k8s_sg_create
-
-- name: tag sg groups with proper tags
-  ec2_tag:
-    tags: "{{ openshift_aws_security_groups_tags }}"
-    resource: "{{ item.group_id }}"
-    region: "{{ openshift_aws_region }}"
-  with_items: "{{ k8s_sg_create.results }}"

+ 4 - 2
roles/openshift_aws/tasks/setup_master_group.yml

@@ -8,7 +8,7 @@
     msg: "openshift_aws_region={{ openshift_aws_region }}"
 
 - name: fetch newly created instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "tag:clusterid": "{{ openshift_aws_clusterid }}"
@@ -19,11 +19,13 @@
   delay: 3
   until: instancesout.instances|length > 0
 
+- debug: var=instancesout
+
 - name: add new master to masters group
   add_host:
     groups: "{{ openshift_aws_masters_groups }}"
     name: "{{ item.public_dns_name }}"
-    hostname: "{{ openshift_aws_clusterid }}-master-{{ item.id[:-5] }}"
+    hostname: "{{ openshift_aws_clusterid }}-master-{{ item.instance_id[:-5] }}"
   with_items: "{{ instancesout.instances }}"
 
 - name: wait for ssh to become available

+ 17 - 5
roles/openshift_aws/tasks/setup_scale_group_facts.yml

@@ -1,11 +1,15 @@
 ---
-- name: group scale group nodes
-  ec2_remote_facts:
+- name: fetch all created instances
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
-      "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid }}}"
+      "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid,
+           'instance-state-name': 'running'} }}"
   register: qinstances
 
+# The building of new and current groups is dependent of having a list of the current asgs and the created ones
+# that can be found in the variables: openshift_aws_created_asgs, openshift_aws_current_asgs.  If these do not exist, we cannot determine which hosts are
+# new and which hosts are current.
 - name: Build new node group
   add_host:
     groups: oo_sg_new_nodes
@@ -13,10 +17,16 @@
     name: "{{ item.public_dns_name }}"
     hostname: "{{ item.public_dns_name }}"
   when:
-  - (item.tags.version | default(False)) == openshift_aws_new_version
+  - openshift_aws_created_asgs != []
+  - "'aws:autoscaling:groupName' in item.tags"
+  - item.tags['aws:autoscaling:groupName'] in openshift_aws_created_asgs
   - "'node' in item.tags['host-type']"
   with_items: "{{ qinstances.instances }}"
 
+- name: dump openshift_aws_current_asgs
+  debug:
+    msg: "{{ openshift_aws_current_asgs }}"
+
 - name: Build current node group
   add_host:
     groups: oo_sg_current_nodes
@@ -24,7 +34,9 @@
     name: "{{ item.public_dns_name }}"
     hostname: "{{ item.public_dns_name }}"
   when:
-  - (item.tags.version | default('')) == openshift_aws_current_version
+  - openshift_aws_current_asgs != []
+  - "'aws:autoscaling:groupName' in item.tags"
+  - item.tags['aws:autoscaling:groupName'] in openshift_aws_current_asgs
   - "'node' in item.tags['host-type']"
   with_items: "{{ qinstances.instances }}"
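
For orientation, here is a rough standalone sketch of the bucketing rule these tasks now apply: an instance joins oo_sg_new_nodes or oo_sg_current_nodes depending on whether its aws:autoscaling:groupName tag appears in openshift_aws_created_asgs or openshift_aws_current_asgs and it carries a node host-type. All instance and ASG names below are hypothetical.

# Hypothetical illustration of the new/current node bucketing (not part of the commit).
created_asgs = ["default compute group 2", "default infra group 2"]
current_asgs = ["default compute group 1", "default infra group 1"]

instances = [
    {"public_dns_name": "ec2-new.example.com",
     "tags": {"host-type": "node", "aws:autoscaling:groupName": "default compute group 2"}},
    {"public_dns_name": "ec2-old.example.com",
     "tags": {"host-type": "node", "aws:autoscaling:groupName": "default compute group 1"}},
]

def in_group(instance, asg_names):
    # Mirrors the 'when' conditions: tag present, tag value in the ASG list, node host-type.
    tags = instance["tags"]
    return ("aws:autoscaling:groupName" in tags
            and tags["aws:autoscaling:groupName"] in asg_names
            and "node" in tags.get("host-type", ""))

new_nodes = [i["public_dns_name"] for i in instances if created_asgs and in_group(i, created_asgs)]
current_nodes = [i["public_dns_name"] for i in instances if current_asgs and in_group(i, current_asgs)]
print(new_nodes)      # ['ec2-new.example.com']
print(current_nodes)  # ['ec2-old.example.com']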
 
 

+ 14 - 4
roles/openshift_aws/tasks/upgrade_node_group.yml

@@ -1,12 +1,22 @@
 ---
-- fail:
-    msg: 'Please ensure the current_version and new_version variables are not the same.'
+- include_tasks: provision_nodes.yml
+  vars:
+    openshift_aws_node_group_upgrade: True
   when:
-  - openshift_aws_current_version == openshift_aws_new_version
+  - openshift_aws_upgrade_provision_nodes | default(True)
 
-- include_tasks: provision_nodes.yml
+- debug: var=openshift_aws_current_asgs
+- debug: var=openshift_aws_created_asgs
+
+- name: fail if asg variables aren't set
+  fail:
+    msg: "Please ensure that openshift_aws_created_asgs and openshift_aws_current_asgs are defined."
+  when:
+  - openshift_aws_created_asgs == []
+  - openshift_aws_current_asgs == []
 
 - include_tasks: accept_nodes.yml
+  when: openshift_aws_upgrade_accept_nodes | default(True)
 
 - include_tasks: setup_scale_group_facts.yml
 
 

+ 16 - 6
roles/openshift_aws/tasks/wait_for_groups.yml

@@ -1,31 +1,41 @@
 ---
 ---
 # The idea here is to wait until all scale groups are at
 # their desired capacity before continuing.
-- name: fetch the scale groups
+#  This is accomplished with a custom filter_plugin and until clause
+- name: "fetch the scale groups"
   ec2_asg_facts:
     region: "{{ openshift_aws_region }}"
     tags:
-      "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid} }}"
+      "{{ {'kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid } }}"
   register: qasg
-  until: qasg.results | scale_groups_match_capacity | bool
+  until: qasg | json_query('results[*]') | scale_groups_match_capacity | bool
   delay: 10
   retries: 60
 
+- debug: var=openshift_aws_created_asgs
+
+# how do we gaurantee the instances are up?
 - name: fetch newly created instances
-  ec2_remote_facts:
+  ec2_instance_facts:
     region: "{{ openshift_aws_region }}"
     filters:
       "{{ {'tag:kubernetes.io/cluster/' ~ openshift_aws_clusterid: openshift_aws_clusterid,
-           'tag:version': openshift_aws_new_version} }}"
+           'tag:aws:autoscaling:groupName': item,
+           'instance-state-name': 'running'} }}"
+  with_items: "{{ openshift_aws_created_asgs if openshift_aws_created_asgs != [] else qasg | sum(attribute='results', start=[]) }}"
   register: instancesout
   until: instancesout.instances|length > 0
   delay: 5
   retries: 60
 
+- name: dump instances
+  debug:
+    msg: "{{ instancesout.results | sum(attribute='instances', start=[]) }}"
+
 - name: wait for ssh to become available
   wait_for:
     port: 22
     host: "{{ item.public_ip_address }}"
     timeout: 300
     search_regex: OpenSSH
-  with_items: "{{ instancesout.instances }}"
+  with_items: "{{ instancesout.results | sum(attribute='instances', start=[]) }}"
+ 3 - 3
roles/openshift_aws/templates/user_data.j2

@@ -7,8 +7,8 @@ write_files:
   owner: 'root:root'
   permissions: '0640'
   content: |
-    openshift_group_type: {{ launch_config_item.key }}
-{%   if launch_config_item.key != 'master' %}
+    openshift_group_type: {{ openshift_aws_node_group.group }}
+{%   if openshift_aws_node_group.group != 'master' %}
 - path: /etc/origin/node/bootstrap.kubeconfig
   owner: 'root:root'
   permissions: '0640'
@@ -19,7 +19,7 @@ runcmd:
 {%     if openshift_aws_node_run_bootstrap_startup %}
 - [ ansible-playbook, /root/openshift_bootstrap/bootstrap.yml]
 {%     endif %}
-{%     if launch_config_item.key != 'master' %}
+{%     if openshift_aws_node_group.group != 'master' %}
 - [ systemctl, restart, NetworkManager]
 - [ systemctl, enable, {% if openshift_deployment_type == 'openshift-enterprise' %}atomic-openshift{% else %}origin{% endif %}-node]
 - [ systemctl, start, {% if openshift_deployment_type == 'openshift-enterprise' %}atomic-openshift{% else %}origin{% endif %}-node]