Browse Source

Merge pull request #1078 from joelddiaz/master

sync master -> prod
Joel Diaz 9 years ago
parent
commit
c607f1ba93

+ 47 - 21
bin/ohi

@@ -1,14 +1,16 @@
 #!/usr/bin/env python
+'''
+Ohi = Openshift Host Inventory
+
+This script provides an easy way to look at your host inventory.
+
+This depends on multi_inventory being set up correctly.
+'''
 # vim: expandtab:tabstop=4:shiftwidth=4
 
 import argparse
-import traceback
 import sys
 import os
-import re
-import tempfile
-import time
-import subprocess
 import ConfigParser
 
 from openshift_ansible import awsutil
@@ -20,6 +22,9 @@ CONFIG_HOST_TYPE_ALIAS_SECTION = 'host_type_aliases'
 
 
 class Ohi(object):
+    '''
+        Class for managing openshift host inventory
+    '''
     def __init__(self):
         self.host_type_aliases = {}
         self.file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)))
@@ -35,6 +40,10 @@ class Ohi(object):
         self.aws = awsutil.AwsUtil(self.host_type_aliases)
 
     def run(self):
+        '''
+            Call into awsutil and retrieve the desired hosts and environments
+        '''
+
         if self.args.list_host_types:
             self.aws.print_host_types()
             return 0
@@ -43,18 +52,24 @@ class Ohi(object):
         if self.args.host_type is not None and \
            self.args.env is not None:
             # Both env and host-type specified
-            hosts = self.aws.get_host_list(host_type=self.args.host_type, \
-                                           envs=self.args.env)
+            hosts = self.aws.get_host_list(host_type=self.args.host_type,
+                                           envs=self.args.env,
+                                           version=self.args.openshift_version,
+                                           cached=self.args.cache_only)
 
         if self.args.host_type is None and \
            self.args.env is not None:
             # Only env specified
-            hosts = self.aws.get_host_list(envs=self.args.env)
+            hosts = self.aws.get_host_list(envs=self.args.env,
+                                           version=self.args.openshift_version,
+                                           cached=self.args.cache_only)
 
         if self.args.host_type is not None and \
            self.args.env is None:
             # Only host-type specified
-            hosts = self.aws.get_host_list(host_type=self.args.host_type)
+            hosts = self.aws.get_host_list(host_type=self.args.host_type,
+                                           version=self.args.openshift_version,
+                                           cached=self.args.cache_only)
 
         if hosts is None:
             # We weren't able to determine what they wanted to do
@@ -69,6 +84,9 @@ class Ohi(object):
         return 0
 
     def parse_config_file(self):
+        '''
+            Parse the config file for ohi
+        '''
         if os.path.isfile(self.config_path):
             config = ConfigParser.ConfigParser()
             config.read(self.config_path)
@@ -85,23 +103,27 @@ class Ohi(object):
 
         parser = argparse.ArgumentParser(description='OpenShift Host Inventory')
 
-        parser.add_argument('--list-host-types', default=False, action='store_true',
-                       help='List all of the host types')
+        parser.add_argument('--list-host-types', default=False, action='store_true', help='List all of the host types')
 
-        parser.add_argument('-e', '--env', action="store",
-                       help="Which environment to use")
+        parser.add_argument('-e', '--env', action="store", help="Which environment to use")
 
-        parser.add_argument('-t', '--host-type', action="store",
-                       help="Which host type to use")
+        parser.add_argument('-t', '--host-type', action="store", help="Which host type to use")
 
-        parser.add_argument('-l', '--user', action='store', default=None,
-                               help='username')
+        parser.add_argument('-l', '--user', action='store', default=None, help='username')
 
+        parser.add_argument('-c', '--cache-only', action='store_true', default=False,
+                            help='Retrieve the host inventory by cache only. Default is false.')
 
-        self.args = parser.parse_args()
+        parser.add_argument('-o', '--openshift-version', action='store', default='2',
+                            help='Specify the openshift version. Default is 2')
 
 
-if __name__ == '__main__':
+        self.args = parser.parse_args()
+
+def main():
+    '''
+    Ohi will do its work here
+    '''
     if len(sys.argv) == 1:
         print "\nError: No options given. Use --help to see the available options\n"
         sys.exit(0)
@@ -110,5 +132,9 @@ if __name__ == '__main__':
         ohi = Ohi()
         exitcode = ohi.run()
         sys.exit(exitcode)
-    except ArgumentError as e:
-        print "\nError: %s\n" % e.message
+    except ArgumentError as err:
+        print "\nError: %s\n" % err.message
+
+if __name__ == '__main__':
+    main()
+

+ 19 - 19
bin/openshift_ansible/awsutil.py

@@ -46,14 +46,17 @@ class AwsUtil(object):
                 self.alias_lookup[value] = key
 
     @staticmethod
-    def get_inventory(args=None):
+    def get_inventory(args=None, cached=False):
         """Calls the inventory script and returns a dictionary containing the inventory."
 
         Keyword arguments:
         args -- optional arguments to pass to the inventory script
         """
         minv = multi_inventory.MultiInventory(args)
-        minv.run()
+        if cached:
+            minv.get_inventory_from_cache()
+        else:
+            minv.run()
         return minv.result
 
     def get_environments(self):
@@ -168,11 +171,12 @@ class AwsUtil(object):
         host_type = self.resolve_host_type(host_type)
         return "tag_env-host-type_%s-%s" % (env, host_type)
 
-    def get_host_list(self, host_type=None, envs=None):
+    def get_host_list(self, host_type=None, envs=None, version=None, cached=False):
         """Get the list of hosts from the inventory using host-type and environment
         """
+        retval = set([])
         envs = envs or []
-        inv = self.get_inventory()
+        inv = self.get_inventory(cached=cached)
 
         # We prefer to deal with a list of environments
         if issubclass(type(envs), basestring):
@@ -183,29 +187,25 @@ class AwsUtil(object):
 
         if host_type and envs:
             # Both host type and environment were specified
-            retval = []
             for env in envs:
-                env_host_type_tag = self.gen_env_host_type_tag(host_type, env)
-                if env_host_type_tag in inv.keys():
-                    retval += inv[env_host_type_tag]
-            return set(retval)
+                retval.update(inv.get('tag_environment_%s' % env, []))
+            retval.intersection_update(inv.get(self.gen_host_type_tag(host_type), []))
 
-        if envs and not host_type:
+        elif envs and not host_type:
             # Just environment was specified
-            retval = []
             for env in envs:
                 env_tag = AwsUtil.gen_env_tag(env)
                 if env_tag in inv.keys():
-                    retval += inv[env_tag]
-            return set(retval)
+                    retval.update(inv.get(env_tag, []))
 
-        if host_type and not envs:
+        elif host_type and not envs:
             # Just host-type was specified
-            retval = []
             host_type_tag = self.gen_host_type_tag(host_type)
             if host_type_tag in inv.keys():
-                retval = inv[host_type_tag]
-            return set(retval)
+                retval.update(inv.get(host_type_tag, []))
+
+        # If version is specified then return only hosts in that version
+        if version:
+            retval.intersection_update(inv.get('oo_version_%s' % version, []))
 
-        # We should never reach here!
-        raise ArgumentError("Invalid combination of parameters")
+        return retval

+ 9 - 9
inventory/multi_inventory.py

@@ -56,15 +56,6 @@ class MultiInventory(object):
         else:
             self.config_file = None # expect env vars
 
-
-    def run(self):
-        '''This method checks to see if the local
-           cache is valid for the inventory.
-
-           if the cache is valid; return cache
-           else the credentials are loaded from multi_inventory.yaml or from the env
-           and we attempt to get the inventory from the provider specified.
-        '''
         # load yaml
         if self.config_file and os.path.isfile(self.config_file):
             self.config = self.load_yaml_config()
@@ -91,6 +82,15 @@ class MultiInventory(object):
         if self.config.has_key('cache_location'):
             self.cache_path = self.config['cache_location']
 
+    def run(self):
+        '''This method checks to see if the local
+           cache is valid for the inventory.
+
+           if the cache is valid; return cache
+           else the credentials are loaded from multi_inventory.yaml or from the env
+           and we attempt to get the inventory from the provider specified.
+        '''
+
         if self.args.get('refresh_cache', None):
             self.get_inventory()
             self.write_to_cache()

+ 3 - 3
roles/openshift_master/tasks/main.yml

@@ -228,7 +228,7 @@
   register: start_result
 
 - set_fact:
-    master_service_status_changed = start_result | changed
+    master_service_status_changed: start_result | changed
   when: not openshift_master_ha | bool
 
 - name: Start and enable master api
@@ -237,7 +237,7 @@
   register: start_result
 
 - set_fact:
-    master_api_service_status_changed = start_result | changed
+    master_api_service_status_changed: start_result | changed
   when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
 
 - name: Start and enable master controller
@@ -246,7 +246,7 @@
   register: start_result
 
 - set_fact:
-    master_controllers_service_status_changed = start_result | changed
+    master_controllers_service_status_changed: start_result | changed
   when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
 
 - name: Install cluster packages

+ 3 - 3
roles/openshift_node/tasks/main.yml

@@ -85,11 +85,11 @@
     docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')
                                       | oo_split() | union(['registry.access.redhat.com'])
                                       | difference(['']) }}"
-  when: openshift.common.deployment_type == 'enterprise'
+  when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']
 - set_fact:
     docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')
                                       | oo_split() | difference(['']) }}"
-  when: openshift.common.deployment_type != 'enterprise'
+  when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']
 
 - name: Add personal registries
   lineinfile:
@@ -131,4 +131,4 @@
   register: start_result
 
 - set_fact:
-    node_service_status_changed = start_result | changed
+    node_service_status_changed: start_result | changed

+ 10 - 0
roles/os_zabbix/vars/template_docker.yml

@@ -12,6 +12,11 @@ g_template_docker:
     - Docker Daemon
     value_type: int
 
+  - key: docker.container.dns.resolution
+    applications:
+    - Docker Daemon
+    value_type: int
+
   - key: docker.storage.is_loopback
     applications:
     - Docker Storage
@@ -62,6 +67,11 @@ g_template_docker:
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
     priority: high
 
+  - name: 'docker.container.dns.resolution failed on {HOST.NAME}'
+    expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
+    priority: high
+
   - name: 'Docker storage is using LOOPBACK on {HOST.NAME}'
     expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'

+ 8 - 0
roles/os_zabbix/vars/template_openshift_master.yml

@@ -269,6 +269,14 @@ g_template_openshift_master:
     - 'Openshift Master process not running on {HOST.NAME}'
     priority: avg
 
+  - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+    expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    description: The application create loop has failed 4 or more times in the last hour
+    priority: avg
+
   - name: 'Openshift Master API health check is failing on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'

+ 13 - 3
roles/os_zabbix/vars/template_zagg_server.yml

@@ -7,7 +7,12 @@ g_template_zagg_server:
     - Zagg Server
     value_type: int
 
-  - key: zagg.server.processor.errors
+  - key: zagg.server.metrics.errors
+    applications:
+    - Zagg Server
+    value_type: int
+
+  - key: zagg.server.heartbeat.errors
     applications:
     - Zagg Server
     value_type: int
@@ -18,8 +23,13 @@ g_template_zagg_server:
     value_type: int
 
   ztriggers:
-  - name: 'Error sending metrics on {HOST.NAME}'
-    expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+  - name: 'Error processing metrics on {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    priority: average
+
+  - name: 'Error processing heartbeats on {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
     priority: average
 

+ 1 - 1
roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2

@@ -42,7 +42,7 @@ ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }}
            -v /etc/localtime:/etc/localtime                                                  \
            -v /run/pcp:/run/pcp                                                              \
            -v /var/run/docker.sock:/var/run/docker.sock                                      \
-           -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock                      \
+           -v /var/run/openvswitch:/var/run/openvswitch                      \
 {% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %}
            -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig  \
            -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \