"""
Module for performing checks on a Kibana logging deployment
"""

import json
import ssl

try:
    from urllib2 import HTTPError, URLError
    import urllib2
except ImportError:
    from urllib.error import HTTPError, URLError
    import urllib.request as urllib2

from openshift_checks import get_var
from openshift_checks.logging.logging import LoggingCheck


class Kibana(LoggingCheck):
    """Module that checks an integrated logging Kibana deployment"""

    # Identifier and categories of this check.
    # NOTE(review): presumably read by the openshift_checks framework to
    # select which checks to run -- confirm against the base class.
    name = "kibana"
    tags = ["health", "logging"]

    # Namespace handed to the "oc" helper; filled in by run() from the
    # "openshift_logging_namespace" variable (default "logging").
    logging_namespace = None

    def run(self, tmp, task_vars):
        """
        Look up the Kibana pods, then check the pods and the Kibana route,
        reporting the first problem encountered.
        Returns: result as hash with "failed", "changed" and "msg" keys
        """

        def outcome(failed, message):
            # every exit path reports the same three keys; nothing is ever changed
            return {"failed": failed, "changed": False, "msg": message}

        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")

        pods, lookup_error = super(Kibana, self).get_pods_for_component(
            self.execute_module,
            self.logging_namespace,
            "kibana",
            task_vars,
        )
        if lookup_error:
            return outcome(True, lookup_error)

        # the route is only examined when the pod check found nothing to report
        problem = self.check_kibana(pods) or self._check_kibana_route(task_vars)
        if not problem:
            # TODO(lmeyer): run it all again for the ops cluster
            return outcome(False, 'No problems found with Kibana deployment.')

        report = (
            "The following Kibana deployment issue was found:"
            "\n-------\n"
            "{}"
        ).format(problem)
        return outcome(True, report)

    def _verify_url_internal(self, url, task_vars):
        """
        Try to reach a URL from the host by running the "uri" module.

        The request does not follow redirects, skips certificate validation
        and expects a 302 status code in response.

        Returns: error string if the module reported a failure, otherwise None
        """
        args = dict(
            url=url,
            follow_redirects='none',
            validate_certs='no',  # likely to be signed with internal CA
            # TODO(lmeyer): give users option to validate certs
            status_code=302,
        )
        result = self.execute_module('uri', args, task_vars)
        if result.get('failed'):
            # The caller treats any falsy return value as success, so a failed
            # result must always produce a non-empty reason, even when the
            # module supplied no "msg" (or an empty one).
            return result.get('msg') or 'The uri module failed without providing an error message.'
        return None

    @staticmethod
    def _verify_url_external(url):
        """
        Try to reach a URL from ansible control host.

        Certificate and hostname verification are disabled for the request.

        Returns: error string on failure, None if the final response code is 200
        """
        # This actually checks from the ansible control host, which may or may not
        # really be "external" to the cluster.

        # Disable SSL cert validation to work around internally signed certs
        ctx = ssl.create_default_context()
        ctx.check_hostname = False  # or setting CERT_NONE is refused
        ctx.verify_mode = ssl.CERT_NONE

        # Verify that the url is returning a valid response
        try:
            # We only care if the url connects and responds
            response = urllib2.urlopen(url, context=ctx)
        except HTTPError as httperr:
            # The caller treats a falsy return value as success, so fall back to
            # the full exception text when the server sent no reason phrase.
            return httperr.reason or str(httperr)
        except URLError as urlerr:
            return str(urlerr)

        # Only the status code is needed; always release the connection.
        try:
            return_code = response.getcode()
        finally:
            response.close()

        # there appears to be no way to prevent urlopen from following redirects
        if return_code != 200:
            # format the code as-is so a missing (None) code is reported, not a crash
            return 'Expected success (200) but got return code {}'.format(return_code)

        return None

    def check_kibana(self, pods):
        """
        Inspect the state of the Kibana pods.
        Returns: error string, or None when not_running_pods() reports none of them
        """
        if not pods:
            return "There are no Kibana pods deployed, so no access to the logging UI."

        stopped = self.not_running_pods(pods)
        if not stopped:
            return None

        if len(stopped) == len(pods):
            return "No Kibana pod is in a running state, so there is no access to the logging UI."

        # only some of the pods are down: list them by name, one per line
        lines = []
        for stopped_pod in stopped:
            lines.append("  " + stopped_pod['metadata']['name'] + "\n")

        template = (
            "The following Kibana pods are not currently in a running state:\n"
            "{pods}"
            "However at least one is, so service may not be impacted."
        )
        return template.format(pods="".join(lines))

    def _get_kibana_url(self, task_vars):
        """
        Get kibana route or report error.

        The reason is either one of the short codes 'no_route_exists',
        'route_not_accepted', 'route_missing_host', or a complete message
        when the route definition could not be parsed.

        Returns: url (or None), reason for failure (or None)
        """

        # Get logging url
        get_route = self._exec_oc("get route logging-kibana -o json", [], task_vars)
        if not get_route:
            return None, 'no_route_exists'

        try:
            route = json.loads(get_route)
        except ValueError as err:
            # Output that is not JSON is reported as a failure reason rather
            # than being allowed to crash the check.
            return None, 'Could not parse the logging-kibana route definition as JSON: {}'.format(err)

        # check that the route has been accepted by a router
        # ingress can be null if there is no router, or empty if not routed;
        # a missing "status" or "ingress" key is treated the same way
        ingress = (route.get("status") or {}).get("ingress")
        if not ingress or not ingress[0]:
            return None, 'route_not_accepted'

        host = (route.get("spec") or {}).get("host")
        if not host:
            return None, 'route_missing_host'

        return 'https://{}/'.format(host), None

    def _check_kibana_route(self, task_vars):
        """
        Validate the Kibana route in two steps: a request made through
        _verify_url_internal(), then (unless the variable
        openshift_check_efk_kibana_external is false) a request made through
        _verify_url_external().
        Returns: error string, or None if no problem was found
        """
        kibana_url, reason = self._get_kibana_url(task_vars)
        if not kibana_url:
            route_problems = {
                'no_route_exists': (
                    'No route is defined for Kibana in the logging namespace,\n'
                    'so the logging stack is not accessible. Is logging deployed?\n'
                    'Did something remove the logging-kibana route?'
                ),
                'route_not_accepted': (
                    'The logging-kibana route is not being routed by any router.\n'
                    'Is the router deployed and working?'
                ),
                'route_missing_host': (
                    'The logging-kibana route has no hostname defined,\n'
                    'which should never happen. Did something alter its definition?'
                ),
            }
            # unrecognized reasons are passed through unchanged
            return route_problems.get(reason, reason)

        def explain(raw_error, explanations):
            """Replace a recognized low-level error with its explanation; first match wins."""
            for marker, template in explanations:
                if marker in raw_error:
                    return template.format(url=kibana_url, error=raw_error)
            return raw_error

        # first, check that kibana is reachable from the master.
        internal_error = self._verify_url_internal(kibana_url, task_vars)
        if internal_error:
            internal_explanations = (
                (
                    'urlopen error [Errno 111] Connection refused',
                    'Failed to connect from this master to Kibana URL {url}\n'
                    'Is kibana running, and is at least one router routing to it?',
                ),
                (
                    'urlopen error [Errno -2] Name or service not known',
                    'Failed to connect from this master to Kibana URL {url}\n'
                    'because the hostname does not resolve.\n'
                    'Is DNS configured for the Kibana hostname?',
                ),
                (
                    'Status code was not',
                    'A request from this master to the Kibana URL {url}\n'
                    'did not return the correct status code (302).\n'
                    'This could mean that Kibana is malfunctioning, the hostname is\n'
                    'resolving incorrectly, or other network issues. The output was:\n'
                    '  {error}',
                ),
            )
            return 'Error validating the logging Kibana route:\n' + explain(internal_error, internal_explanations)

        # in production we would like the kibana route to work from outside the
        # cluster too; but that may not be the case, so allow disabling just this part.
        if not get_var(task_vars, "openshift_check_efk_kibana_external", default=True):
            return None

        external_error = self._verify_url_external(kibana_url)
        if not external_error:
            return None

        external_explanations = (
            (
                'urlopen error [Errno 111] Connection refused',
                'Failed to connect from the Ansible control host to Kibana URL {url}\n'
                'Is the router for the Kibana hostname exposed externally?',
            ),
            (
                'urlopen error [Errno -2] Name or service not known',
                'Failed to resolve the Kibana hostname in {url}\n'
                'from the Ansible control host.\n'
                'Is DNS configured to resolve this Kibana hostname externally?',
            ),
            (
                'Expected success (200)',
                'A request to Kibana at {url}\n'
                'returned the wrong error code:\n'
                '  {error}\n'
                'This could mean that Kibana is malfunctioning, the hostname is\n'
                'resolving incorrectly, or other network issues.',
            ),
        )
        return (
            'Error validating the logging Kibana route:\n{error}\n'
            'To disable external Kibana route validation, set in your inventory:\n'
            '  openshift_check_efk_kibana_external=False'
        ).format(error=explain(external_error, external_explanations))

    def _exec_oc(self, cmd_str, extra_args, task_vars):
        """
        Run an "oc" command through the parent class' exec_oc(), supplying this
        check's module executor and logging namespace.
        Returns: whatever exec_oc() returns
        """
        run_oc = super(Kibana, self).exec_oc
        return run_oc(self.execute_module, self.logging_namespace, cmd_str, extra_args, task_vars)