kibana.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. """
  2. Module for performing checks on a Kibana logging deployment
  3. """
  4. import json
  5. import ssl
  6. try:
  7. from urllib2 import HTTPError, URLError
  8. import urllib2
  9. except ImportError:
  10. from urllib.error import HTTPError, URLError
  11. import urllib.request as urllib2
  12. from openshift_checks import get_var
  13. from openshift_checks.logging.logging import LoggingCheck
  14. class Kibana(LoggingCheck):
  15. """Module that checks an integrated logging Kibana deployment"""
  16. name = "kibana"
  17. tags = ["health", "logging"]
  18. logging_namespace = None
  19. def run(self, tmp, task_vars):
  20. """Check various things and gather errors. Returns: result as hash"""
  21. self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging")
  22. kibana_pods, error = super(Kibana, self).get_pods_for_component(
  23. self.execute_module,
  24. self.logging_namespace,
  25. "kibana",
  26. task_vars,
  27. )
  28. if error:
  29. return {"failed": True, "changed": False, "msg": error}
  30. check_error = self.check_kibana(kibana_pods)
  31. if not check_error:
  32. check_error = self._check_kibana_route(task_vars)
  33. if check_error:
  34. msg = ("The following Kibana deployment issue was found:"
  35. "\n-------\n"
  36. "{}".format(check_error))
  37. return {"failed": True, "changed": False, "msg": msg}
  38. # TODO(lmeyer): run it all again for the ops cluster
  39. return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'}
  40. def _verify_url_internal(self, url, task_vars):
  41. """
  42. Try to reach a URL from the host.
  43. Returns: success (bool), reason (for failure)
  44. """
  45. args = dict(
  46. url=url,
  47. follow_redirects='none',
  48. validate_certs='no', # likely to be signed with internal CA
  49. # TODO(lmeyer): give users option to validate certs
  50. status_code=302,
  51. )
  52. result = self.execute_module('uri', args, task_vars)
  53. if result.get('failed'):
  54. return result['msg']
  55. return None
  56. @staticmethod
  57. def _verify_url_external(url):
  58. """
  59. Try to reach a URL from ansible control host.
  60. Returns: success (bool), reason (for failure)
  61. """
  62. # This actually checks from the ansible control host, which may or may not
  63. # really be "external" to the cluster.
  64. # Disable SSL cert validation to work around internally signed certs
  65. ctx = ssl.create_default_context()
  66. ctx.check_hostname = False # or setting CERT_NONE is refused
  67. ctx.verify_mode = ssl.CERT_NONE
  68. # Verify that the url is returning a valid response
  69. try:
  70. # We only care if the url connects and responds
  71. return_code = urllib2.urlopen(url, context=ctx).getcode()
  72. except HTTPError as httperr:
  73. return httperr.reason
  74. except URLError as urlerr:
  75. return str(urlerr)
  76. # there appears to be no way to prevent urlopen from following redirects
  77. if return_code != 200:
  78. return 'Expected success (200) but got return code {}'.format(int(return_code))
  79. return None
  80. def check_kibana(self, pods):
  81. """Check to see if Kibana is up and working. Returns: error string."""
  82. if not pods:
  83. return "There are no Kibana pods deployed, so no access to the logging UI."
  84. not_running = self.not_running_pods(pods)
  85. if len(not_running) == len(pods):
  86. return "No Kibana pod is in a running state, so there is no access to the logging UI."
  87. elif not_running:
  88. return (
  89. "The following Kibana pods are not currently in a running state:\n"
  90. "{pods}"
  91. "However at least one is, so service may not be impacted."
  92. ).format(pods="".join(" " + pod['metadata']['name'] + "\n" for pod in not_running))
  93. return None
  94. def _get_kibana_url(self, task_vars):
  95. """
  96. Get kibana route or report error.
  97. Returns: url (or empty), reason for failure
  98. """
  99. # Get logging url
  100. get_route = self._exec_oc("get route logging-kibana -o json", [], task_vars)
  101. if not get_route:
  102. return None, 'no_route_exists'
  103. route = json.loads(get_route)
  104. # check that the route has been accepted by a router
  105. ingress = route["status"]["ingress"]
  106. # ingress can be null if there is no router, or empty if not routed
  107. if not ingress or not ingress[0]:
  108. return None, 'route_not_accepted'
  109. host = route.get("spec", {}).get("host")
  110. if not host:
  111. return None, 'route_missing_host'
  112. return 'https://{}/'.format(host), None
  113. def _check_kibana_route(self, task_vars):
  114. """
  115. Check to see if kibana route is up and working.
  116. Returns: error string
  117. """
  118. known_errors = dict(
  119. no_route_exists=(
  120. 'No route is defined for Kibana in the logging namespace,\n'
  121. 'so the logging stack is not accessible. Is logging deployed?\n'
  122. 'Did something remove the logging-kibana route?'
  123. ),
  124. route_not_accepted=(
  125. 'The logging-kibana route is not being routed by any router.\n'
  126. 'Is the router deployed and working?'
  127. ),
  128. route_missing_host=(
  129. 'The logging-kibana route has no hostname defined,\n'
  130. 'which should never happen. Did something alter its definition?'
  131. ),
  132. )
  133. kibana_url, error = self._get_kibana_url(task_vars)
  134. if not kibana_url:
  135. return known_errors.get(error, error)
  136. # first, check that kibana is reachable from the master.
  137. error = self._verify_url_internal(kibana_url, task_vars)
  138. if error:
  139. if 'urlopen error [Errno 111] Connection refused' in error:
  140. error = (
  141. 'Failed to connect from this master to Kibana URL {url}\n'
  142. 'Is kibana running, and is at least one router routing to it?'
  143. ).format(url=kibana_url)
  144. elif 'urlopen error [Errno -2] Name or service not known' in error:
  145. error = (
  146. 'Failed to connect from this master to Kibana URL {url}\n'
  147. 'because the hostname does not resolve.\n'
  148. 'Is DNS configured for the Kibana hostname?'
  149. ).format(url=kibana_url)
  150. elif 'Status code was not' in error:
  151. error = (
  152. 'A request from this master to the Kibana URL {url}\n'
  153. 'did not return the correct status code (302).\n'
  154. 'This could mean that Kibana is malfunctioning, the hostname is\n'
  155. 'resolving incorrectly, or other network issues. The output was:\n'
  156. ' {error}'
  157. ).format(url=kibana_url, error=error)
  158. return 'Error validating the logging Kibana route:\n' + error
  159. # in production we would like the kibana route to work from outside the
  160. # cluster too; but that may not be the case, so allow disabling just this part.
  161. if not get_var(task_vars, "openshift_check_efk_kibana_external", default=True):
  162. return None
  163. error = self._verify_url_external(kibana_url)
  164. if error:
  165. if 'urlopen error [Errno 111] Connection refused' in error:
  166. error = (
  167. 'Failed to connect from the Ansible control host to Kibana URL {url}\n'
  168. 'Is the router for the Kibana hostname exposed externally?'
  169. ).format(url=kibana_url)
  170. elif 'urlopen error [Errno -2] Name or service not known' in error:
  171. error = (
  172. 'Failed to resolve the Kibana hostname in {url}\n'
  173. 'from the Ansible control host.\n'
  174. 'Is DNS configured to resolve this Kibana hostname externally?'
  175. ).format(url=kibana_url)
  176. elif 'Expected success (200)' in error:
  177. error = (
  178. 'A request to Kibana at {url}\n'
  179. 'returned the wrong error code:\n'
  180. ' {error}\n'
  181. 'This could mean that Kibana is malfunctioning, the hostname is\n'
  182. 'resolving incorrectly, or other network issues.'
  183. ).format(url=kibana_url, error=error)
  184. error = (
  185. 'Error validating the logging Kibana route:\n{error}\n'
  186. 'To disable external Kibana route validation, set in your inventory:\n'
  187. ' openshift_check_efk_kibana_external=False'
  188. ).format(error=error)
  189. return error
  190. return None
  191. def _exec_oc(self, cmd_str, extra_args, task_vars):
  192. return super(Kibana, self).exec_oc(self.execute_module,
  193. self.logging_namespace,
  194. cmd_str,
  195. extra_args,
  196. task_vars)