kibana.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. """
  2. Module for performing checks on a Kibana logging deployment
  3. """
  4. import json
  5. import ssl
  6. try:
  7. from urllib2 import HTTPError, URLError
  8. import urllib2
  9. except ImportError:
  10. from urllib.error import HTTPError, URLError
  11. import urllib.request as urllib2
  12. from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
  13. class Kibana(LoggingCheck):
  14. """Module that checks an integrated logging Kibana deployment"""
  15. name = "kibana"
  16. tags = ["health", "logging"]
  17. def run(self):
  18. """Check various things and gather errors. Returns: result as hash"""
  19. kibana_pods = self.get_pods_for_component("kibana")
  20. self.check_kibana(kibana_pods)
  21. self.check_kibana_route()
  22. # TODO(lmeyer): run it all again for the ops cluster
  23. return {}
  24. def _verify_url_internal(self, url):
  25. """
  26. Try to reach a URL from the host.
  27. Returns: success (bool), reason (for failure)
  28. """
  29. args = dict(
  30. url=url,
  31. follow_redirects='none',
  32. validate_certs='no', # likely to be signed with internal CA
  33. # TODO(lmeyer): give users option to validate certs
  34. status_code=302,
  35. )
  36. result = self.execute_module('uri', args)
  37. if result.get('failed'):
  38. return result['msg']
  39. return None
  40. @staticmethod
  41. def _verify_url_external(url):
  42. """
  43. Try to reach a URL from ansible control host.
  44. Raise an OpenShiftCheckException if anything goes wrong.
  45. """
  46. # This actually checks from the ansible control host, which may or may not
  47. # really be "external" to the cluster.
  48. # Disable SSL cert validation to work around internally signed certs
  49. ctx = ssl.create_default_context()
  50. ctx.check_hostname = False # or setting CERT_NONE is refused
  51. ctx.verify_mode = ssl.CERT_NONE
  52. # Verify that the url is returning a valid response
  53. try:
  54. # We only care if the url connects and responds
  55. return_code = urllib2.urlopen(url, context=ctx).getcode()
  56. except HTTPError as httperr:
  57. return httperr.reason
  58. except URLError as urlerr:
  59. return str(urlerr)
  60. # there appears to be no way to prevent urlopen from following redirects
  61. if return_code != 200:
  62. return 'Expected success (200) but got return code {}'.format(int(return_code))
  63. return None
  64. def check_kibana(self, pods):
  65. """Check to see if Kibana is up and working. Raises OpenShiftCheckException if not."""
  66. if not pods:
  67. raise OpenShiftCheckException(
  68. "MissingComponentPods",
  69. "There are no Kibana pods deployed, so no access to the logging UI."
  70. )
  71. not_running = self.not_running_pods(pods)
  72. if len(not_running) == len(pods):
  73. raise OpenShiftCheckException(
  74. "NoRunningPods",
  75. "No Kibana pod is in a running state, so there is no access to the logging UI."
  76. )
  77. elif not_running:
  78. raise OpenShiftCheckException(
  79. "PodNotRunning",
  80. "The following Kibana pods are not currently in a running state:\n"
  81. " {pods}\n"
  82. "However at least one is, so service may not be impacted.".format(
  83. pods="\n ".join(pod['metadata']['name'] for pod in not_running)
  84. )
  85. )
  86. def _get_kibana_url(self):
  87. """
  88. Get kibana route or report error.
  89. Returns: url
  90. """
  91. # Get logging url
  92. get_route = self.exec_oc("get route logging-kibana -o json", [])
  93. if not get_route:
  94. raise OpenShiftCheckException(
  95. 'no_route_exists',
  96. 'No route is defined for Kibana in the logging namespace,\n'
  97. 'so the logging stack is not accessible. Is logging deployed?\n'
  98. 'Did something remove the logging-kibana route?'
  99. )
  100. try:
  101. route = json.loads(get_route)
  102. # check that the route has been accepted by a router
  103. ingress = route["status"]["ingress"]
  104. except (ValueError, KeyError):
  105. raise OpenShiftCheckException(
  106. 'get_route_failed',
  107. '"oc get route" returned an unexpected response:\n' + get_route
  108. )
  109. # ingress can be null if there is no router, or empty if not routed
  110. if not ingress or not ingress[0]:
  111. raise OpenShiftCheckException(
  112. 'route_not_accepted',
  113. 'The logging-kibana route is not being routed by any router.\n'
  114. 'Is the router deployed and working?'
  115. )
  116. host = route.get("spec", {}).get("host")
  117. if not host:
  118. raise OpenShiftCheckException(
  119. 'route_missing_host',
  120. 'The logging-kibana route has no hostname defined,\n'
  121. 'which should never happen. Did something alter its definition?'
  122. )
  123. return 'https://{}/'.format(host)
  124. def check_kibana_route(self):
  125. """
  126. Check to see if kibana route is up and working.
  127. Raises exception if not.
  128. """
  129. kibana_url = self._get_kibana_url()
  130. # first, check that kibana is reachable from the master.
  131. error = self._verify_url_internal(kibana_url)
  132. if error:
  133. if 'urlopen error [Errno 111] Connection refused' in error:
  134. raise OpenShiftCheckException(
  135. 'FailedToConnectInternal',
  136. 'Failed to connect from this master to Kibana URL {url}\n'
  137. 'Is kibana running, and is at least one router routing to it?'.format(url=kibana_url)
  138. )
  139. elif 'urlopen error [Errno -2] Name or service not known' in error:
  140. raise OpenShiftCheckException(
  141. 'FailedToResolveInternal',
  142. 'Failed to connect from this master to Kibana URL {url}\n'
  143. 'because the hostname does not resolve.\n'
  144. 'Is DNS configured for the Kibana hostname?'.format(url=kibana_url)
  145. )
  146. elif 'Status code was not' in error:
  147. raise OpenShiftCheckException(
  148. 'WrongReturnCodeInternal',
  149. 'A request from this master to the Kibana URL {url}\n'
  150. 'did not return the correct status code (302).\n'
  151. 'This could mean that Kibana is malfunctioning, the hostname is\n'
  152. 'resolving incorrectly, or other network issues. The output was:\n'
  153. ' {error}'.format(url=kibana_url, error=error)
  154. )
  155. raise OpenShiftCheckException(
  156. 'MiscRouteErrorInternal',
  157. 'Error validating the logging Kibana route internally:\n' + error
  158. )
  159. # in production we would like the kibana route to work from outside the
  160. # cluster too; but that may not be the case, so allow disabling just this part.
  161. if self.get_var("openshift_check_efk_kibana_external", default="True").lower() != "true":
  162. return
  163. error = self._verify_url_external(kibana_url)
  164. if not error:
  165. return
  166. error_fmt = (
  167. 'Error validating the logging Kibana route:\n{error}\n'
  168. 'To disable external Kibana route validation, set the variable:\n'
  169. ' openshift_check_efk_kibana_external=False'
  170. )
  171. if 'urlopen error [Errno 111] Connection refused' in error:
  172. msg = (
  173. 'Failed to connect from the Ansible control host to Kibana URL {url}\n'
  174. 'Is the router for the Kibana hostname exposed externally?'
  175. ).format(url=kibana_url)
  176. raise OpenShiftCheckException('FailedToConnect', error_fmt.format(error=msg))
  177. elif 'urlopen error [Errno -2] Name or service not known' in error:
  178. msg = (
  179. 'Failed to resolve the Kibana hostname in {url}\n'
  180. 'from the Ansible control host.\n'
  181. 'Is DNS configured to resolve this Kibana hostname externally?'
  182. ).format(url=kibana_url)
  183. raise OpenShiftCheckException('FailedToResolve', error_fmt.format(error=msg))
  184. elif 'Expected success (200)' in error:
  185. msg = (
  186. 'A request to Kibana at {url}\n'
  187. 'returned the wrong error code:\n'
  188. ' {error}\n'
  189. 'This could mean that Kibana is malfunctioning, the hostname is\n'
  190. 'resolving incorrectly, or other network issues.'
  191. ).format(url=kibana_url, error=error)
  192. raise OpenShiftCheckException('WrongReturnCode', error_fmt.format(error=msg))
  193. raise OpenShiftCheckException(
  194. 'MiscRouteError',
  195. 'Error validating the logging Kibana route externally:\n' + error
  196. )