kibana.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. """
  2. Module for performing checks on a Kibana logging deployment
  3. """
  4. import json
  5. import ssl
  6. # pylint can't find the package when its installed in virtualenv
  7. # pylint: disable=import-error,no-name-in-module
  8. from ansible.module_utils.six.moves.urllib import request
  9. # pylint: disable=import-error,no-name-in-module
  10. from ansible.module_utils.six.moves.urllib.error import HTTPError, URLError
  11. from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException
  12. class Kibana(LoggingCheck):
  13. """Module that checks an integrated logging Kibana deployment"""
  14. name = "kibana"
  15. tags = ["health", "logging"]
  16. def run(self):
  17. """Check various things and gather errors. Returns: result as hash"""
  18. kibana_pods = self.get_pods_for_component("kibana")
  19. self.check_kibana(kibana_pods)
  20. self.check_kibana_route()
  21. # TODO(lmeyer): run it all again for the ops cluster
  22. return {}
  23. def _verify_url_internal(self, url):
  24. """
  25. Try to reach a URL from the host.
  26. Returns: success (bool), reason (for failure)
  27. """
  28. args = dict(
  29. url=url,
  30. follow_redirects='none',
  31. validate_certs='no', # likely to be signed with internal CA
  32. # TODO(lmeyer): give users option to validate certs
  33. status_code=302,
  34. )
  35. result = self.execute_module('uri', args)
  36. if result.get('failed'):
  37. return result['msg']
  38. return None
  39. @staticmethod
  40. def _verify_url_external(url):
  41. """
  42. Try to reach a URL from ansible control host.
  43. Raise an OpenShiftCheckException if anything goes wrong.
  44. """
  45. # This actually checks from the ansible control host, which may or may not
  46. # really be "external" to the cluster.
  47. # Disable SSL cert validation to work around internally signed certs
  48. ctx = ssl.create_default_context()
  49. ctx.check_hostname = False # or setting CERT_NONE is refused
  50. ctx.verify_mode = ssl.CERT_NONE
  51. # Verify that the url is returning a valid response
  52. try:
  53. # We only care if the url connects and responds
  54. return_code = request.urlopen(url, context=ctx).getcode()
  55. except HTTPError as httperr:
  56. return httperr.reason
  57. except URLError as urlerr:
  58. return str(urlerr)
  59. # there appears to be no way to prevent urlopen from following redirects
  60. if return_code != 200:
  61. return 'Expected success (200) but got return code {}'.format(int(return_code))
  62. return None
  63. def check_kibana(self, pods):
  64. """Check to see if Kibana is up and working. Raises OpenShiftCheckException if not."""
  65. if not pods:
  66. raise OpenShiftCheckException(
  67. "MissingComponentPods",
  68. "There are no Kibana pods deployed, so no access to the logging UI."
  69. )
  70. not_running = self.not_running_pods(pods)
  71. if len(not_running) == len(pods):
  72. raise OpenShiftCheckException(
  73. "NoRunningPods",
  74. "No Kibana pod is in a running state, so there is no access to the logging UI."
  75. )
  76. elif not_running:
  77. raise OpenShiftCheckException(
  78. "PodNotRunning",
  79. "The following Kibana pods are not currently in a running state:\n"
  80. " {pods}\n"
  81. "However at least one is, so service may not be impacted.".format(
  82. pods="\n ".join(pod['metadata']['name'] for pod in not_running)
  83. )
  84. )
  85. def _get_kibana_url(self):
  86. """
  87. Get kibana route or report error.
  88. Returns: url
  89. """
  90. # Get logging url
  91. get_route = self.exec_oc("get route logging-kibana -o json", [])
  92. if not get_route:
  93. raise OpenShiftCheckException(
  94. 'no_route_exists',
  95. 'No route is defined for Kibana in the logging namespace,\n'
  96. 'so the logging stack is not accessible. Is logging deployed?\n'
  97. 'Did something remove the logging-kibana route?'
  98. )
  99. try:
  100. route = json.loads(get_route)
  101. # check that the route has been accepted by a router
  102. ingress = route["status"]["ingress"]
  103. except (ValueError, KeyError):
  104. raise OpenShiftCheckException(
  105. 'get_route_failed',
  106. '"oc get route" returned an unexpected response:\n' + get_route
  107. )
  108. # ingress can be null if there is no router, or empty if not routed
  109. if not ingress or not ingress[0]:
  110. raise OpenShiftCheckException(
  111. 'route_not_accepted',
  112. 'The logging-kibana route is not being routed by any router.\n'
  113. 'Is the router deployed and working?'
  114. )
  115. host = route.get("spec", {}).get("host")
  116. if not host:
  117. raise OpenShiftCheckException(
  118. 'route_missing_host',
  119. 'The logging-kibana route has no hostname defined,\n'
  120. 'which should never happen. Did something alter its definition?'
  121. )
  122. return 'https://{}/'.format(host)
  123. def check_kibana_route(self):
  124. """
  125. Check to see if kibana route is up and working.
  126. Raises exception if not.
  127. """
  128. kibana_url = self._get_kibana_url()
  129. # first, check that kibana is reachable from the master.
  130. error = self._verify_url_internal(kibana_url)
  131. if error:
  132. if 'urlopen error [Errno 111] Connection refused' in error:
  133. raise OpenShiftCheckException(
  134. 'FailedToConnectInternal',
  135. 'Failed to connect from this master to Kibana URL {url}\n'
  136. 'Is kibana running, and is at least one router routing to it?'.format(url=kibana_url)
  137. )
  138. elif 'urlopen error [Errno -2] Name or service not known' in error:
  139. raise OpenShiftCheckException(
  140. 'FailedToResolveInternal',
  141. 'Failed to connect from this master to Kibana URL {url}\n'
  142. 'because the hostname does not resolve.\n'
  143. 'Is DNS configured for the Kibana hostname?'.format(url=kibana_url)
  144. )
  145. elif 'Status code was not' in error:
  146. raise OpenShiftCheckException(
  147. 'WrongReturnCodeInternal',
  148. 'A request from this master to the Kibana URL {url}\n'
  149. 'did not return the correct status code (302).\n'
  150. 'This could mean that Kibana is malfunctioning, the hostname is\n'
  151. 'resolving incorrectly, or other network issues. The output was:\n'
  152. ' {error}'.format(url=kibana_url, error=error)
  153. )
  154. raise OpenShiftCheckException(
  155. 'MiscRouteErrorInternal',
  156. 'Error validating the logging Kibana route internally:\n' + error
  157. )
  158. # in production we would like the kibana route to work from outside the
  159. # cluster too; but that may not be the case, so allow disabling just this part.
  160. if self.get_var("openshift_check_efk_kibana_external", default="True").lower() != "true":
  161. return
  162. error = self._verify_url_external(kibana_url)
  163. if not error:
  164. return
  165. error_fmt = (
  166. 'Error validating the logging Kibana route:\n{error}\n'
  167. 'To disable external Kibana route validation, set the variable:\n'
  168. ' openshift_check_efk_kibana_external=False'
  169. )
  170. if 'urlopen error [Errno 111] Connection refused' in error:
  171. msg = (
  172. 'Failed to connect from the Ansible control host to Kibana URL {url}\n'
  173. 'Is the router for the Kibana hostname exposed externally?'
  174. ).format(url=kibana_url)
  175. raise OpenShiftCheckException('FailedToConnect', error_fmt.format(error=msg))
  176. elif 'urlopen error [Errno -2] Name or service not known' in error:
  177. msg = (
  178. 'Failed to resolve the Kibana hostname in {url}\n'
  179. 'from the Ansible control host.\n'
  180. 'Is DNS configured to resolve this Kibana hostname externally?'
  181. ).format(url=kibana_url)
  182. raise OpenShiftCheckException('FailedToResolve', error_fmt.format(error=msg))
  183. elif 'Expected success (200)' in error:
  184. msg = (
  185. 'A request to Kibana at {url}\n'
  186. 'returned the wrong error code:\n'
  187. ' {error}\n'
  188. 'This could mean that Kibana is malfunctioning, the hostname is\n'
  189. 'resolving incorrectly, or other network issues.'
  190. ).format(url=kibana_url, error=error)
  191. raise OpenShiftCheckException('WrongReturnCode', error_fmt.format(error=msg))
  192. raise OpenShiftCheckException(
  193. 'MiscRouteError',
  194. 'Error validating the logging Kibana route externally:\n' + error
  195. )