glusterfs_check_containerized.py

#!/usr/bin/env python
"""glusterfs_check_containerized module"""
# Copyright 2018 Red Hat, Inc. and/or its affiliates
# and other contributors as indicated by the @author tags.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import subprocess

from ansible.module_utils.basic import AnsibleModule

DOCUMENTATION = '''
---
module: glusterfs_check_containerized
short_description: Check health of each volume in glusterfs on openshift.
version_added: "2.6"
description:
- This module attempts to ensure all volumes are in a healthy state
  in a glusterfs cluster. The module is meant to be failure-prone; retries
  should be executed at the ansible level, they are not implemented in
  this module.
  This module works by executing the following (roughly):
  oc exec --namespace=<namespace> <podname> -- gluster volume list
  for volume in <volume list>:
      gluster volume heal <volume> info
author:
- "Michael Gugino <mgugino@redhat.com>"
'''

EXAMPLES = '''
- name: glusterfs volumes check
  glusterfs_check_containerized:
    oc_bin: "/usr/bin/oc"
    oc_conf: "/etc/origin/master/admin.kubeconfig"
    oc_namespace: "glusterfs"
    cluster_name: "glusterfs"
    exclude_node: "{{ inventory_hostname }}"
'''


def fail(module, err):
    """Fail on error"""
    result = {'failed': True,
              'changed': False,
              'msg': err,
              'state': 'unknown'}
    module.fail_json(**result)


def call_or_fail(module, call_args):
    """Call subprocess.check_output and return utf-8 decoded stdout or fail"""
    try:
        # Must decode as utf-8 for python3 compatibility
        res = subprocess.check_output(call_args).decode('utf-8')
    except subprocess.CalledProcessError as err:
        fail(module, str(err))
    return res


def get_valid_nodes(module, oc_exec, exclude_node):
    """Return a list of nodes that will be used to filter running pods"""
    call_args = oc_exec + ['get', 'nodes']
    res = call_or_fail(module, call_args)
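    # Illustrative `oc get nodes` output (exact columns may vary by oc
    # version); fields[0] is NAME and fields[1] is STATUS:
    #   NAME                 STATUS    ROLES     AGE       VERSION
    #   node-1.example.com   Ready     compute   10d       v1.11.0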
    valid_nodes = []
    for line in res.split('\n'):
        fields = line.split()
        if not fields:
            continue
        if fields[0] != exclude_node and fields[1] == "Ready":
            valid_nodes.append(fields[0])
    if not valid_nodes:
        fail(module,
             'Unable to find suitable node in get nodes output: {}'.format(res))
    return valid_nodes


def select_pod(module, oc_exec, cluster_name, valid_nodes):
    """Select a pod to attempt to run gluster commands on"""
    call_args = oc_exec + ['get', 'pods', '-owide']
    res = call_or_fail(module, call_args)
    # res is returned as a tab/space-separated list with headers.
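    # Illustrative `oc get pods -o wide` output (exact columns may vary by
    # oc version); fields[0] is NAME, fields[2] is STATUS, fields[6] is NODE:
    #   NAME                     READY  STATUS   RESTARTS  AGE  IP        NODE
    #   glusterfs-storage-abcde  1/1    Running  0         10d  10.0.0.1  node-1.example.com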
    pod_name = None
    name_search = 'glusterfs-{}'.format(cluster_name)
    res_lines = list(filter(None, res.split('\n')))
    for line in res_lines[1:]:
        fields = line.split()
        if not fields:
            continue
        if name_search in fields[0]:
            if fields[2] == "Running" and fields[6] in valid_nodes:
                pod_name = fields[0]
                break
    if pod_name is None:
        fail(module,
             "Unable to find suitable pod in get pods output: {}".format(res))
    else:
        return pod_name


def get_volume_list(module, oc_exec, pod_name):
    """Retrieve list of active volumes from gluster cluster"""
    call_args = oc_exec + ['exec', pod_name, '--', 'gluster', 'volume', 'list']
    res = call_or_fail(module, call_args)
    # This should always at least return heketidbstorage, so no need to check
    # for empty string.
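    # Illustrative `gluster volume list` output, one volume name per line:
    #   heketidbstorage
    #   vol_0123456789abcdef0123456789abcdef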
    return list(filter(None, res.split('\n')))


def check_volume_health_info(module, oc_exec, pod_name, volume):
    """Check health info of gluster volume"""
    call_args = oc_exec + ['exec', pod_name, '--', 'gluster', 'volume', 'heal',
                           volume, 'info']
    res = call_or_fail(module, call_args)
    # Output is not easily parsed
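    # Illustrative `gluster volume heal <volume> info` output (format may
    # vary by gluster version); a healthy volume reports zero entries
    # pending heal on every brick:
    #   Brick node-1.example.com:/var/lib/heketi/mounts/.../brick
    #   Status: Connected
    #   Number of entries: 0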
    for line in res.split('\n'):
        if line.startswith('Number of entries:'):
            cols = line.split(':')
            if cols[1].strip() != '0':
                fail(module, 'volume {} is not ready'.format(volume))


def check_volumes(module, oc_exec, pod_name):
    """Check status of all volumes on cluster"""
    volume_list = get_volume_list(module, oc_exec, pod_name)
    for volume in volume_list:
        check_volume_health_info(module, oc_exec, pod_name, volume)


def run_module():
    '''Run this module'''
    module_args = dict(
        oc_bin=dict(type='path', required=True),
        oc_conf=dict(type='path', required=True),
        oc_namespace=dict(type='str', required=True),
        cluster_name=dict(type='str', required=True),
        exclude_node=dict(type='str', required=True),
    )
    module = AnsibleModule(
        supports_check_mode=False,
        argument_spec=module_args
    )
    oc_bin = module.params['oc_bin']
    oc_conf = '--config={}'.format(module.params['oc_conf'])
    oc_namespace = '--namespace={}'.format(module.params['oc_namespace'])
    cluster_name = module.params['cluster_name']
    exclude_node = module.params['exclude_node']
    oc_exec = [oc_bin, oc_conf, oc_namespace]
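    # With the values from EXAMPLES above, oc_exec would be (illustrative):
    #   ['/usr/bin/oc', '--config=/etc/origin/master/admin.kubeconfig',
    #    '--namespace=glusterfs']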
    # Create a list of nodes to find a pod on; we don't want to try to
    # execute on a pod running on a "NotReady" node or the inventory_hostname
    # node because the pods might not actually be alive.
    valid_nodes = get_valid_nodes(module, [oc_bin, oc_conf], exclude_node)
    # Need to find an alive pod to run gluster commands in.
    pod_name = select_pod(module, oc_exec, cluster_name, valid_nodes)
    check_volumes(module, oc_exec, pod_name)
    result = {'changed': False}
    module.exit_json(**result)


def main():
    """main"""
    run_module()


if __name__ == '__main__':
    main()