ptl_test_data.py

# coding: utf-8
# Copyright (C) 1994-2018 Altair Engineering, Inc.
# For more information, contact Altair at www.altair.com.
#
# This file is part of the PBS Professional ("PBS Pro") software.
#
# Open Source License Information:
#
# PBS Pro is free software. You can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Commercial License Information:
#
# For a copy of the commercial license terms and conditions,
# go to: (http://www.pbspro.com/UserArea/agreement.html)
# or contact the Altair Legal Department.
#
# Altair’s dual-license business model allows companies, individuals, and
# organizations to create proprietary derivative works of PBS Pro and
# distribute them - whether embedded or bundled with other software -
# under a commercial license agreement.
#
# Use of Altair’s trademarks, including but not limited to "PBS™",
# "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
# trademark licensing policies.

import os
import sys
import socket
import logging
import signal
import pwd
import re

from nose.util import isclass
from nose.plugins.base import Plugin
from nose.plugins.skip import SkipTest

from ptl.utils.plugins.ptl_test_runner import TimeOut
from ptl.utils.pbs_dshutils import DshUtils

log = logging.getLogger('nose.plugins.PTLTestData')


class PTLTestData(Plugin):

    """
    Save post analysis data on test cases failure or error
    """

    name = 'PTLTestData'
    score = sys.maxint - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
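        """
        Save post analysis data (captured logs, pbs_diag output and core
        file information) for the given test and result status.
        """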
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warn(_msg)
            _msg = 'Please remove the old directory or'
            _msg += ' provide a different directory'
            self.logger.warn(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
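        # Log and create the per-test data directory on the local host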
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0755,
                      parents=True, logerr=False, level=logging.DEBUG2)
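        # Work out the final status string and any additional status detail
        # from the captured exception information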
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
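        # Write the captured test logs, status detail and test duration to
        # a per-test log file named after the final status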
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
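        # Stop collecting once the per-suite postdata threshold has been
        # reached (a threshold of 0 means no limit)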
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite has exceeded the max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
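        # pbs_diag is run against the server host of the test, so a server
        # object is required from here on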
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
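        # Build the pbs_diag command line, including the IDs of any jobs
        # currently known to the server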
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'],
                                'unsupported', 'pbs_diag')
        cur_user = self.du.get_current_user()
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', cur_user]
        cmd += ['-o', pwd.getpwnam(cur_user).pw_dir]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
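        # Run pbs_diag on the server host and extract the path of the
        # generated tarball from its output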
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information'
            _msg += ' on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in the output below:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
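        # Copy the diag tarball into the post-data directory (over the
        # network if the server is remote) and remove it from the server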
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host, diag_out_r, diag_out_dest,
                               sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
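        # Look for core files under the PBS_HOME daemon directories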
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host, path=path, sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
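        # If core files were found, unpack the diag tarball, add the core
        # file information produced by pbs_diag -g, and repack it; otherwise
        # keep the tarball under a host-specific name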
        if len(cores) > 0:
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = diag_out_dest.rstrip('.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host, path=diag_out_dest,
                       force=True, sudo=True, level=logging.DEBUG2)
            diag_out_dest = diag_out_dest.rstrip('.tar')
            for c in cores:
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host, path=c, force=True,
                               sudo=True, level=logging.DEBUG2)
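            # Repack and recompress the diag directory now that the core
            # file information has been added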
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host, cmd, sudo=True, cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information to it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information to it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host, diag_out_dest, sudo=True,
                       recursive=True, force=True, level=logging.DEBUG2)
        else:
            diag_out_dest = diag_out_dest.rstrip('.tar.gz')
            dest = os.path.join(datadir, 'PBS_' +
                                current_host.split('.')[0] + '.tar.gz')
            ret = self.du.run_copy(current_host, diag_out_dest + '.tar.gz',
                                   dest, sudo=True, level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed to rename tarball of diag from'
                _msg += ' %s.tar.gz to %s:' % (diag_out_dest, dest)
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host, path=diag_out_dest + '.tar.gz',
                       force=True, sudo=True, level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)
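
    # nose result hooks: save full post analysis data for errors and
    # failures, and record only the minimal log file for passing tests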
    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')