pbs_cray_smoketest.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
import os
import time

from tests.functional import *
from ptl.utils.pbs_crayutils import CrayUtils
  39. @tags('cray', 'smoke')
  40. class TestCraySmokeTest(TestFunctional):
  41. """
  42. Set of tests that qualifies as smoketest for Cray platform
  43. """
  44. def setUp(self):
  45. if not self.du.get_platform().startswith('cray'):
  46. self.skipTest("Test suite only meant to run on a Cray")
  47. TestFunctional.setUp(self)
  48. # no node in 'resv' and 'use' in apstat
  49. cu = CrayUtils()
  50. self.assertEqual(cu.count_node_summ('resv'), 0,
  51. "No compute node should be having ALPS reservation")
  52. self.assertEqual(cu.count_node_summ('use'), 0,
  53. "No compute node should be in use")
  54. # The number of compute nodes in State up and batch mode
  55. # (State = 'UP B') should equal the number of cray_compute nodes.
  56. nodes_up_b = cu.count_node_state('UP B')
  57. self.logger.info("Nodes with State 'UP B' : %s" % nodes_up_b)
  58. nodes_up_i = cu.count_node_state('UP I')
  59. self.logger.info("Nodes with State 'UP I' : %s" % nodes_up_i)
  60. nodes = self.server.filter(NODE,
  61. {ATTR_rescavail + '.vntype':
  62. 'cray_compute'})
  63. num_cray_compute = len(nodes[ATTR_rescavail + '.vntype=cray_compute'])
  64. self.assertEqual(nodes_up_b, num_cray_compute)
  65. self.logger.info("nodes in State 'UP B': %s == cray_compute: %s" %
  66. (nodes_up_b, num_cray_compute))
  67. # nodes are free and resources are available.
  68. nodes = self.server.status(NODE)
  69. for node in nodes:
  70. self.assertEqual(node['state'], 'free')
  71. self.assertEqual(node['resources_assigned.ncpus'], '0')
  72. self.assertEqual(node['resources_assigned.mem'], '0kb')
  73. @staticmethod
  74. def find_hw(output_file):
  75. """
  76. Find the string "Hello World" in the specified file.
  77. Return 1 if found.
  78. """
  79. found = 0
  80. with open(output_file, 'r') as outf:
  81. for line in outf:
  82. if "Hello World" in line:
  83. found = 1
  84. break
  85. else:
  86. continue
  87. return found
  88. @tags('cray', 'smoke')
  89. def test_cray_login_job(self):
  90. """
  91. Submit a simple sleep job that requests to run on a login node
  92. and expect that job to go in running state on a login node.
  93. Verify that the job runs to completion and check job output/error.
  94. """
  95. self.server.manager(MGR_CMD_SET, SERVER,
  96. {'job_history_enable': 'True'})
  97. j1 = Job(TEST_USER, {ATTR_l + '.vntype': 'cray_login',
  98. ATTR_N: 'cray_login'})
  99. scr = []
  100. scr += ['echo Hello World\n']
  101. scr += ['/bin/sleep 5\n']
  102. sub_dir = self.du.mkdtemp(uid=TEST_USER.uid)
  103. j1.create_script(scr)
  104. jid1 = self.server.submit(j1, submit_dir=sub_dir)
  105. self.server.expect(JOB, {ATTR_state: 'R'}, id=jid1)
  106. # fetch node name where the job is running and check that the
  107. # node is a login node
  108. self.server.status(JOB, 'exec_vnode', id=jid1)
  109. vname = j1.get_vnodes()[0]
  110. self.server.expect(NODE, {ATTR_rescavail + '.vntype': 'cray_login'},
  111. id=vname, max_attempts=1)
  112. cu = CrayUtils()
  113. # Check if number of compute nodes in use are 0
  114. self.assertEqual(cu.count_node_summ('use'), 0)
  115. # verify the contents of output/error files
  116. self.server.expect(JOB, {'job_state': 'F'}, id=jid1, extend='x')
  117. error_file = os.path.join(sub_dir, 'cray_login.e' + jid1.split('.')[0])
  118. self.assertEqual(os.stat(error_file).st_size, 0,
  119. msg="Job error file should be empty")
  120. output_file = os.path.join(
  121. sub_dir, 'cray_login.o' + jid1.split('.')[0])
  122. foundhw = self.find_hw(output_file)
  123. self.assertEqual(foundhw, 1, msg="Job output file incorrect")
  124. @tags('cray', 'smoke')
  125. def test_cray_compute_job(self):
  126. """
  127. Submit a simple sleep job that runs on a compute node and
  128. expect the job to go in running state on a compute node.
  129. Verify that the job runs to completion and check job output/error.
  130. """
  131. self.server.manager(MGR_CMD_SET, SERVER,
  132. {'job_history_enable': 'True'})
  133. j1 = Job(TEST_USER, {ATTR_l + '.vntype': 'cray_compute',
  134. ATTR_N: 'cray_compute'})
  135. scr = []
  136. scr += ['echo Hello World\n']
  137. scr += ['/bin/sleep 5\n']
  138. scr += ['aprun -b -B /bin/sleep 10\n']
  139. sub_dir = self.du.mkdtemp(uid=TEST_USER.uid)
  140. j1.create_script(scr)
  141. jid1 = self.server.submit(j1, submit_dir=sub_dir)
  142. self.server.expect(JOB, {ATTR_state: 'R'}, id=jid1)
  143. # fetch node name where the job is running and check that the
  144. # node is a compute node
  145. self.server.status(JOB, 'exec_vnode', id=jid1)
  146. vname = j1.get_vnodes()[0]
  147. self.server.expect(NODE, {ATTR_rescavail + '.vntype': 'cray_compute'},
  148. id=vname)
  149. # Sleep for some time before aprun actually starts
  150. # using the reservation
  151. self.logger.info(
  152. "Sleeping 6 seconds before aprun starts using the reservation")
  153. time.sleep(6)
  154. cu = CrayUtils()
  155. # Check if number of compute nodes in use is 1
  156. self.assertEqual(cu.count_node_summ('resv'), 1)
  157. if self.du.get_platform() == 'cray':
  158. # Cray simulator will not show anything in 'use' because
  159. # aprun command is just a pass through on simulator
  160. self.assertEqual(cu.count_node_summ('use'), 1)
  161. # verify the contents of output/error files
  162. self.server.expect(JOB, {'job_state': 'F'}, id=jid1, extend='x')
  163. error_file = os.path.join(
  164. sub_dir, 'cray_compute.e' + jid1.split('.')[0])
  165. self.assertEqual(os.stat(error_file).st_size, 0,
  166. msg="Job error file should be empty")
  167. output_file = os.path.join(
  168. sub_dir, 'cray_compute.o' + jid1.split('.')[0])
  169. foundhw = self.find_hw(output_file)
  170. self.assertEqual(foundhw, 1, msg="Job output file incorrect")
  171. (cu.node_status, cu.node_summary) = cu.parse_apstat_rn()
  172. self.assertEqual(cu.count_node_summ('resv'), 0)
  173. if self.du.get_platform() == 'cray':
  174. self.assertEqual(cu.count_node_summ('use'), 0)