pbs_mom_walltime.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. from ptl.lib.pbs_testlib import BatchUtils
  class TestMomWalltime(TestFunctional):
      """
      Functional tests checking which intervals of a job's life are
      reflected in its resources_used.walltime.
      """

      def test_mom_hook_not_counted_in_walltime(self):
          """
          Test that time spent on mom hooks is not counted in walltime
          of the job
          """
          hook_name_event_dict = {
              'begin': 'execjob_begin',
              'prologue': 'execjob_prologue',
              'launch': 'execjob_launch',
              'epilogue': 'execjob_epilogue',
              'preterm': 'execjob_preterm',
              'end': 'execjob_end'
          }
          # Every hook sleeps for 2 seconds and then accepts the event.
          # accept must actually be called: a bare "pbs.event().accept"
          # is only an attribute lookup and does nothing.
          hook_script = (
              "import pbs\n"
              "import time\n"
              "time.sleep(2)\n"
              "pbs.event().accept()\n"
          )
          for name, event in hook_name_event_dict.items():
              # Build a fresh attribute dict per hook instead of mutating
              # a shared one between calls
              hook_attrib = {'event': event, 'enabled': 'True'}
              self.server.create_import_hook(name, hook_attrib, hook_script)

          # Job history is enabled so the finished ('F') job can still be
          # queried with extend='x'
          self.server.manager(MGR_CMD_SET, SERVER,
                              {'job_history_enable': 'True'})
          job = Job(TEST_USER)
          job.set_sleep_time(3)
          jid = self.server.submit(job)
          self.server.expect(JOB, {ATTR_state: 'F'}, id=jid, extend='x',
                             offset=15)
          # The job itself only sleeps for 3 seconds; the recorded
          # walltime must not exceed 5 seconds
          self.server.expect(JOB, {'resources_used.walltime': 5}, op=LE,
                             id=jid, extend='x')

      def test_hold_time_not_counted_in_walltime(self):
          """
          Test that hold time is not counted in walltime
          """
          self.server.manager(MGR_CMD_SET, SERVER,
                              {'job_history_enable': 'True'})
          a = {'Resource_List.ncpus': 1}
          job = Job(TEST_USER, attrs=a)
          job.set_sleep_time(60)
          jid = self.server.submit(job)
          self.server.expect(JOB, {'job_state': 'R'}, id=jid)

          # Wait for the job to run for some time
          time.sleep(15)
          self.server.expect(JOB, {'resources_used.walltime': 0}, op=GT,
                             id=jid, extend='x')

          # Put a user hold on the job and rerun it, then confirm the
          # hold is in place
          self.server.holdjob(jid, USER_HOLD)
          self.server.rerunjob(jid)
          self.server.expect(JOB, {'Hold_Types': 'u'}, id=jid)

          # Wait for some time to verify that this time is not
          # accounted in 'resources_used.walltime'
          time.sleep(20)
          self.server.rlsjob(jid, USER_HOLD)
          self.server.expect(JOB, {ATTR_state: 'F'}, id=jid, extend='x',
                             offset=45)

          # Verify that the job's walltime is between 60 and 70 seconds
          self.server.expect(JOB, {'resources_used.walltime': 60}, op=GE,
                             id=jid, extend='x')
          self.server.expect(JOB, {'resources_used.walltime': 70}, op=LE,
                             id=jid, extend='x')

      def test_suspend_time_not_counted_in_walltime(self):
          """
          Test that suspend time is not counted in walltime
          """
          self.server.manager(MGR_CMD_SET, SERVER,
                              {'job_history_enable': 'True'})
          a = {'Resource_List.ncpus': 1}
          # Job script: 30 iterations of a one second sleep
          script_content = (
              'for i in {1..30}\n'
              'do\n'
              '\techo "time wait"\n'
              '\tsleep 1\n'
              'done'
          )
          job = Job(TEST_USER, attrs=a)
          job.create_script(body=script_content)
          jid = self.server.submit(job)
          self.server.expect(JOB, {'job_state': 'R'}, id=jid)

          # Accumulate wall time
          time.sleep(10)
          self.server.sigjob(jobid=jid, signal="suspend")
          self.server.expect(JOB, {'job_state': 'S'}, id=jid)

          # Make sure the sched cycle is completed before reading
          # the walltime
          self.server.manager(MGR_CMD_SET, MGR_OBJ_SERVER,
                              {'scheduling': 'True'}, expect=True)
          self.server.manager(MGR_CMD_SET, MGR_OBJ_SERVER,
                              {'scheduling': 'False'}, expect=True)

          jstat = self.server.status(JOB, id=jid,
                                     attrib=['resources_used.walltime'])
          walltime = BatchUtils().convert_duration(
              jstat[0]['resources_used.walltime'])
          self.logger.info("Walltime before sleep: %d secs" % walltime)

          self.server.manager(MGR_CMD_SET, MGR_OBJ_SERVER,
                              {'scheduling': 'True'})

          # Sleep for the job's entire walltime secs so we can catch any
          # walltime increment during job suspension time
          self.logger.info("Suspending job for 30s, job's execution time. "
                           "Walltime should not get incremented while job "
                           "is suspended")
          time.sleep(30)

          # Used walltime should remain the same
          self.server.expect(JOB, {'resources_used.walltime': walltime},
                             op=EQ, id=jid)

          self.server.sigjob(jobid=jid, signal="resume")
          self.server.expect(JOB, {'job_state': 'R'}, id=jid)
          self.server.expect(JOB, {ATTR_state: 'F'}, id=jid, extend='x',
                             offset=20)

          # Verify if the job's total walltime is within limits
          # Adding 10s buffer since min mom poll time is 10s
          jstat = self.server.status(JOB, id=jid,
                                     attrib=['resources_used.walltime'],
                                     extend='x')
          walltime_final = BatchUtils().convert_duration(
              jstat[0]['resources_used.walltime'])
          self.assertGreater(walltime_final, 0,
                             'Error fetching resources_used.walltime value')
          self.logger.info("Walltime at job completion: %d secs"
                           % walltime_final)
          # Accepted window is 25 to 40 seconds inclusive
          self.assertIn(walltime_final, range(25, 41),
                        'Walltime is not in expected range')

      def test_mom_restart(self):
          """
          Test that time spent on jobs running on MoM will not reset when
          MoM is restarted
          """
          job = Job(TEST_USER)
          job.set_sleep_time(300)
          jid = self.server.submit(job)
          self.server.expect(JOB, {ATTR_state: 'R'}, id=jid)
          # Let the job accumulate more than 30 seconds of walltime
          self.server.expect(JOB, {'resources_used.walltime': 30}, op=GT,
                             id=jid, offset=30)

          # Restart MoM. NOTE(review): '-p' is presumably what keeps the
          # running job alive across the restart - confirm against the
          # pbs_mom options.
          self.mom.stop(sig='-INT')
          self.mom.start(args=['-p'])
          self.server.expect(JOB, {ATTR_state: 'R'}, id=jid)

          # The walltime must not be seen below 30 seconds again. expect()
          # raising PtlExpectError means the "< 30" condition was never
          # met within the attempts, which is the passing case.
          try:
              walltime_reset = self.server.expect(
                  JOB, {'resources_used.walltime': 30}, op=LT, id=jid,
                  max_attempts=5, interval=5)
          except PtlExpectError:
              walltime_reset = False
          self.assertFalse(walltime_reset,
                           'Walltime was reset after MoM restart')