# coding: utf-8
# Copyright (C) 1994-2018 Altair Engineering, Inc.
# For more information, contact Altair at www.altair.com.
#
# This file is part of the PBS Professional ("PBS Pro") software.
#
# Open Source License Information:
#
# PBS Pro is free software. You can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Commercial License Information:
#
# For a copy of the commercial license terms and conditions,
# go to: (http://www.pbspro.com/UserArea/agreement.html)
# or contact the Altair Legal Department.
#
# Altair’s dual-license business model allows companies, individuals, and
# organizations to create proprietary derivative works of PBS Pro and
# distribute them - whether embedded or bundled with other software -
# under a commercial license agreement.
#
# Use of Altair’s trademarks, including but not limited to "PBS™",
# "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
# trademark licensing policies.

  36. from tests.functional import *
  37. class TestJobRouting(TestFunctional):
  38. """
  39. This test suite validates state of parent job and subjobs in a Job Array.
  40. """
  41. def setUp(self):
  42. TestFunctional.setUp(self)
  43. self.momA = self.moms.values()[0]
  44. self.momA.delete_vnode_defs()
  45. self.hostA = self.momA.shortname
  46. self.server.manager(MGR_CMD_DELETE, NODE, None, "")
  47. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA)
  48. a = {'resources_available.ncpus': 3}
  49. self.server.manager(MGR_CMD_SET, NODE, a,
  50. id=self.hostA, expect=True)
  51. self.server.manager(MGR_CMD_SET, SERVER, {'scheduling': 'false'},
  52. expect=True)
  53. def test_t1(self):
  54. """
  55. This test case validates Job array state when one
  56. of the subjob is deleted while the array job is HELD in a routing
  57. queue and is released after the subjob is deleted.
  58. """
  59. dflt_q = self.server.default_queue
  60. # Create a route queue with destination to default queue
  61. queue_attrib = {ATTR_qtype: 'route',
  62. ATTR_routedest: dflt_q,
  63. ATTR_enable: 'True'}
  64. self.server.manager(MGR_CMD_CREATE, QUEUE, queue_attrib, id='routeq')
  65. job_attrib = Job(TEST_USER, attrs={ATTR_queue: 'routeq',
  66. ATTR_l + '.ncpus': 1,
  67. ATTR_h: None,
  68. ATTR_J: '1-2',
  69. ATTR_r: 'y'})
  70. # Submit an array job in Held state
  71. jid = self.server.submit(job_attrib)
  72. self.server.expect(JOB, {ATTR_state: 'H'}, jid)
  73. self.server.expect(JOB, {ATTR_state + '=Q': 2}, count=True,
  74. id=jid, extend='t')
  75. subjobs = self.server.status(JOB, id=jid, extend='t')
  76. # Delete one of the subjob
  77. self.server.deljob(subjobs[-1]['id'])
  78. self.server.expect(JOB, {ATTR_state: 'H'}, jid)
  79. self.server.expect(JOB, {ATTR_state + '=Q': 1}, count=True,
  80. id=jid, extend='t')
  81. self.server.expect(JOB, {ATTR_state + '=X': 1}, count=True,
  82. id=jid, extend='t')
  83. self.server.expect(JOB, {ATTR_queue + '=routeq': 3}, count=True,
  84. id=jid, extend='t')
  85. # Release the array and verify job array state
  86. self.server.rlsjob(jid, 'u')
  87. self.server.expect(JOB, {ATTR_state: 'Q'}, jid)
  88. self.server.expect(JOB, {ATTR_state + '=Q': 2}, count=True,
  89. id=jid, extend='t')
  90. self.server.expect(JOB, {ATTR_state + '=X': 1}, count=True,
  91. id=jid, extend='t')
  92. self.server.expect(JOB, {ATTR_queue + '=routeq': 3}, count=True,
  93. id=jid, extend='t')
  94. # No errors should be in server logs
  95. msg = '(job_route) Request invalid for state of job, state=7'
  96. self.server.log_match(msg, id=jid, existence=False)
  97. # Start routing queue and verify job array queue set to default queue
  98. a = {ATTR_start: 'True'}
  99. self.server.manager(MGR_CMD_SET, QUEUE, a, id='routeq')
  100. self.server.expect(JOB, {ATTR_queue + '=' + dflt_q: 3}, count=True,
  101. id=jid, extend='t')
  102. def test_t2(self):
  103. """
  104. This test case validates Job array state when running subjobs
  105. are force fully deleted. After deleting the running subjob
  106. Array job is held and released, this should cause job array
  107. state change to Q from B.
  108. """
  109. # Submit a job array of size 3
  110. job = Job()
  111. job.set_attributes({ATTR_l + '.ncpus': 1,
  112. ATTR_J: '1-3',
  113. ATTR_r: 'y'})
  114. job.set_sleep_time(1000)
  115. jid = self.server.submit(job)
  116. self.server.expect(JOB, {ATTR_state + '=Q': 4}, count=True,
  117. id=jid, extend='t')
  118. # Start scheduling cycle. This will move all 3 subjobs to R state.
  119. # And parent job state to B state.
  120. self.server.manager(MGR_CMD_SET, SERVER, {'scheduling': 'true'},
  121. expect=True)
  122. self.server.expect(JOB, {ATTR_state + '=R': 3}, count=True,
  123. id=jid, extend='t')
  124. self.server.expect(JOB, {ATTR_state + '=B': 1}, count=True,
  125. id=jid, extend='t')
  126. # Delete two of the subjobs.
  127. subjobs = self.server.status(JOB, id=jid, extend='t')
  128. self.server.deljob(subjobs[1]['id'])
  129. self.server.deljob(subjobs[2]['id'])
  130. # Mark node offline, and rerun the third job.
  131. self.momA = self.moms.values()[0]
  132. self.hostA = self.momA.shortname
  133. a = {'state': 'offline'}
  134. self.server.manager(MGR_CMD_SET, NODE, a,
  135. id=self.hostA, expect=True)
  136. # Rerun Third job, job will move to Q state.
  137. self.server.rerunjob(subjobs[3]['id'])
  138. self.server.expect(JOB, {ATTR_state + '=Q': 1}, count=True,
  139. id=jid, extend='t')
  140. self.server.expect(JOB, {ATTR_state + '=X': 2}, count=True,
  141. id=jid, extend='t')
  142. self.server.expect(JOB, {ATTR_state + '=B': 1}, count=True,
  143. id=jid, extend='t')
  144. # Hold the job array. Parent job will move to H state.
  145. self.server.holdjob(jid)
  146. self.server.expect(JOB, {ATTR_state + '=H': 1}, count=True,
  147. id=jid, extend='t')
  148. self.server.expect(JOB, {ATTR_state + '=Q': 1}, count=True,
  149. id=jid, extend='t')
  150. self.server.expect(JOB, {ATTR_state + '=X': 2}, count=True,
  151. id=jid, extend='t')
  152. # Release the job and validate array job state.
  153. # Expected parent array job state is Q
  154. self.server.rlsjob(jid, 'u')
  155. self.server.expect(JOB, {ATTR_state + '=Q': 2}, count=True,
  156. id=jid, extend='t')
  157. self.server.expect(JOB, {ATTR_state + '=X': 2}, count=True,
  158. id=jid, extend='t')