pbs_equiv_classes_perf.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. import os
  37. from tests.performance import *
class TestJobEquivClassPerf(TestPerformance):
    """
    Test job equivalence class performance.

    Measures scheduling-cycle duration as the number of job
    equivalence classes varies (many classes vs. one class, and
    stepwise growth with server/queue limits in place).
    """
  42. def setUp(self):
  43. TestPerformance.setUp(self)
  44. self.scheduler.set_sched_config({'log_filter': 2048})
  45. # Create vnodes
  46. a = {'resources_available.ncpus': 1, 'resources_available.mem': '8gb'}
  47. self.server.create_vnodes('vnode', a, 10000, self.mom,
  48. sharednode=False)
  49. def run_n_get_cycle_time(self):
  50. """
  51. Run a scheduling cycle and calculate its duration
  52. """
  53. t = int(time.time())
  54. # Run only one cycle
  55. self.server.manager(MGR_CMD_SET, MGR_OBJ_SERVER,
  56. {'scheduling': 'True'})
  57. self.server.manager(MGR_CMD_SET, MGR_OBJ_SERVER,
  58. {'scheduling': 'False'})
  59. # Wait for cycle to finish
  60. self.scheduler.log_match("Leaving Scheduling Cycle", starttime=t,
  61. max_attempts=300, interval=3)
  62. c = self.scheduler.cycles(lastN=1)[0]
  63. cycle_time = c.end - c.start
  64. return cycle_time
  65. @timeout(2000)
  66. def test_basic(self):
  67. """
  68. Test basic functionality of job equivalence classes.
  69. Pre test: one class per job
  70. Post test: one class for all jobs
  71. """
  72. self.server.manager(MGR_CMD_SET, MGR_OBJ_SERVER,
  73. {'scheduling': 'False'})
  74. num_jobs = 5000
  75. jids = []
  76. # Create num_jobs different equivalence classes. These jobs can't run
  77. # because there aren't 2cpu nodes. This bypasses the quick
  78. # 'can I run?' check the scheduler does. It will better show the
  79. # equivalence class performance.
  80. for n in range(num_jobs):
  81. a = {'Resource_List.select': str(n + 1) + ':ncpus=2',
  82. "Resource_List.place": "free"}
  83. J = Job(TEST_USER, attrs=a)
  84. jid = self.server.submit(J)
  85. jids += [jid]
  86. cycle1_time = self.run_n_get_cycle_time()
  87. # Make all jobs into one equivalence class
  88. a = {'Resource_List.select': str(num_jobs) + ":ncpus=2",
  89. "Resource_List.place": "free"}
  90. for n in range(num_jobs):
  91. self.server.alterjob(jids[n], a)
  92. cycle2_time = self.run_n_get_cycle_time()
  93. self.logger.info('Cycle 1: %d Cycle 2: %d Cycle time difference: %d' %
  94. (cycle1_time, cycle2_time, cycle1_time - cycle2_time))
  95. self.assertGreaterEqual(cycle1_time, cycle2_time)
  96. @timeout(10000)
  97. def test_server_queue_limit(self):
  98. """
  99. Test the performance with hard and soft limits
  100. on resources
  101. """
  102. # Create workq2
  103. self.server.manager(MGR_CMD_CREATE, QUEUE,
  104. {'queue_type': 'e', 'started': 'True',
  105. 'enabled': 'True'}, id='workq2')
  106. # Set queue limit
  107. a = {
  108. 'max_run': '[o:PBS_ALL=100],[g:PBS_GENERIC=20],\
  109. [u:PBS_GENERIC=20],[g:tstgrp01 = 8],[u:pbsuser1=10]'}
  110. self.server.manager(MGR_CMD_SET, QUEUE,
  111. a, id='workq2')
  112. a = {'max_run_res.ncpus':
  113. '[o:PBS_ALL=100],[g:PBS_GENERIC=50],\
  114. [u:PBS_GENERIC=20],[g:tstgrp01=13],[u:pbsuser1=12]'}
  115. self.server.manager(MGR_CMD_SET, QUEUE, a, id='workq2')
  116. a = {'max_run_res_soft.ncpus':
  117. '[o:PBS_ALL=100],[g:PBS_GENERIC=30],\
  118. [u:PBS_GENERIC=10],[g:tstgrp01=10],[u:pbsuser1=10]'}
  119. self.server.manager(MGR_CMD_SET, QUEUE, a, id='workq2')
  120. # Set server limits
  121. a = {
  122. 'max_run': '[o:PBS_ALL=100],[g:PBS_GENERIC=50],\
  123. [u:PBS_GENERIC=20],[g:tstgrp01=13],[u:pbsuser1=13]'}
  124. self.server.manager(MGR_CMD_SET, SERVER, a)
  125. a = {'max_run_soft':
  126. '[o:PBS_ALL=50],[g:PBS_GENERIC=25],[u:PBS_GENERIC=10],\
  127. [g:tstgrp01=10],[u:pbsuser1=10]'}
  128. self.server.manager(MGR_CMD_SET, SERVER, a)
  129. # Turn scheduling off
  130. self.server.manager(MGR_CMD_SET, SERVER, {'scheduling': 'false'})
  131. # Submit jobs as pbsuser1 from group tstgrp01 in workq2
  132. for x in range(100):
  133. a = {'Resource_List.select': '1:ncpus=2',
  134. 'Resource_List.walltime': int(x),
  135. 'group_list': TSTGRP1, ATTR_q: 'workq2'}
  136. J = Job(TEST_USER1, attrs=a)
  137. for y in range(100):
  138. self.server.submit(J)
  139. # Get time for ~100 classes
  140. cyc1 = self.run_n_get_cycle_time()
  141. # Submit jobs as pbsuser1 from group tstgrp02 in workq2
  142. for x in range(100):
  143. a = {'Resource_List.select': '1:ncpus=2',
  144. 'Resource_List.walltime': int(x),
  145. 'group_list': TSTGRP2, ATTR_q: 'workq2'}
  146. J = Job(TEST_USER1, attrs=a)
  147. for y in range(100):
  148. self.server.submit(J)
  149. # Get time for ~200 classes
  150. cyc2 = self.run_n_get_cycle_time()
  151. # Submit jobs as pbsuser2 from tstgrp01 in workq2
  152. for x in range(100):
  153. a = {'Resource_List.select': '1:ncpus=2',
  154. 'Resource_List.walltime': int(x),
  155. 'group_list': TSTGRP1, ATTR_q: 'workq2'}
  156. J = Job(TEST_USER2, attrs=a)
  157. for y in range(100):
  158. self.server.submit(J)
  159. # Get time for ~300 classes
  160. cyc3 = self.run_n_get_cycle_time()
  161. # Submit jobs as pbsuser2 from tstgrp03 in workq2
  162. for x in range(100):
  163. a = {'Resource_List.select': '1:ncpus=2',
  164. 'Resource_List.walltime': int(x),
  165. 'group_list': TSTGRP3, ATTR_q: 'workq2'}
  166. J = Job(TEST_USER2, attrs=a)
  167. for y in range(100):
  168. self.server.submit(J)
  169. # Get time for ~400 classes
  170. cyc4 = self.run_n_get_cycle_time()
  171. # Submit jobs as pbsuser1 from tstgrp01 in workq
  172. for x in range(100):
  173. a = {'Resource_List.select': '1:ncpus=2',
  174. 'Resource_List.walltime': int(x),
  175. 'group_list': TSTGRP1, ATTR_q: 'workq'}
  176. J = Job(TEST_USER1, attrs=a)
  177. for y in range(100):
  178. self.server.submit(J)
  179. # Get time for ~500 classes
  180. cyc5 = self.run_n_get_cycle_time()
  181. # Submit jobs as pbsuser1 from tstgrp02 in workq
  182. for x in range(100):
  183. a = {'Resource_List.select': '1:ncpus=2',
  184. 'Resource_List.walltime': int(x),
  185. 'group_list': TSTGRP2, ATTR_q: 'workq'}
  186. J = Job(TEST_USER1, attrs=a)
  187. for y in range(100):
  188. self.server.submit(J)
  189. # Get time for 60k jobs for ~600 classes
  190. cyc6 = self.run_n_get_cycle_time()
  191. # Submit jobs as pbsuser2 from tstgrp01 in workq
  192. for x in range(100):
  193. a = {'Resource_List.select': '1:ncpus=2',
  194. 'Resource_List.walltime': int(x),
  195. 'group_list': TSTGRP1, ATTR_q: 'workq'}
  196. J = Job(TEST_USER2, attrs=a)
  197. for y in range(100):
  198. self.server.submit(J)
  199. # Get time for 70k jobs for ~700 classes
  200. cyc7 = self.run_n_get_cycle_time()
  201. # Submit jobs as pbsuser2 from tstgrp03 in workq
  202. for x in range(100):
  203. a = {'Resource_List.select': '1:ncpus=2',
  204. 'Resource_List.walltime': int(x),
  205. 'group_list': TSTGRP3, ATTR_q: 'workq'}
  206. J = Job(TEST_USER2, attrs=a)
  207. for y in range(100):
  208. self.server.submit(J)
  209. # Get time for 80k jobs for ~800 classes
  210. cyc8 = self.run_n_get_cycle_time()
  211. # Print the time taken for all the classes and compare
  212. # it against previous releases
  213. self.logger.info("time taken for \n100 classes is %d"
  214. "\n200 classes is %d,"
  215. "\n300 classes is %d,"
  216. "\n400 classes is %d,"
  217. "\n500 classes is %d,"
  218. "\n600 classes is %d,"
  219. "\n700 classes is %d,"
  220. "\n800 classes is %d"
  221. % (cyc1, cyc2, cyc3, cyc4, cyc5, cyc6, cyc7, cyc8))