pbs_svr_dyn_res.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. from ptl.lib.pbs_ifl_mock import *
  38. class TestServerDynRes(TestFunctional):
  39. def setUp(self):
  40. TestFunctional.setUp(self)
  41. # Setup node
  42. a = {'resources_available.ncpus': 4}
  43. self.server.manager(MGR_CMD_SET, NODE, a,
  44. id=self.mom.shortname, expect=True)
  45. def setup_dyn_res(self, resname, restype, resval):
  46. """
  47. Helper function to setup server dynamic resources
  48. """
  49. self.server.manager(MGR_CMD_SET, SERVER, {'scheduling': 'False'})
  50. for i in resname:
  51. attr = {"type": restype[0]}
  52. self.server.manager(MGR_CMD_CREATE, RSC, attr, id=i, expect=True)
  53. # Add resource to sched_config's 'resources' line
  54. self.scheduler.add_resource(i)
  55. # Add server_dyn_res entry in sched_config
  56. if len(resval) > 1: # Mutliple resources
  57. # To create multiple server dynamic resources in sched_config
  58. # from PTL, a list containing "resource !<script>" should be
  59. # supplied as value to the key 'server_dyn_res' when calling
  60. # set_sched_config().
  61. # But this workaround works only if sched_config already has a
  62. # server_dyn_res entry.
  63. # HACK: So adding a single resource first and then the list.
  64. # There wouldn't be any duplicate entries though.
  65. a = {'server_dyn_res': resval[0]}
  66. self.scheduler.set_sched_config(a)
  67. a = {'server_dyn_res': resval}
  68. else:
  69. a = {'server_dyn_res': resval[0]}
  70. self.scheduler.set_sched_config(a)
  71. # The server dynamic resource script gets executed for every
  72. # scheduling cycle
  73. self.server.manager(MGR_CMD_SET, SERVER, {'scheduling': 'True'})
  74. def test_invalid_script_out(self):
  75. """
  76. Test that the scheduler handles incorrect output from server_dyn_res
  77. script correctly
  78. """
  79. # Create a server_dyn_res of type long
  80. resname = ["mybadres"]
  81. restype = ["long"]
  82. script_body = "echo abc"
  83. fn = self.du.create_temp_file(prefix="PtlPbs_badoutfile",
  84. body=script_body)
  85. self.du.chmod(path=fn, mode=0755, sudo=True)
  86. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  87. # Add it as a server_dyn_res that returns a string output
  88. self.setup_dyn_res(resname, restype, resval)
  89. # Submit a job
  90. j = Job(TEST_USER)
  91. jid = self.server.submit(j)
  92. # Make sure that "Problem with creating server data structure"
  93. # is not logged in sched_logs
  94. self.scheduler.log_match("Problem with creating server data structure",
  95. existence=False, max_attempts=10)
  96. # Also check that "<script> returned bad output"
  97. # is in the logs
  98. self.scheduler.log_match("%s returned bad output" % (fn))
  99. # The scheduler uses 0 as the available amount of the dynamic resource
  100. # if the server_dyn_res script output is bad
  101. # So, submit a job that requests 1 of the resource
  102. attr = {"Resource_List." + resname[0]: 1}
  103. # Submit job
  104. j = Job(TEST_USER, attrs=attr)
  105. jid = self.server.submit(j)
  106. # The job shouldn't run
  107. self.server.expect(JOB, {'job_state': 'Q'}, id=jid)
  108. # Check for the expected log message for insufficient resources
  109. self.scheduler.log_match(
  110. "Insufficient amount of server resource: %s (R: 1 A: 0 T: 0)"
  111. % (resname[0]))
  112. def test_res_long_pos(self):
  113. """
  114. Test that server_dyn_res accepts command line arguments to the
  115. commands it runs. Resource value set to a positive long int.
  116. """
  117. # Create a resource of type long. positive value
  118. resname = ["foobar"]
  119. restype = ["long"]
  120. resval = ['"' + resname[0] + ' ' + '!/bin/echo 4' + '"']
  121. # Add server_dyn_res entry in sched_config
  122. self.setup_dyn_res(resname, restype, resval)
  123. a = {'Resource_List.foobar': 4}
  124. # Submit job
  125. j = Job(TEST_USER, attrs=a)
  126. jid = self.server.submit(j)
  127. # Job must run successfully
  128. a = {'job_state': 'R', 'Resource_List.foobar': '4'}
  129. self.server.expect(JOB, a, id=jid)
  130. def test_res_long_neg(self):
  131. """
  132. Test that server_dyn_res accepts command line arguments to the
  133. commands it runs. Resource value set to a negative long int.
  134. """
  135. # Create a resource of type long. negative value
  136. resname = ["foobar"]
  137. restype = ["long"]
  138. resval = ['"' + resname[0] + ' ' + '!/bin/echo -1' + '"']
  139. # Add server_dyn_res entry in sched_config
  140. self.setup_dyn_res(resname, restype, resval)
  141. # Submit job
  142. a = {'Resource_List.foobar': '1'}
  143. # Submit job
  144. j = Job(TEST_USER, attrs=a)
  145. jid = self.server.submit(j)
  146. # Check for the expected log message for insufficient resources
  147. job_comment = "Can Never Run: Insufficient amount of server resource:"
  148. job_comment += " foobar (R: 1 A: -1 T: -1)"
  149. # The job shouldn't run
  150. a = {'job_state': 'Q', 'comment': job_comment}
  151. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)
  152. def test_res_whitespace(self):
  153. """
  154. Test for parse errors when more than one white space
  155. is added between the resource name and the !<script> in a
  156. server_dyn_res line. There shouldn't be any errors.
  157. """
  158. # Create a resource of type long
  159. resname = ["foo"]
  160. restype = ["long"]
  161. # Prep for server_dyn_resource scripts. Script "PbsPtl_get_foo*"
  162. # generates file "PbsPtl_got_foo" and returns 1.
  163. script_body = "echo get_foo > /tmp/PtlPbs_got_foo; echo 1"
  164. fpath_out = os.path.join(os.sep, "tmp", "PtlPbs_got_foo")
  165. fn_in = self.du.create_temp_file(prefix="PtlPbs_get_foo",
  166. body=script_body)
  167. self.du.chmod(path=fn_in, mode=0755, sudo=True)
  168. # Add additional white space between resource name and the script
  169. resval = ['"' + resname[0] + ' ' + ' !' + fn_in + '"']
  170. self.setup_dyn_res(resname, restype, resval)
  171. # Check if the file "PbsPtl_got_foo" was created
  172. for _ in range(10):
  173. self.logger.info("Waiting for the file [%s] to appear",
  174. fpath_out)
  175. if self.du.isfile(path=fpath_out):
  176. break
  177. time.sleep(1)
  178. self.assertTrue(self.du.isfile(path=fpath_out))
  179. # Submit job
  180. a = {'Resource_List.foo': '1'}
  181. # Submit job
  182. j = Job(TEST_USER, attrs=a)
  183. jid = self.server.submit(j)
  184. # Job must run successfully
  185. a = {'job_state': 'R', 'Resource_List.foo': 1}
  186. self.server.expect(JOB, a, id=jid)
  187. def test_multiple_res(self):
  188. """
  189. Test multiple dynamic resources specified in resourcedef
  190. and sched_config
  191. """
  192. # Create resources of type long
  193. resname = ["foobar_small", "foobar_medium", "foobar_large"]
  194. restype = ["long", "long", "long"]
  195. # Prep for server_dyn_resource scripts.
  196. script_body_s = "echo 8"
  197. script_body_m = "echo 12"
  198. script_body_l = "echo 20"
  199. fn_s = self.du.create_temp_file(prefix="PtlPbs_small",
  200. suffix=".scr",
  201. body=script_body_s)
  202. fn_m = self.du.create_temp_file(prefix="PtlPbs_medium",
  203. suffix=".scr",
  204. body=script_body_m)
  205. fn_l = self.du.create_temp_file(prefix="PtlPbs_large",
  206. suffix=".scr",
  207. body=script_body_l)
  208. self.du.chmod(path=fn_s, mode=0755, sudo=True)
  209. self.du.chmod(path=fn_m, mode=0755, sudo=True)
  210. self.du.chmod(path=fn_l, mode=0755, sudo=True)
  211. resval = ['"' + resname[0] + ' ' + '!' + fn_s + '"',
  212. '"' + resname[1] + ' ' + '!' + fn_m + '"',
  213. '"' + resname[2] + ' ' + '!' + fn_l + '"']
  214. self.setup_dyn_res(resname, restype, resval)
  215. a = {'Resource_List.foobar_small': '4'}
  216. # Submit job
  217. j = Job(TEST_USER, attrs=a)
  218. jid = self.server.submit(j)
  219. # Job must run successfully
  220. a = {'job_state': 'R', 'Resource_List.foobar_small': 4}
  221. self.server.expect(JOB, a, id=jid)
  222. a = {'Resource_List.foobar_medium': '10'}
  223. # Submit job
  224. j = Job(TEST_USER, attrs=a)
  225. jid = self.server.submit(j)
  226. # Job must run successfully
  227. a = {'job_state': 'R', 'Resource_List.foobar_medium': 10}
  228. self.server.expect(JOB, a, id=jid)
  229. a = {'Resource_List.foobar_large': '18'}
  230. # Submit job
  231. j = Job(TEST_USER, attrs=a)
  232. jid = self.server.submit(j)
  233. # Job must run successfully
  234. a = {'job_state': 'R', 'Resource_List.foobar_large': 18}
  235. self.server.expect(JOB, a, id=jid)
  236. def test_res_string(self):
  237. """
  238. Test that server_dyn_res accepts a string value returned
  239. by a script
  240. """
  241. # Create a resource of type string
  242. resname = ["foobar"]
  243. restype = ["string"]
  244. # Prep for server_dyn_resource script
  245. script_body = "echo abc"
  246. fn = self.du.create_temp_file(prefix="PtlPbs_check",
  247. suffix=".scr",
  248. body=script_body)
  249. self.du.chmod(path=fn, mode=0755, sudo=True)
  250. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  251. self.setup_dyn_res(resname, restype, resval)
  252. # Submit job
  253. a = {'Resource_List.foobar': 'abc'}
  254. j = Job(TEST_USER, attrs=a)
  255. jid = self.server.submit(j)
  256. # Job must run successfully
  257. a = {'job_state': 'R', 'Resource_List.foobar': 'abc'}
  258. self.server.expect(JOB, a, id=jid)
  259. # Submit job
  260. a = {'Resource_List.foobar': 'xyz'}
  261. j = Job(TEST_USER, attrs=a)
  262. jid = self.server.submit(j)
  263. # Check for the expected log message for insufficient resources
  264. job_comment = "Can Never Run: Insufficient amount of server resource:"
  265. job_comment += " foobar (xyz != abc)"
  266. # The job shouldn't run
  267. a = {'job_state': 'Q', 'comment': job_comment}
  268. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)
  269. def test_res_string_array(self):
  270. """
  271. Test that server_dyn_res accepts string array returned
  272. by a script
  273. """
  274. # Create a resource of type string_array
  275. resname = ["foobar"]
  276. restype = ["string_array"]
  277. # Prep for server_dyn_resource script
  278. script_body = "echo white, red, blue"
  279. fn = self.du.create_temp_file(prefix="PtlPbs_color",
  280. suffix=".scr",
  281. body=script_body)
  282. self.du.chmod(path=fn, mode=0755, sudo=True)
  283. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  284. self.setup_dyn_res(resname, restype, resval)
  285. # Submit job
  286. a = {'Resource_List.foobar': 'red'}
  287. j = Job(TEST_USER, attrs=a)
  288. jid = self.server.submit(j)
  289. # Job must run successfully
  290. a = {'job_state': 'R', 'Resource_List.foobar': 'red'}
  291. self.server.expect(JOB, a, id=jid)
  292. # Submit job
  293. a = {'Resource_List.foobar': 'green'}
  294. j = Job(TEST_USER, attrs=a)
  295. jid = self.server.submit(j)
  296. # Check for the expected log message for insufficient resources
  297. job_comment = "Can Never Run: Insufficient amount of server resource:"
  298. job_comment += " foobar (green != white,red,blue)"
  299. # The job shouldn't run
  300. a = {'job_state': 'Q', 'comment': job_comment}
  301. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)
  302. def test_res_size(self):
  303. """
  304. Test that server_dyn_res accepts type "size" and a "value"
  305. returned by a script
  306. """
  307. # Create a resource of type size
  308. resname = ["foobar"]
  309. restype = ["size"]
  310. # Prep for server_dyn_resource script
  311. script_body = "echo 100gb"
  312. fn = self.du.create_temp_file(prefix="PtlPbs_size",
  313. suffix=".scr",
  314. body=script_body)
  315. self.du.chmod(path=fn, mode=0755, sudo=True)
  316. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  317. self.setup_dyn_res(resname, restype, resval)
  318. # Submit job
  319. a = {'Resource_List.foobar': '95gb'}
  320. j1 = Job(TEST_USER, attrs=a)
  321. jid1 = self.server.submit(j1)
  322. # Job must run successfully
  323. a = {'job_state': 'R', 'Resource_List.foobar': '95gb'}
  324. self.server.expect(JOB, a, id=jid1)
  325. # Submit job
  326. a = {'Resource_List.foobar': '101gb'}
  327. j2 = Job(TEST_USER, attrs=a)
  328. jid2 = self.server.submit(j2)
  329. # Check for the expected log message for insufficient resources
  330. job_comment = "Can Never Run: Insufficient amount of server resource:"
  331. job_comment += " foobar (R: 101gb A: 100gb T: 100gb)"
  332. # The job shouldn't run
  333. a = {'job_state': 'Q', 'comment': job_comment}
  334. self.server.expect(JOB, a, id=jid2, attrop=PTL_AND)
  335. # Delete jobs
  336. self.server.deljob(jid1, wait=True, runas=TEST_USER)
  337. self.server.deljob(jid2, wait=True, runas=TEST_USER)
  338. # Submit jobs again
  339. a = {'Resource_List.foobar': '50gb'}
  340. j1 = Job(TEST_USER, attrs=a)
  341. jid1 = self.server.submit(j1)
  342. a = {'Resource_List.foobar': '50gb'}
  343. j2 = Job(TEST_USER, attrs=a)
  344. jid2 = self.server.submit(j2)
  345. # Both jobs must run successfully
  346. a = {'job_state': 'R', 'Resource_List.foobar': '50gb'}
  347. self.server.expect(JOB, a, id=jid1)
  348. self.server.expect(JOB, a, id=jid2)
  349. def test_res_size_runtime(self):
  350. """
  351. Test that server_dyn_res accepts type "size" and a "value"
  352. returned by a script. Check if the script change during
  353. job run is correctly considered
  354. """
  355. # Create a resource of type size
  356. resname = ["foobar"]
  357. restype = ["size"]
  358. # Prep for server_dyn_resource script
  359. script_body = "echo 100gb"
  360. fn = self.du.create_temp_file(prefix="PtlPbs_size",
  361. suffix=".scr",
  362. body=script_body)
  363. self.du.chmod(path=fn, mode=0755, sudo=True)
  364. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  365. self.setup_dyn_res(resname, restype, resval)
  366. # Submit job
  367. a = {'Resource_List.foobar': '95gb'}
  368. j = Job(TEST_USER, attrs=a)
  369. jid = self.server.submit(j)
  370. # Job must run successfully
  371. a = {'job_state': 'R', 'Resource_List.foobar': '95gb'}
  372. self.server.expect(JOB, a, id=jid)
  373. # Change script during job run
  374. with open(fn, "rw+") as fd:
  375. fd.truncate()
  376. fd.write("echo 50gb")
  377. # Rerun job
  378. self.server.rerunjob(jid)
  379. # The job shouldn't run
  380. job_comment = "Can Never Run: Insufficient amount of server resource:"
  381. job_comment += " foobar (R: 95gb A: 50gb T: 50gb)"
  382. a = {'job_state': 'Q', 'comment': job_comment}
  383. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)
  384. def test_res_size_invalid_input(self):
  385. """
  386. Test invalid values returned from server_dyn_resource
  387. script for resource type 'size'.
  388. Script returns a 'string' instead of type 'size'.
  389. """
  390. # Create a resource of type size
  391. resname = ["foobar"]
  392. restype = ["size"]
  393. # Script returns invalid value for resource type 'size'
  394. script_body = "echo two gb"
  395. fn = self.du.create_temp_file(prefix="PtlPbs_size",
  396. suffix=".scr",
  397. body=script_body)
  398. self.du.chmod(path=fn, mode=0755, sudo=True)
  399. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  400. self.setup_dyn_res(resname, restype, resval)
  401. # Submit job
  402. a = {'Resource_List.foobar': '2gb'}
  403. j = Job(TEST_USER, attrs=a)
  404. jid = self.server.submit(j)
  405. # Also check that "<script> returned bad output"
  406. # is in the logs
  407. self.scheduler.log_match("%s returned bad output" % (fn))
  408. # The job shouldn't run
  409. job_comment = "Can Never Run: Insufficient amount of server resource:"
  410. job_comment += " foobar (R: 2gb A: 0kb T: 0kb)"
  411. a = {'job_state': 'Q', 'comment': job_comment}
  412. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)
  413. def test_res_float_invalid_input(self):
  414. """
  415. Test invalid values returned from server_dyn_resource
  416. script for resource type 'float'
  417. Script returns 'string' instead of type 'float'.
  418. """
  419. # Create a resource of type float
  420. resname = ["foo"]
  421. restype = ["float"]
  422. # Prep for server_dyn_resource script
  423. script_body = "echo abc"
  424. fn = self.du.create_temp_file(prefix="PtlPbs_float",
  425. suffix=".scr",
  426. body=script_body)
  427. self.du.chmod(path=fn, mode=0755, sudo=True)
  428. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  429. self.setup_dyn_res(resname, restype, resval)
  430. # Submit job
  431. a = {'Resource_List.foo': '1.2'}
  432. j = Job(TEST_USER, attrs=a)
  433. jid = self.server.submit(j)
  434. # Also check that "<script> returned bad output"
  435. # is in the logs
  436. self.scheduler.log_match("%s returned bad output" % (fn))
  437. # The job shouldn't run
  438. job_comment = "Can Never Run: Insufficient amount of server resource:"
  439. job_comment += " foo (R: 1.2 A: 0 T: 0)"
  440. a = {'job_state': 'Q', 'comment': job_comment}
  441. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)
  442. def test_res_boolean_invalid_input(self):
  443. """
  444. Test invalid values returned from server_dyn_resource
  445. script for resource type 'boolean'.
  446. Script returns 'non boolean' values
  447. """
  448. # Create a resource of type boolean
  449. resname = ["foo"]
  450. restype = ["boolean"]
  451. # Prep for server_dyn_resource script
  452. script_body = "echo yes"
  453. fn = self.du.create_temp_file(prefix="PtlPbs_bool",
  454. suffix=".scr",
  455. body=script_body)
  456. self.du.chmod(path=fn, mode=0755, sudo=True)
  457. resval = ['"' + resname[0] + ' ' + '!' + fn + '"']
  458. self.setup_dyn_res(resname, restype, resval)
  459. # Submit job
  460. a = {'Resource_List.foo': '"true"'}
  461. j = Job(TEST_USER, attrs=a)
  462. jid = self.server.submit(j)
  463. # Also check that "<script> returned bad output"
  464. # is in the logs
  465. self.scheduler.log_match("%s returned bad output" % (fn))
  466. # The job shouldn't run
  467. job_comment = "Can Never Run: Insufficient amount of server resource:"
  468. job_comment += " foo (True != False)"
  469. a = {'job_state': 'Q', 'comment': job_comment}
  470. self.server.expect(JOB, a, id=jid, attrop=PTL_AND)