pbs_python_restart_settings.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. class TestPythonRestartSettings(TestFunctional):
  38. """
  39. For addressing memory leak in server due to python objects python
  40. interpreter needs to be restarted. Previously there were macros in
  41. code to do that. The new design has added attributes in server to
  42. configure how frequently python interpreter should be restarted
  43. This test suite is to validate the server attributes. Actual memory
  44. leak test is still manual
  45. """
  46. def test_non_integer(self):
  47. """
  48. This is to test that qmgr will throw error when non-integer
  49. values are provided
  50. """
  51. exp_err = "Illegal attribute or resource value"
  52. # -1 will throw error
  53. try:
  54. self.server.manager(MGR_CMD_SET, SERVER,
  55. {'python_restart_max_hooks': '-1'},
  56. runas=ROOT_USER, logerr=True)
  57. except PbsManagerError, e:
  58. self.assertTrue(exp_err in e.msg[0],
  59. "Error message is not expected")
  60. try:
  61. self.server.manager(MGR_CMD_SET, SERVER,
  62. {'python_restart_max_objects': '-1'},
  63. runas=ROOT_USER, logerr=True)
  64. except PbsManagerError, e:
  65. self.assertTrue(exp_err in e.msg[0],
  66. "Error message is not expected")
  67. try:
  68. self.server.manager(MGR_CMD_SET, SERVER,
  69. {'python_restart_min_interval': '-1'},
  70. runas=ROOT_USER, logerr=True)
  71. except PbsManagerError, e:
  72. self.assertTrue(exp_err in e.msg[0],
  73. "Error message is not expected")
  74. # 0 will also give error
  75. try:
  76. self.server.manager(MGR_CMD_SET, SERVER,
  77. {'python_restart_max_hooks': 0},
  78. runas=ROOT_USER, logerr=True)
  79. except PbsManagerError, e:
  80. self.assertTrue(exp_err in e.msg[0],
  81. "Error message is not expected")
  82. try:
  83. self.server.manager(MGR_CMD_SET, SERVER,
  84. {'python_restart_max_objects': 0},
  85. runas=ROOT_USER, logerr=True)
  86. except PbsManagerError, e:
  87. self.assertTrue(exp_err in e.msg[0],
  88. "Error message is not expected")
  89. try:
  90. self.server.manager(MGR_CMD_SET, SERVER,
  91. {'python_restart_min_interval': 0},
  92. runas=ROOT_USER, logerr=True)
  93. except PbsManagerError, e:
  94. self.assertTrue(exp_err in e.msg[0],
  95. "Error message is not expected")
  96. try:
  97. self.server.manager(MGR_CMD_SET, SERVER,
  98. {'python_restart_min_interval': "00:00:00"},
  99. runas=ROOT_USER, logerr=True)
  100. except PbsManagerError, e:
  101. self.assertTrue(exp_err in e.msg[0],
  102. "Error message is not expected")
  103. try:
  104. self.server.manager(MGR_CMD_SET, SERVER,
  105. {'python_restart_min_interval': "HH:MM:SS"},
  106. runas=ROOT_USER, logerr=True)
  107. except PbsManagerError, e:
  108. self.assertTrue(exp_err in e.msg[0],
  109. "Error message is not expected")
  110. def test_non_manager(self):
  111. """
  112. Test that values are not set as operator or users
  113. """
  114. exp_err = "Unauthorized Request"
  115. try:
  116. self.server.manager(MGR_CMD_SET, SERVER,
  117. {'python_restart_max_hooks': 30},
  118. runas=OPER_USER, logerr=True)
  119. except PbsManagerError, e:
  120. self.assertTrue(exp_err in e.msg[0],
  121. "Error message is not expected")
  122. try:
  123. self.server.manager(MGR_CMD_SET, SERVER,
  124. {'python_restart_max_objects': 2000},
  125. runas=OPER_USER, logerr=True)
  126. except PbsManagerError, e:
  127. self.assertTrue(exp_err in e.msg[0],
  128. "Error message is not expected")
  129. try:
  130. self.server.manager(MGR_CMD_SET, SERVER,
  131. {'python_restart_min_interval': 10},
  132. runas=OPER_USER, logerr=True)
  133. except PbsManagerError, e:
  134. self.assertTrue(exp_err in e.msg[0],
  135. "Error message is not expected")
  136. try:
  137. self.server.manager(MGR_CMD_SET, SERVER,
  138. {'python_restart_max_hooks': 30},
  139. runas=TEST_USER, logerr=True)
  140. except PbsManagerError, e:
  141. self.assertTrue(exp_err in e.msg[0],
  142. "Error message is not expected")
  143. try:
  144. self.server.manager(MGR_CMD_SET, SERVER,
  145. {'python_restart_max_objects': 2000},
  146. runas=TEST_USER, logerr=True)
  147. except PbsManagerError, e:
  148. self.assertTrue(exp_err in e.msg[0],
  149. "Error message is not expected")
  150. try:
  151. self.server.manager(MGR_CMD_SET, SERVER,
  152. {'python_restart_min_interval': 10},
  153. runas=TEST_USER, logerr=True)
  154. except PbsManagerError, e:
  155. self.assertTrue(exp_err in e.msg[0],
  156. "Error message is not expected")
  157. def test_log_message(self):
  158. """
  159. Test that message logged in server_logs when values get set
  160. """
  161. self.server.manager(MGR_CMD_SET, SERVER,
  162. {'python_restart_max_hooks': 200},
  163. runas=ROOT_USER, logerr=True)
  164. self.server.log_match("python_restart_max_hooks = 200",
  165. max_attempts=5)
  166. self.server.manager(MGR_CMD_SET, SERVER,
  167. {'python_restart_max_objects': 2000},
  168. runas=ROOT_USER, logerr=True)
  169. self.server.log_match("python_restart_max_objects = 2000",
  170. max_attempts=5)
  171. self.server.manager(MGR_CMD_SET, SERVER,
  172. {'python_restart_min_interval': "00:01:00"},
  173. runas=ROOT_USER, logerr=True)
  174. self.server.log_match("python_restart_min_interval = 00:01:00",
  175. max_attempts=5)
  176. def test_long_values(self):
  177. """
  178. Test that very long values are accepted
  179. """
  180. self.server.manager(MGR_CMD_SET, SERVER,
  181. {'python_restart_max_hooks': 2147483647},
  182. runas=ROOT_USER, logerr=True)
  183. self.server.manager(MGR_CMD_SET, SERVER,
  184. {'python_restart_max_objects': 2147483647},
  185. runas=ROOT_USER, logerr=True)
  186. self.server.manager(MGR_CMD_SET, SERVER,
  187. {'python_restart_min_interval': 2147483647},
  188. runas=ROOT_USER, logerr=True)
  189. self.server.manager(MGR_CMD_SET, SERVER,
  190. {'python_restart_min_interval': "596523:00:00"},
  191. runas=ROOT_USER, logerr=True)
  192. def test_set_unset(self):
  193. """
  194. Test that when unset attribte is not visible in qmgr.
  195. Also values will not change after server restart.
  196. """
  197. self.server.manager(MGR_CMD_SET, SERVER,
  198. {'python_restart_max_hooks': 20},
  199. runas=ROOT_USER, logerr=True)
  200. self.server.manager(MGR_CMD_SET, SERVER,
  201. {'python_restart_max_objects': 20},
  202. runas=ROOT_USER, logerr=True)
  203. self.server.manager(MGR_CMD_SET, SERVER,
  204. {'python_restart_min_interval': "00:00:20"},
  205. runas=ROOT_USER, logerr=True)
  206. # Restart server
  207. self.server.restart()
  208. self.server.expect(SERVER, {'python_restart_max_hooks': 20},
  209. op=SET, runas=ROOT_USER)
  210. self.server.expect(SERVER, {'python_restart_max_objects': 20},
  211. op=SET, runas=ROOT_USER)
  212. self.server.expect(SERVER, {'python_restart_min_interval': 20},
  213. op=SET, runas=ROOT_USER)
  214. self.server.manager(MGR_CMD_UNSET, SERVER,
  215. 'python_restart_max_hooks',
  216. runas=ROOT_USER, logerr=True)
  217. self.server.manager(MGR_CMD_UNSET, SERVER,
  218. 'python_restart_max_objects',
  219. runas=ROOT_USER, logerr=True)
  220. self.server.manager(MGR_CMD_UNSET, SERVER,
  221. 'python_restart_min_interval',
  222. runas=ROOT_USER, logerr=True)
  223. # Restart server again
  224. self.server.restart()
  225. self.server.expect(SERVER, "python_restart_max_hooks",
  226. op=UNSET, runas=ROOT_USER)
  227. self.server.expect(SERVER, "python_restart_max_objects",
  228. op=UNSET, runas=ROOT_USER)
  229. self.server.expect(SERVER, "python_restart_min_interval",
  230. op=UNSET, runas=ROOT_USER)
  231. def test_max_hooks(self):
  232. """
  233. Test that python restarts at set interval
  234. """
  235. # create a hook
  236. hook_body = """
  237. import pbs
  238. e = pbs.event()
  239. s = pbs.server()
  240. localnode = pbs.get_local_nodename()
  241. vn = pbs.server().vnode(localnode)
  242. pbs.event().accept()
  243. """
  244. a = {'event': "queuejob,movejob,modifyjob,runjob", 'enabled': "True"}
  245. self.server.create_import_hook("test", a, hook_body, overwrite=True)
  246. # Create workq2
  247. a = {'queue_type': 'e', 'started': 't', 'enabled': 't'}
  248. self.server.manager(MGR_CMD_CREATE, QUEUE, a, "workq2")
  249. # Set max_hooks and min_interval so that further changes
  250. # will generate a log message.
  251. self.server.manager(MGR_CMD_SET, SERVER,
  252. {'python_restart_max_hooks': 100},
  253. runas=ROOT_USER)
  254. self.server.manager(MGR_CMD_SET, SERVER,
  255. {'python_restart_min_interval': 30},
  256. runas=ROOT_USER)
  257. # Need to run a job so these new settings are remembered
  258. j = Job()
  259. jid = self.server.submit(j)
  260. # Set server log_events
  261. self.server.manager(MGR_CMD_SET, SERVER, {"log_events": 2047})
  262. # Set time to start scanning logs
  263. time.sleep(1)
  264. stime = int(time.time())
  265. # Set max_hooks to low to hit max_hooks only
  266. self.server.manager(MGR_CMD_SET, SERVER,
  267. {'python_restart_max_hooks': 1},
  268. runas=ROOT_USER)
  269. # Set min_interval to 3
  270. self.server.manager(MGR_CMD_SET, SERVER,
  271. {'python_restart_min_interval': 3},
  272. runas=ROOT_USER)
  273. # Submit multiple jobs
  274. for x in range(6):
  275. j = Job()
  276. j.set_attributes({ATTR_h: None})
  277. j.set_sleep_time(1)
  278. jid = self.server.submit(j)
  279. self.server.expect(JOB, {'job_state': "H"}, id=jid)
  280. self.server.alterjob(jid, {ATTR_N: "yaya"})
  281. self.server.movejob(jid, "workq2")
  282. self.server.rlsjob(jid, None)
  283. time.sleep(1)
  284. # Verify the logs and make sure that python interpreter is restarted
  285. # every 3s
  286. logs = self.server.log_match(
  287. "Restarting Python interpreter to reduce mem usage",
  288. allmatch=True, starttime=stime, max_attempts=8)
  289. self.assertTrue(len(logs) > 1)
  290. log1 = logs[0][1]
  291. log2 = logs[1][1]
  292. pattern = '%m/%d/%Y %H:%M:%S'
  293. tmp = log1.split(';')
  294. # Convert the time into epoch time
  295. time1 = int(time.mktime(time.strptime(tmp[0], pattern)))
  296. tmp = log2.split(';')
  297. time2 = int(time.mktime(time.strptime(tmp[0], pattern)))
  298. # Difference between log message should not be less than 3
  299. diff = time2 - time1
  300. self.logger.info("Time difference between log message is " +
  301. str(diff) + " seconds")
  302. # Leave a little wiggle room for slow systems
  303. self.assertTrue(diff >= 3 and diff <= 5)
  304. # This message only gets printed if /proc/self/statm is present
  305. if os.path.isfile("/proc/self/statm"):
  306. self.server.log_match("Current memory usage:",
  307. starttime=self.server.ctime,
  308. max_attempts=5)
  309. else:
  310. self.server.log_match("unknown", max_attempts=5)
  311. # Verify other log messages
  312. self.server.log_match("python_restart_max_hooks is now 1",
  313. starttime=stime, max_attempts=5)
  314. self.server.log_match("python_restart_min_interval is now 3",
  315. starttime=stime, max_attempts=5)
  316. def test_max_objects(self):
  317. """
  318. Test that python restarts if max objects limit have met
  319. """
  320. hook_body = """
  321. import pbs
  322. pbs.event().accept()
  323. """
  324. a = {'event': "queuejob,modifyjob", 'enabled': 'True'}
  325. self.server.create_import_hook("test", a, hook_body, overwrite=True)
  326. # Set max_objects and min_interval so that further changes
  327. # will generate a log message.
  328. self.server.manager(MGR_CMD_SET, SERVER,
  329. {'python_restart_max_objects': 1000},
  330. runas=ROOT_USER)
  331. self.server.manager(MGR_CMD_SET, SERVER,
  332. {'python_restart_min_interval': 30},
  333. runas=ROOT_USER)
  334. # Need to run a job so these new settings are remembered
  335. j = Job()
  336. jid = self.server.submit(j)
  337. # Set server log_events
  338. self.server.manager(MGR_CMD_SET, SERVER, {"log_events": 2047})
  339. # Set time to start scanning logs
  340. time.sleep(1)
  341. stime = int(time.time())
  342. # Set max_objects only
  343. self.server.manager(MGR_CMD_SET, SERVER,
  344. {'python_restart_max_objects': 1},
  345. runas=ROOT_USER)
  346. # Set min_interval to 1
  347. self.server.manager(MGR_CMD_SET, SERVER,
  348. {'python_restart_min_interval': '00:00:01'},
  349. runas=ROOT_USER)
  350. # Submit held jobs
  351. for x in range(3):
  352. j = Job()
  353. j.set_attributes({ATTR_h: None})
  354. j.set_sleep_time(1)
  355. jid = self.server.submit(j)
  356. self.server.expect(JOB, {'job_state': "H"}, id=jid)
  357. self.server.alterjob(jid, {ATTR_N: "yaya"})
  358. # Verify that python is restarted
  359. self.server.log_match(
  360. "Restarting Python interpreter to reduce mem usage",
  361. starttime=self.server.ctime, max_attempts=5)
  362. # This message only gets printed if
  363. # /proc/self/statm presents
  364. if os.path.isfile("/proc/self/statm"):
  365. self.server.log_match(
  366. "Current memory usage:",
  367. starttime=self.server.ctime, max_attempts=5)
  368. else:
  369. self.server.log_match("unknown", max_attempts=5)
  370. # Verify other log messages
  371. self.server.log_match(
  372. "python_restart_max_objects is now 1",
  373. starttime=stime, max_attempts=5)
  374. self.server.log_match(
  375. "python_restart_min_interval is now 1",
  376. starttime=stime, max_attempts=5)
  377. def test_no_restart(self):
  378. """
  379. Test that if limit not reached then python interpreter
  380. will not be started
  381. """
  382. hook_body = """
  383. import pbs
  384. pbs.event().accept()
  385. """
  386. a = {'event': "queuejob", 'enabled': "True"}
  387. self.server.create_import_hook("test", a, hook_body, overwrite=True)
  388. # Set max_hooks, max_objects, and min_interval to large values
  389. # to avoid restarting the Python interpreter.
  390. self.server.manager(MGR_CMD_SET, SERVER,
  391. {'python_restart_max_hooks': 10000},
  392. runas=ROOT_USER)
  393. self.server.manager(MGR_CMD_SET, SERVER,
  394. {'python_restart_max_objects': 10000},
  395. runas=ROOT_USER)
  396. self.server.manager(MGR_CMD_SET, SERVER,
  397. {'python_restart_min_interval': 10000},
  398. runas=ROOT_USER)
  399. stime = time.time()
  400. # Submit a job
  401. for x in range(10):
  402. j = Job()
  403. j.set_sleep_time(1)
  404. jid = self.server.submit(j)
  405. # Verify no restart message
  406. msg = "Restarting Python interpreter to reduce mem usage"
  407. self.server.log_match(msg, starttime=stime, max_attempts=8,
  408. existence=False)