pbs_hook_debug_nocrash.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. class TestHookDebugNoCrash(TestFunctional):
  38. """
  39. This tests to make sure the following does not occur:
  40. Hook debug causes file descriptor leak that crashes PBS server
  41. PRE: Have 3 queuejob hooks, qjob1, qjob2, qjob3 with order=1, order=2,
  42. order=2 respectively. qjob1 and qjob2 have debug=True while
  43. qjob3 has debug=False. Try submitting 1000 jobs.
  44. POST: On a fixed PBS, this test case will run to completion.
  45. On a PBS containing the bug, the test could fail on a server crash,
  46. a failure in qsub with "Invalid credential", or even a qstat
  47. hang with ptl returning:
  48. corretja: /opt/pbs/bin/qstat -f 4833.corretja
  49. 2016-07-08 12:56:52,799 INFO TIMEDOUT
  50. and server_logs having the message "Too many open files".
  51. This is because a previous bug causes pbs_server to not close the
  52. debug output file descriptors opened by subsequent hook executions.
  53. NOTE: This is assuming on one's local system, we have the
  54. follwoing limit:
  55. # ulimit -a
  56. ...
  57. open files (-n) 1024
  58. """
  59. # Class variables
  60. open_files_limit_expected = 1024
  61. def setUp(self):
  62. ret = self.du.run_cmd(
  63. self.server.hostname, [
  64. 'ulimit', '-n'], as_script=True, logerr=False)
  65. self.assertEqual(ret['rc'], 0)
  66. open_files_limit = ret['out'][0]
  67. if (open_files_limit == "unlimited") or (
  68. int(open_files_limit) > self.open_files_limit_expected):
  69. msg = "\n'This test requires 'open files' system limit"
  70. msg += " to be <= %d " % self.open_files_limit_expected
  71. msg += "(current value=%s)." % open_files_limit
  72. self.skipTest(msg)
  73. TestFunctional.setUp(self)
  74. @timeout(400)
  75. def test_hook_debug_no_crash(self):
  76. hook_body = """
  77. import pbs
  78. e=pbs.event()
  79. pbs.logmsg(pbs.LOG_DEBUG, "hook %s executed" % (e.hook_name,))
  80. """
  81. hook_name = "qjob1"
  82. a = {
  83. 'event': "queuejob",
  84. 'enabled': 'True',
  85. 'debug': 'True',
  86. 'order': 1}
  87. rv = self.server.create_import_hook(
  88. hook_name,
  89. a,
  90. hook_body,
  91. overwrite=True)
  92. self.assertTrue(rv)
  93. hook_name = "qjob2"
  94. a = {
  95. 'event': "queuejob",
  96. 'enabled': 'True',
  97. 'debug': 'True',
  98. 'order': 2}
  99. rv = self.server.create_import_hook(
  100. hook_name,
  101. a,
  102. hook_body,
  103. overwrite=True)
  104. self.assertTrue(rv)
  105. hook_name = "qjob3"
  106. a = {
  107. 'event': "queuejob",
  108. 'enabled': 'True',
  109. 'debug': 'False',
  110. 'order': 2}
  111. rv = self.server.create_import_hook(
  112. hook_name,
  113. a,
  114. hook_body,
  115. overwrite=True)
  116. self.assertTrue(rv)
  117. self.server.manager(MGR_CMD_SET, SERVER, {'scheduling': 'False'},
  118. expect=True)
  119. for i in range(1000):
  120. j = Job(TEST_USER)
  121. a = {
  122. 'Resource_List.select': '1:ncpus=1',
  123. 'Resource_List.walltime': 3600}
  124. j.set_attributes(a)
  125. j.set_sleep_time("5")
  126. jid = self.server.submit(j)
  127. self.server.expect(JOB, {'job_state': 'Q'}, id=jid)