pbs_cpusetdestroydelay.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. import os
  37. import time
  38. from tests.functional import *
  39. class TestCpusetDestroyDelay(TestFunctional):
  40. """
  41. Testing function kill_cpuset_procs() on SGI systems
  42. PP-345: cpuset_destroy_delay ignores child processes
  43. """
  44. def setUp(self):
  45. """
  46. Base class method overridding
  47. builds absolute path of commands to execute
  48. """
  49. self.server.expect(SERVER, {'pbs_version': (GE, '13.0')},
  50. max_attempts=2)
  51. self.server.set_op_mode(PTL_CLI)
  52. self.server.cleanup_jobs(extend='force')
  53. if not self.mom.is_cpuset_mom():
  54. self.skipTest("Not running cpuset mom")
  55. self.resilient_job_script = """
  56. # To adjust the process count set RESILIENT_JOB_PROCS
  57. # To adjust the job duration set RESILIENT_JOB_DURATION
  58. # Default is to maintain 10 active processes
  59. RESILIENT_JOB_PROCS=${RESILIENT_JOB_PROCS:-10}
  60. export RESILIENT_JOB_PROCS
  61. # Default is to run for 10 seconds
  62. RESILIENT_JOB_DURATION=${RESILIENT_JOB_DURATION:-10}
  63. export RESILIENT_JOB_DURATION
  64. # Calculate the end time only once at the beginning
  65. RESILIENT_JOB_END=`date +%s`
  66. let RESILIENT_JOB_END+=${RESILIENT_JOB_DURATION}
  67. export RESILIENT_JOB_END
  68. mount=`cat /proc/mounts | grep cpuset | cut -d' ' -f2`
  69. myset=`cpuset -w $$`
  70. RESILIENT_JOB_TASKS="$mount$myset/tasks"
  71. [ -f "$RESILIENT_JOB_TASKS" ] || exit 1
  72. export RESILIENT_JOB_TASKS
  73. RESILIENT_JOB_SCRIPT=$(cat <<'EOF'
  74. [ -z "$RESILIENT_JOB_PROCS" ] && exit 1
  75. [ -z "$RESILIENT_JOB_END" ] && exit 1
  76. [ -z "$RESILIENT_JOB_TASKS" ] && exit 1
  77. while [ `date +%s` -lt $RESILIENT_JOB_END ]; do
  78. count=`wc -l "$RESILIENT_JOB_TASKS" | cut -d' ' -f1`
  79. if [ $count -lt $RESILIENT_JOB_PROCS ]; then
  80. ( echo "$RESILIENT_JOB_SCRIPT" | nohup /bin/sh >/dev/null 2>&1 ) &
  81. fi
  82. # Pause briefly between loops
  83. # usleep 10
  84. done
  85. EOF
  86. )
  87. export RESILIENT_JOB_SCRIPT
  88. ( echo "$RESILIENT_JOB_SCRIPT" | nohup /bin/sh >/dev/null 2>&1 ) &
  89. sleep $RESILIENT_JOB_DURATION
  90. """
  91. Job.dflt_attributes[ATTR_k] = 'oe'
  92. self.resilient_job = Job()
  93. self.resilient_job.create_script(
  94. '\n\n' +
  95. 'RESILIENT_JOB_PROCS=10\n' +
  96. 'RESILIENT_JOB_DURATION=20\n\n' +
  97. self.resilient_job_script)
  98. def test_t1(self):
  99. """
  100. Resilient job run time exceeds cpuset_destroy_delay
  101. Confirm job is killed before cpuset_destroy_delay expires
  102. """
  103. delay = 10
  104. self.mom.add_config({'cpuset_destroy_delay': delay})
  105. self.mom.add_config({'$logevent': 2047})
  106. self.mom.signal('-HUP')
  107. self.logger.info("sleeping for one second after sending MoM SIGHUP")
  108. time.sleep(1)
  109. jid = self.server.submit(self.resilient_job)
  110. self.server.expect(JOB, {'job_state': 'R', 'substate': 42}, jid)
  111. self.logger.info(jid + "is running")
  112. self.logger.info("sleeping for one second to let the job spin up")
  113. time.sleep(1)
  114. path = os.path.join(os.path.sep, 'dev', 'cpuset', 'PBSPro',
  115. jid, 'tasks')
  116. cmd = 'wc -l ' + path
  117. self.logger.info('Task count: ' + os.popen(cmd).read())
  118. started = time.time()
  119. self.server.delete(jid, wait=True)
  120. ended = time.time()
  121. self.assertTrue(ended < (started + delay))