pbs_alps_release_tunables.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. import math
  38. def get_epoch(msg):
  39. # Since its a log message split on ';' to get timestamp
  40. a = time.strptime(msg.split(';')[0], "%m/%d/%Y %H:%M:%S")
  41. return int(time.mktime(a))
  42. @tags('cray')
  43. class TestCrayAlpsReleaseTunables(TestFunctional):
  44. """
  45. Set of tests to verify alps release tunables namely alps_release_wait_time
  46. and alps_release_jitter
  47. """
  48. def setUp(self):
  49. machine = self.du.get_platform()
  50. if not machine == 'cray':
  51. self.skipTest("Test suite only meant to run on a Cray")
  52. TestFunctional.setUp(self)
  53. def test_alps_release_wait_time(self):
  54. """
  55. Set alps_release_wait_time to a higher value and then notice that
  56. subsequest reservation cancellation requests are made at least
  57. after the set interval.
  58. """
  59. # assigning a random value to alps_release_wait_time that is
  60. # measurable using mom log messages
  61. arwt = 4.298
  62. self.mom.add_config({'$alps_release_wait_time': arwt})
  63. # submit a job and then delete it after it starts running
  64. start_time = int(time.time())
  65. j1 = Job(TEST_USER)
  66. jid1 = self.server.submit(j1)
  67. self.server.expect(JOB, {ATTR_state: 'R'}, id=jid1)
  68. time.sleep(2)
  69. self.server.delete(jid1)
  70. # Look for a message that confirms that reservation is deleted
  71. self.mom.log_match("%s;ALPS reservation cancelled" % jid1,
  72. starttime=start_time)
  73. # Now that we know that reservation is cleared we should
  74. # check for time difference between each cancellation request
  75. out = self.mom.log_match("%s;Canceling ALPS reservation *" % jid1,
  76. n='ALL', regexp=True, allmatch=True)
  77. # We found something, Let's first check there are atleast 2 such
  78. # log messages, If not then that means reservation was cancelled
  79. # in the first attempt itself, at that point right thing to do is
  80. # to either run it again or find out a way to delay the reservation
  81. # cancellation at ALPS level itself.
  82. if len(out) >= 2:
  83. # variable 'out' is a list of tuples and every second element
  84. # in a tuple is the matched log message
  85. time_prev = get_epoch(out[0][1])
  86. for data in out[1:]:
  87. time_current = get_epoch(data[1])
  88. fail_msg = "alps_release_wait_time not working"
  89. self.assertGreaterEqual(time_current - time_prev,
  90. math.ceil(arwt),
  91. msg=fail_msg)
  92. time_prev = time_current
  93. else:
  94. self.skipTest("Reservation cancelled without retry, Try again!")
  95. def test_alps_release_jitter(self):
  96. """
  97. Set alps_release_jitter to a higher value and then notice that
  98. subsequest reservation cancellation requests are made by adding
  99. a random time interval (less than jitter) to alps_release_wait_time.
  100. """
  101. # assigning a random value to alps_release_jitter that is
  102. # measurable using mom log messages
  103. arj = 2.198
  104. arwt = 1
  105. max_delay = (arwt + math.ceil(arj))
  106. self.mom.add_config({'$alps_release_jitter': arj})
  107. self.mom.add_config({'$alps_release_wait_time': arwt})
  108. # There is no good way to test jitter and it is a random number
  109. # less than value set in alps_release_jitter. So in this case
  110. # we can probably try deleting a reservation a few times.
  111. n = retry = 5
  112. for _ in range(n):
  113. # submit a job and then delete it after it starts running
  114. start_time = int(time.time())
  115. j1 = Job(TEST_USER)
  116. jid1 = self.server.submit(j1)
  117. self.server.expect(JOB, {ATTR_state: 'R'}, id=jid1)
  118. time.sleep(2)
  119. self.server.delete(jid1)
  120. # Look for a message that confirms that reservation is deleted
  121. self.mom.log_match("%s;ALPS reservation cancelled" % jid1,
  122. starttime=start_time)
  123. # Now that we know that reservation is cleared we should
  124. # check for time difference between each cancellation request
  125. out = self.mom.log_match("%s;Canceling ALPS reservation *" % jid1,
  126. n='ALL', regexp=True, allmatch=True)
  127. # We found something, Let's first check there are atleast 2 such
  128. # log messages, If not then that means reservation was cancelled
  129. # in the first attempt itself, at that point right thing to do is
  130. # to either run it again or find out a way to delay the reservation
  131. # cancellation at ALPS level itself.
  132. if len(out) >= 2:
  133. retry -= 1
  134. # variable 'out' is a list of tuples and every second element
  135. # in a tuple is the matched log message
  136. time_prev = get_epoch(out[0][1])
  137. for data in out[1:]:
  138. time_current = get_epoch(data[1])
  139. self.assertLessEqual(time_current - time_prev, max_delay,
  140. msg="alps_release_jitter not working")
  141. time_prev = time_current
  142. if retry == 5:
  143. self.skipTest("Reservation cancelled without retry, Try again!")