pbs_cray_vnode_pool.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. @tags('cray', 'configuration')
  38. class TestVnodePool(TestFunctional):
  39. """
  40. This test suite tests how PBS makes use of node attribute "vnode_pool"
  41. It expects at least 2 moms to be specified to it while executing.
  42. """
  43. def setUp(self):
  44. if not self.du.get_platform().startswith('cray'):
  45. self.skipTest("This test can only run on a cray")
  46. TestFunctional.setUp(self)
  47. if len(self.moms.values()) < 2:
  48. self.skipTest("Provide at least 2 moms while invoking test")
  49. # The moms provided to the test may have unwanted vnodedef files.
  50. if self.moms.values()[0].has_vnode_defs():
  51. self.moms.values()[0].delete_vnode_defs()
  52. if self.moms.values()[1].has_vnode_defs():
  53. self.moms.values()[1].delete_vnode_defs()
  54. # Check if vnodes exist before deleting nodes.
  55. # Clean all default nodes because each test case will set up nodes.
  56. try:
  57. self.server.status(NODE)
  58. self.server.manager(MGR_CMD_DELETE, NODE, None, "")
  59. except PbsStatusError as e:
  60. self.assertTrue("Server has no node list" in e.msg[0])
  61. def test_invalid_values(self):
  62. """
  63. Invalid vnode_pool values shall result in errors.
  64. """
  65. self.momA = self.moms.values()[0]
  66. self.momB = self.moms.values()[1]
  67. self.hostA = self.momA.shortname
  68. self.hostB = self.momB.shortname
  69. attr_A = {'vnode_pool': '-1'}
  70. try:
  71. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA,
  72. attrib=attr_A)
  73. except PbsManagerError as e:
  74. self.assertTrue("Illegal attribute or resource value" in e.msg[0])
  75. attr_A = {'vnode_pool': '0'}
  76. try:
  77. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA,
  78. attrib=attr_A)
  79. except PbsManagerError as e:
  80. self.assertTrue("Illegal attribute or resource value" in e.msg[0])
  81. attr_A = {'vnode_pool': 'a'}
  82. try:
  83. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA,
  84. attrib=attr_A)
  85. except PbsManagerError as e:
  86. self.assertTrue("Illegal attribute or resource value" in e.msg[0])
  87. def test_two_moms_single_vnode_pool(self):
  88. """
  89. Same vnode_pool for two moms shall result in one mom being the
  90. inventory mom and the other the non-inventory mom.
  91. The inventory mom goes down (e.g. killed).
  92. Compute nodes remain up even when the inventory mom is killed,
  93. since another mom is reporting them.
  94. Check that a new inventory mom is listed in the log.
  95. Bring up killed mom.
  96. """
  97. self.server.manager(MGR_CMD_SET, SERVER, {"log_events": -1})
  98. self.momA = self.moms.values()[0]
  99. self.momB = self.moms.values()[1]
  100. self.hostA = self.momA.shortname
  101. self.hostB = self.momB.shortname
  102. attr = {'vnode_pool': '1'}
  103. start_time = int(time.time())
  104. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA, attrib=attr)
  105. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostB, attrib=attr)
  106. self.server.log_match("Mom %s added to vnode_pool %s" %
  107. (self.momB.hostname, '1'), max_attempts=5,
  108. starttime=start_time)
  109. _msg = "Hello (no inventory required) from server"
  110. try:
  111. self.momA.log_match(_msg, max_attempts=9, starttime=start_time)
  112. found_in_momA = 1
  113. except PtlLogMatchError:
  114. found_in_momA = 0
  115. try:
  116. self.momB.log_match(_msg, max_attempts=9, starttime=start_time)
  117. found_in_momB = 1
  118. except PtlLogMatchError:
  119. found_in_momB = 0
  120. self.assertEqual(found_in_momA + found_in_momB,
  121. 1, msg="an inventory mom not chosen correctly")
  122. # Only one mom is inventory mom
  123. if (found_in_momA == 0):
  124. inv_mom = self.momA
  125. noninv_mom = self.momB
  126. else:
  127. inv_mom = self.momB
  128. noninv_mom = self.momA
  129. self.logger.info("Inventory mom is %s." % inv_mom.shortname)
  130. self.logger.info("Non-inventory mom is %s." %
  131. noninv_mom.shortname)
  132. start_time = int(time.time())
  133. # Kill inventory mom
  134. inv_mom.signal('-KILL')
  135. # Check that former inventory mom is down
  136. rv = self.server.expect(
  137. VNODE, {'state': 'down'}, id=inv_mom.shortname,
  138. max_attempts=10, interval=2)
  139. self.assertTrue(rv)
  140. # Check if inventory mom changed and is listed in the server log.
  141. self.server.log_match(
  142. "Setting inventory_mom for vnode_pool %s to %s" %
  143. ('1', noninv_mom.shortname), max_attempts=5,
  144. starttime=start_time)
  145. self.logger.info(
  146. "Inventory mom is now %s in server logs." %
  147. (noninv_mom.shortname))
  148. # Check compute nodes are up
  149. vlist = []
  150. try:
  151. vnl = self.server.filter(
  152. VNODE, {'resources_available.vntype': 'cray_compute'})
  153. vlist = vnl["resources_available.vntype=cray_compute"]
  154. except Exception:
  155. pass
  156. # Loop through each compute vnode in the list and check if state = free
  157. for v1 in vlist:
  158. # Check that the node is in free state
  159. rv = self.server.expect(
  160. VNODE, {'state': 'free'}, id=v1, max_attempts=3, interval=2)
  161. self.assertTrue(rv)
  162. # Start the previous inv mom.
  163. inv_mom.start()
  164. # Check previous inventory mom is up
  165. rv = self.server.expect(
  166. VNODE, {'state': 'free'}, id=inv_mom.shortname,
  167. max_attempts=3, interval=2)
  168. self.assertTrue(rv)
  169. def test_two_moms_different_vnode_pool(self):
  170. """
  171. Differing vnode_pool for two moms shall result in both moms reporting
  172. inventory.
  173. """
  174. self.momA = self.moms.values()[0]
  175. self.momB = self.moms.values()[1]
  176. self.hostA = self.momA.shortname
  177. self.hostB = self.momB.shortname
  178. attr_A = {'vnode_pool': '1'}
  179. attr_B = {'vnode_pool': '2'}
  180. start_time = int(time.time())
  181. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA, attrib=attr_A)
  182. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostB, attrib=attr_B)
  183. _msg = "Hello (no inventory required) from server"
  184. try:
  185. self.momA.log_match(_msg, max_attempts=5, starttime=start_time)
  186. found_in_momA = 1
  187. except PtlLogMatchError:
  188. found_in_momA = 0
  189. try:
  190. self.momB.log_match(_msg, max_attempts=5, starttime=start_time)
  191. found_in_momB = 1
  192. except PtlLogMatchError:
  193. found_in_momB = 0
  194. self.assertTrue((found_in_momA + found_in_momB == 0),
  195. msg="Both moms must report inventory")
  196. def test_invalid_usage(self):
  197. """
  198. Setting vnode_pool for an existing mom that does not have a vnode_pool
  199. attribute shall not be allowable.
  200. Setting vnode_pool for an existing mom having a vnode_pool attribute
  201. shall not be allowable.
  202. Unsetting vnode_pool for an existing mom having a vnode_pool attribute
  203. shall not be allowable.
  204. """
  205. self.momA = self.moms.values()[0]
  206. self.hostA = self.momA.shortname
  207. self.logger.info("hostA is %s." % self.hostA)
  208. start_time = int(time.time())
  209. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA)
  210. attr_2 = {'vnode_pool': '2'}
  211. try:
  212. self.server.manager(
  213. MGR_CMD_SET, NODE, id=self.hostA, attrib=attr_2, expect=False)
  214. except PbsManagerError as e:
  215. self.assertTrue("Invalid request" in e.msg[0])
  216. self.server.log_match("Unsupported actions for vnode_pool",
  217. max_attempts=5, starttime=start_time)
  218. self.logger.info("Found correct server log message")
  219. self.momB = self.moms.values()[1]
  220. self.hostB = self.momB.shortname
  221. attr_1 = {'vnode_pool': '1'}
  222. start_time = int(time.time())
  223. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostB, attrib=attr_1)
  224. attr_2 = {'vnode_pool': '2'}
  225. try:
  226. self.server.manager(MGR_CMD_SET, NODE, id=self.hostB,
  227. attrib=attr_2, expect=False)
  228. except PbsManagerError as e:
  229. self.assertTrue("Invalid request" in e.msg[0])
  230. self.server.log_match("Unsupported actions for vnode_pool",
  231. max_attempts=5, starttime=start_time)
  232. try:
  233. self.server.manager(MGR_CMD_UNSET, NODE, id=self.hostB,
  234. attrib='vnode_pool', expect=False)
  235. except PbsManagerError as e:
  236. self.assertTrue("Illegal value for node vnode_pool" in e.msg[0])