pbs_alps_inventory_check_hook.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. import os
  38. @tags('cray', 'mom')
  39. class TestAlpsInventoryCheckHook(TestFunctional):
  40. """
  41. PBS mom appears not to periodically automatically re-query the
  42. node inventory on Cray.
  43. """
  def setUp(self):
      """
      Prepare a Cray test run.

      Skips the test unless the detected platform is 'cray' or
      'craysim', runs the base class setUp, stores the first line of
      /etc/xthostname (trailing whitespace stripped) as
      self.crayhostname and sets 'enabled' to 'true' and 'freq' to 3
      on the PBS_alps_inventory_check hook.
      """
      self.platform = DshUtils().get_platform()
      if self.platform not in ('cray', 'craysim'):
          self.skipTest("This is not a cray platform")
      TestFunctional.setUp(self)
      with open("/etc/xthostname") as hostname_file:
          first_line = hostname_file.readline()
      self.crayhostname = first_line.rstrip()
      hook_attrs = {'enabled': 'true', 'freq': 3}
      self.server.manager(MGR_CMD_SET, PBS_HOOK, hook_attrs,
                          id='PBS_alps_inventory_check')
  def delete_cray_compute_node(self):
      """
      Delete one Cray compute node from the PBS node list.

      Filters the server's vnodes on vntype 'cray_compute' and deletes
      the first entry of the resulting list.
      """
      vntype_filter = {'resources_available.vntype': 'cray_compute'}
      matches = self.server.filter(VNODE, vntype_filter)
      # The filter result is indexed by an "attribute=value" string.
      compute_vnodes = matches["resources_available.vntype=cray_compute"]
      first_vnode = compute_vnodes[0]
      self.server.manager(MGR_CMD_DELETE, NODE, id=first_vnode)
  def test_apstat_cmd(self):
      """
      Check the mom log message emitted when apstat is missing from
      its expected/default location.

      Only runs on the 'craysim' platform. The apstat binary is renamed
      to 'stat' for the duration of the log match and always renamed
      back afterwards.
      """
      now = int(time.time())
      if self.platform != "craysim":
          # skipTest raises, so nothing below runs on other platforms.
          self.skipTest("This test can be run on a simulator")
      apstat_path = "/opt/cray/alps/default/bin/apstat"
      parked_path = "/opt/cray/alps/default/bin/stat"
      if os.path.exists(parked_path):
          # The file to be renamed is conflicting with existing file
          self.skipTest("Conflict in the testcase settings")
      expected = ("ALPS Inventory Check: apstat command can not "
                  "be found at /opt/cray/alps/default/bin/apstat")
      os.rename(apstat_path, parked_path)
      try:
          self.mom.log_match(expected, starttime=now,
                             max_attempts=10, interval=2)
      finally:
          # Put apstat back whether or not the message was found.
          os.rename(parked_path, apstat_path)
  def test_xthostname(self):
      """
      Check the mom log message emitted when the hook cannot find the
      /etc/xthostname file it reads the Cray hostname from.

      Only runs on the 'craysim' platform. /etc/xthostname is renamed
      to /etc/xt for the duration of the log match and always renamed
      back afterwards.
      """
      now = int(time.time())
      if self.platform != "craysim":
          # skipTest raises, so nothing below runs on other platforms.
          self.skipTest("This test can be run on a simulator")
      hostname_path = "/etc/xthostname"
      parked_path = "/etc/xt"
      if os.path.exists(parked_path):
          # The file to be renamed is conflicting with existing file
          self.skipTest("Conflict in the testcase settings")
      os.rename(hostname_path, parked_path)
      try:
          # NOTE(review): presumably matched as a fragment of a
          # "No ... found" style message - confirm against the hook.
          self.mom.log_match("/etc/xthostname file found on this host",
                             starttime=now, max_attempts=10, interval=2)
      finally:
          # Restore the hostname file whether or not the match passed.
          os.rename(parked_path, hostname_path)
  def test_start_of_hook(self):
      """
      Check that the mom log reports the start of ALPS inventory
      processing for the Cray hostname recorded in setUp.
      """
      now = int(time.time())
      expected = ("Processing ALPS inventory for crayhost %s"
                  % self.crayhostname)
      self.mom.log_match(expected, starttime=now,
                         max_attempts=10, interval=2)
  def test_cray_login_nodes(self):
      """
      Check the mom log message emitted when no nodes with vntype
      'cray_login' are present.

      The '$alps_client' entry is removed from the mom config, all
      nodes are deleted and a single node named after the mom's short
      hostname is created. The '$alps_client' entry is put back
      afterwards, even if the log match fails.
      """
      now = int(time.time())
      mom_config = self.mom.parse_config()
      saved_alps_client = mom_config["$alps_client"]
      del mom_config["$alps_client"]
      self.mom.apply_config(mom_config)
      self.host = self.mom.shortname
      expected = ("ALPS Inventory Check: No eligible "
                  "login nodes to perform inventory check")
      try:
          self.server.manager(MGR_CMD_DELETE, NODE, None, "")
          self.server.manager(MGR_CMD_CREATE, NODE, id=self.host)
          self.mom.log_match(expected, starttime=now,
                             max_attempts=10, interval=2)
      finally:
          mom_config["$alps_client"] = saved_alps_client
          # NOTE(review): the meaning of the second positional argument
          # (False) is not visible here - confirm against apply_config.
          self.mom.apply_config(mom_config, False)
  def test_pbs_home_path(self):
      """
      Check the mom log message emitted when the path to mom_priv
      cannot be retrieved from PBS_HOME.

      Only runs on the 'craysim' platform. PBS_HOME in the PBS config
      of the server host is set to an empty string, one Cray compute
      node is deleted and the log is searched for the error. The saved
      PBS_HOME value is written back afterwards, even if the log match
      fails.
      """
      if self.platform != "craysim":
          # skipTest raises, so nothing below runs on other platforms.
          self.skipTest("This test can be run on a simulator")
      now = int(time.time())
      server_host = self.server.shortname
      original_home = self.du.parse_pbs_config(server_host)['PBS_HOME']
      self.du.set_pbs_config(server_host, confs={'PBS_HOME': ''})
      expected = ("ALPS Inventory Check: Internal error in retrieving "
                  "path to mom_priv")
      try:
          self.delete_cray_compute_node()
          self.mom.log_match(expected, starttime=now,
                             max_attempts=10, interval=2)
      finally:
          self.du.set_pbs_config(server_host,
                                 confs={'PBS_HOME': original_home})
  def test_alps_and_pbs_are_in_sync(self):
      """
      Check that the mom log reports PBS and ALPS as being in sync,
      i.e. both report the same number of compute nodes in the Cray
      cluster.
      """
      now = int(time.time())
      expected = "ALPS Inventory Check: PBS and ALPS are in sync"
      self.mom.log_match(expected, starttime=now,
                         max_attempts=10, interval=2)
  def test_nodes_out_of_sync(self):
      """
      Check the mom log message emitted when PBS and ALPS are out of
      sync: one Cray compute node is deleted from PBS before the log
      is searched.
      """
      now = int(time.time())
      expected = ("ALPS Inventory Check: Compute "
                  "nodes defined in ALPS, but not in PBS")
      self.delete_cray_compute_node()
      self.mom.log_match(expected, starttime=now,
                         max_attempts=10, interval=2)
  def test_failure_in_refreshing_nodes(self):
      """
      Test log when the Hook is unable to HUP the Mom and successfully
      refresh nodes.

      Only runs on the 'craysim' platform. PBS_HOME in the PBS config
      of the server host is set to the value 'xyz', one Cray compute
      node is deleted and the mom log is searched for the refresh
      failure message naming this mom's hostname. The saved PBS_HOME
      value is written back afterwards, even if the log match fails.
      """
      if self.platform != "craysim":
          # skipTest raises, so nothing below runs on other platforms.
          # The reason text previously had its words out of order
          # ("on cray a simulator").
          self.skipTest("This test can be run on a cray simulator")
      now = int(time.time())
      server_host = self.server.shortname
      original_home = self.du.parse_pbs_config(server_host)['PBS_HOME']
      self.du.set_pbs_config(server_host, confs={'PBS_HOME': 'xyz'})
      try:
          self.delete_cray_compute_node()
          # Parenthesised so the whole message is formatted; '%' binds
          # tighter than '+', so the unparenthesised form only worked
          # because the first fragment has no format specifier.
          expected = (
              "ALPS Inventory Check: Failure in refreshing nodes on "
              "login node (%s)") % self.mom.hostname
          self.mom.log_match(expected, starttime=now,
                             max_attempts=10, interval=2)
      finally:
          self.du.set_pbs_config(server_host,
                                 confs={'PBS_HOME': original_home})