# pbs_mom_hook_sync.py
# coding: utf-8
# Copyright (C) 1994-2018 Altair Engineering, Inc.
# For more information, contact Altair at www.altair.com.
#
# This file is part of the PBS Professional ("PBS Pro") software.
#
# Open Source License Information:
#
# PBS Pro is free software. You can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Commercial License Information:
#
# For a copy of the commercial license terms and conditions,
# go to: (http://www.pbspro.com/UserArea/agreement.html)
# or contact the Altair Legal Department.
#
# Altair’s dual-license business model allows companies, individuals, and
# organizations to create proprietary derivative works of PBS Pro and
# distribute them - whether embedded or bundled with other software -
# under a commercial license agreement.
#
# Use of Altair’s trademarks, including but not limited to "PBS™",
# "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
# trademark licensing policies.
import os
import time

from tests.functional import *
  37. class TestMomHookSync(TestFunctional):
  38. """
  39. This test suite tests to make sure a hook does not disappear in
  40. a series of hook event change from mom hook to server hook and
  41. then back to a mom hook. This is a good exercise to make sure
  42. hook updates are not lost even when mom is stopped, killed, and
  43. restarted during hook event changes.
  44. """
  45. def setUp(self):
  46. if len(self.moms) != 2:
  47. self.skip_test(reason="need 2 mom hosts: -p moms=<m1>:<m2>")
  48. TestFunctional.setUp(self)
  49. self.momA = self.moms.values()[0]
  50. self.momB = self.moms.values()[1]
  51. self.momA.delete_vnode_defs()
  52. self.momB.delete_vnode_defs()
  53. self.hostA = self.momA.shortname
  54. self.hostB = self.momB.shortname
  55. rc = self.server.manager(MGR_CMD_DELETE, NODE, None, "")
  56. self.assertEqual(rc, 0)
  57. rc = self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA)
  58. self.assertEqual(rc, 0)
  59. rc = self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostB)
  60. self.assertEqual(rc, 0)
  61. self.hook_name = "cpufreq"
  62. hook_body = "import pbs\n"
  63. a = {'event': 'execjob_begin', 'enabled': 'True'}
  64. self.server.create_import_hook(self.hook_name, a, hook_body)
  65. hook_config = """{
  66. "apple" : "pears",
  67. "banana" : "cucumbers"
  68. }
  69. """
  70. fn = self.du.create_temp_file(body=hook_config)
  71. a = {'content-type': 'application/x-config',
  72. 'content-encoding': 'default',
  73. 'input-file': fn}
  74. self.server.manager(MGR_CMD_IMPORT, HOOK, a, self.hook_name)
  75. os.remove(fn)
  76. self.server.log_match(
  77. 'successfully sent hook file.*cpufreq.HK ' +
  78. 'to %s.*' % self.momA.hostname,
  79. max_attempts=10, regexp=True)
  80. self.server.log_match(
  81. 'successfully sent hook file.*cpufreq.CF ' +
  82. 'to %s.*' % self.momA.hostname,
  83. max_attempts=10, regexp=True)
  84. self.server.log_match(
  85. 'successfully sent hook file.*cpufreq.PY ' +
  86. 'to %s.*' % self.momA.hostname,
  87. max_attempts=10, regexp=True)
  88. self.server.log_match(
  89. 'successfully sent hook file.*cpufreq.HK ' +
  90. 'to %s.*' % self.momB.hostname,
  91. max_attempts=10, regexp=True)
  92. self.server.log_match(
  93. 'successfully sent hook file.*cpufreq.CF ' +
  94. 'to %s.*' % self.momB.hostname,
  95. max_attempts=10, regexp=True)
  96. self.server.log_match(
  97. 'successfully sent hook file.*cpufreq.PY ' +
  98. 'to %s.*' % self.momB.hostname,
  99. max_attempts=10, regexp=True)
  100. def tearDown(self):
  101. self.momB.signal("-CONT")
  102. TestFunctional.tearDown(self)
  103. def test_1(self):
  104. """
  105. Given an existing mom hook, suspend mom on hostB,
  106. change the hook to be a server hook (causes a
  107. delete action), then change it back to a mom hook
  108. (results in a send action), and then resume mom.
  109. The delete action occurs first and then the send
  110. action so we end up with a mom hook in place.
  111. """
  112. self.momB.signal('-STOP')
  113. # Turn current mom hook into a server hook
  114. self.server.manager(MGR_CMD_SET, HOOK,
  115. {'event': 'queuejob'},
  116. id=self.hook_name)
  117. # Turn current mom hook back to a mom hook
  118. self.server.manager(MGR_CMD_SET, HOOK,
  119. {'event': 'exechost_periodic'},
  120. id=self.hook_name)
  121. # For testability, delay resuming the mom so we can
  122. # get a different timestamp on the hook updates
  123. self.logger.info("Waiting 3 secs for earlier hook updates to complete")
  124. time.sleep(3)
  125. now = int(time.time())
  126. self.momB.signal('-CONT')
  127. # Put another sleep delay so log_match() can see all the matches
  128. self.logger.info("Waiting 3 secs for new hook updates to complete")
  129. time.sleep(3)
  130. match_delete = self.server.log_match(
  131. 'successfully deleted hook file cpufreq.HK ' +
  132. 'from %s.*' % self.momB.hostname,
  133. starttime=now, max_attempts=10, regexp=True)
  134. # Without the fix, there won't be these sent hook file messages
  135. match_sent1 = self.server.log_match(
  136. 'successfully sent hook file.*cpufreq.HK ' +
  137. 'to %s.*' % self.momB.hostname,
  138. starttime=now, max_attempts=10, regexp=True)
  139. match_sent2 = self.server.log_match(
  140. 'successfully sent hook file.*cpufreq.CF ' +
  141. 'to %s.*' % self.momB.hostname,
  142. starttime=now, max_attempts=10, regexp=True)
  143. match_sent3 = self.server.log_match(
  144. 'successfully sent hook file.*cpufreq.PY ' +
  145. 'to %s.*' % self.momB.hostname,
  146. starttime=now, max_attempts=10, regexp=True)
  147. # the higher the number, the earlier the line appears in the log
  148. self.assertTrue(match_delete[0] > match_sent1[0])
  149. self.assertTrue(match_delete[0] > match_sent2[0])
  150. self.assertTrue(match_delete[0] > match_sent3[0])
  151. def test_2(self):
  152. """
  153. Given an existing mom hook, suspend mom on hostB,
  154. change the hook event to be another mom hook event
  155. (results in a send action), change the hook to be a
  156. server hook (causes a delete action),
  157. and then resume mom.
  158. The send action occurs first and then the delete
  159. action so we end up with no mom hook in place.
  160. """
  161. self.momB.signal('-STOP')
  162. # Turn current mom hook back to a mom hook
  163. self.server.manager(MGR_CMD_SET, HOOK,
  164. {'event': 'exechost_periodic'},
  165. id=self.hook_name)
  166. # Turn current mom hook into a server hook
  167. self.server.manager(MGR_CMD_SET, HOOK,
  168. {'event': 'queuejob'},
  169. id=self.hook_name)
  170. # For testability, delay resuming the mom so we can
  171. # get a different timestamp on the hook updates
  172. self.logger.info("Waiting 3 secs for earlier hook updates to complete")
  173. time.sleep(3)
  174. now = int(time.time())
  175. self.momB.signal('-CONT')
  176. # Put another sleep delay so log_match() can see all the matches
  177. self.logger.info("Waiting 3 secs for new hook updates to complete")
  178. time.sleep(3)
  179. match_delete = self.server.log_match(
  180. 'successfully deleted hook file cpufreq.HK ' +
  181. 'from %s.*' % self.momB.hostname,
  182. starttime=now, max_attempts=10, regexp=True)
  183. # Only the hook control file (.HK) is sent since that contains
  184. # the hook event change to exechost_periodic.
  185. match_sent = self.server.log_match(
  186. 'successfully sent hook file .*cpufreq.HK ' +
  187. 'to %s.*' % self.momB.hostname,
  188. starttime=now, max_attempts=10, regexp=True)
  189. # the higher the number, the earlier the line appears in the log
  190. self.assertTrue(match_sent[0] > match_delete[0])
  191. self.server.log_match(
  192. 'successfully sent hook file .*cpufreq.CF ' +
  193. 'to %s.*' % self.momB.hostname, existence=False,
  194. starttime=now, max_attempts=10, regexp=True)
  195. self.server.log_match(
  196. 'successfully sent hook file .*cpufreq.PY ' +
  197. 'to %s.*' % self.momB.hostname, existence=False,
  198. starttime=now, max_attempts=10, regexp=True)
  199. def test_3(self):
  200. """
  201. Like test_1 except instead of resuming mom,
  202. we kill -9 it and restart.
  203. """
  204. self.momB.signal('-STOP')
  205. # Turn current mom hook into a server hook
  206. self.server.manager(MGR_CMD_SET, HOOK,
  207. {'event': 'queuejob'},
  208. id=self.hook_name)
  209. # Turn current mom hook back to a mom hook
  210. self.server.manager(MGR_CMD_SET, HOOK,
  211. {'event': 'exechost_periodic'},
  212. id=self.hook_name)
  213. # For testability, delay resuming the mom so we can
  214. # get a different timestamp on the hook updates
  215. self.logger.info("Waiting 3 secs for earlier hook updates to complete")
  216. time.sleep(3)
  217. now = int(time.time())
  218. self.momB.signal('-KILL')
  219. self.momB.restart()
  220. # Killing and restarting mom would cause server to sync
  221. # up its version of the mom hook file resulting in an
  222. # additional send action, which would not alter the
  223. # outcome, as send action occurs after the delete action.
  224. self.server.log_match(
  225. 'Node;%s.*;' % (self.momB.hostname,) +
  226. 'Mom restarted on host',
  227. starttime=now, max_attempts=10, regexp=True)
  228. # Put another sleep delay so log_match() can see all the matches
  229. self.logger.info("Waiting 3 secs for new hook updates to complete")
  230. time.sleep(3)
  231. match_delete = self.server.log_match(
  232. 'successfully deleted hook file cpufreq.HK ' +
  233. 'from %s.*' % self.momB.hostname,
  234. starttime=now, max_attempts=10, regexp=True)
  235. # Without the fix, there won't be these sent hook file messages
  236. match_sent1 = self.server.log_match(
  237. 'successfully sent hook file.*cpufreq.HK ' +
  238. 'to %s.*' % self.momB.hostname,
  239. starttime=now, max_attempts=10, regexp=True)
  240. match_sent2 = self.server.log_match(
  241. 'successfully sent hook file.*cpufreq.CF ' +
  242. 'to %s.*' % self.momB.hostname,
  243. starttime=now, max_attempts=10, regexp=True)
  244. match_sent3 = self.server.log_match(
  245. 'successfully sent hook file.*cpufreq.PY ' +
  246. 'to %s.*' % self.momB.hostname,
  247. starttime=now, max_attempts=10, regexp=True)
  248. # the higher the number, the earlier the line appears in the log
  249. self.assertTrue(match_delete[0] > match_sent1[0])
  250. self.assertTrue(match_delete[0] > match_sent2[0])
  251. self.assertTrue(match_delete[0] > match_sent3[0])
  252. def test_4(self):
  253. """
  254. Like test_2 except instead of resuming mom,
  255. we kill -9 it and restart.
  256. """
  257. self.momB.signal('-STOP')
  258. # Turn current mom hook back to a mom hook
  259. self.server.manager(MGR_CMD_SET, HOOK,
  260. {'event': 'exechost_periodic'},
  261. id=self.hook_name)
  262. # Turn current mom hook into a server hook
  263. self.server.manager(MGR_CMD_SET, HOOK,
  264. {'event': 'queuejob'},
  265. id=self.hook_name)
  266. # For testability, delay resuming the mom so we can
  267. # get a different timestamp on the hook updates
  268. self.logger.info("Waiting 3 secs for earlier hook updates to complete")
  269. time.sleep(3)
  270. # Killing and restarting mom would cause server to sync
  271. # up its version of the mom hook file resulting in an
  272. # delete mom hook action as that hook is now seen as a
  273. # server hook. Since it's now a server hook, no further
  274. # mom hook sends are done.
  275. now = int(time.time())
  276. self.momB.signal('-KILL')
  277. self.momB.restart()
  278. # Put another sleep delay so log_match() can see all the matches
  279. self.logger.info("Waiting 3 secs for new hook updates to complete")
  280. time.sleep(3)
  281. self.server.log_match(
  282. 'successfully deleted hook file cpufreq.HK ' +
  283. 'from %s.*' % self.momB.hostname,
  284. starttime=now, max_attempts=10, regexp=True)
  285. self.server.log_match(
  286. 'successfully sent hook file .*cpufreq.HK ' +
  287. 'to %s.*' % self.momB.hostname, existence=False,
  288. starttime=now, max_attempts=10, regexp=True)
  289. self.server.log_match(
  290. 'successfully sent hook file .*cpufreq.CF ' +
  291. 'to %s.*' % self.momB.hostname, existence=False,
  292. starttime=now, max_attempts=10, regexp=True)
  293. self.server.log_match(
  294. 'successfully sent hook file .*cpufreq.PY ' +
  295. 'to %s.*' % self.momB.hostname, existence=False,
  296. starttime=now, max_attempts=10, regexp=True)