pbs_snapshot_unittest.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. import time
  37. import os
  38. import json
  39. from tests.functional import *
  40. from ptl.utils.pbs_snaputils import *
  41. class TestPBSSnapshot(TestFunctional):
  42. """
  43. Test suit with unit tests for the pbs_snapshot tool
  44. """
  45. pbs_snapshot_path = None
  46. snapdirs = []
  47. snaptars = []
  48. parent_dir = os.getcwd()
  49. def setUp(self):
  50. TestFunctional.setUp(self)
  51. # Create a custom resource called 'ngpus'
  52. # This will help us test parts of PBSSnapUtils which handle resources
  53. attr = {"type": "long", "flag": "nh"}
  54. self.server.manager(MGR_CMD_CREATE, RSC, attr,
  55. id="ngpus", expect=True,
  56. sudo=True)
  57. # Check whether pbs_snapshot is accessible
  58. try:
  59. self.pbs_snapshot_path = os.path.join(
  60. self.server.pbs_conf["PBS_EXEC"], "sbin", "pbs_snapshot")
  61. ret = self.du.run_cmd(cmd=[self.pbs_snapshot_path, "-h"])
  62. if ret['rc'] != 0:
  63. self.pbs_snapshot_path = None
  64. except Exception:
  65. self.pbs_snapshot_path = None
  66. # Check whether the user has root access or not
  67. # pbs_snapshot only supports being run as root, so skip the entire
  68. # testsuite if the user doesn't have root privileges
  69. ret = self.du.run_cmd(
  70. cmd=["ls", os.path.join(os.sep, "root")], sudo=True)
  71. if ret['rc'] != 0:
  72. self.skipTest("pbs_snapshot/PBSSnapUtils need root privileges")
  73. def setup_sc(self, sched_id, partition, port,
  74. sched_priv=None, sched_log=None):
  75. """
  76. Setup a scheduler
  77. :param sched_id: id of the scheduler
  78. :type sched_id: str
  79. :param partition: partition name for the scheduler (e.g "P1", "P1,P2")
  80. :type partition: str
  81. :param port: The port number string for the scheduler
  82. :type port: str
  83. :param sched_priv: 'sched_priv' (full path) for the scheduler
  84. :type sched_priv: str
  85. :param sched_log: 'sched_log' (full path) for the scheduler
  86. :type sched_log: str
  87. :param log_filter: log filter value for the scheduler
  88. :type log_filter: int
  89. """
  90. a = {'partition': partition,
  91. 'sched_host': self.server.hostname,
  92. 'sched_port': port}
  93. if sched_priv is not None:
  94. a['sched_priv'] = sched_priv
  95. if sched_log is not None:
  96. a['sched_log'] = sched_log
  97. self.server.manager(MGR_CMD_CREATE, SCHED, a, id=sched_id)
  98. if 'sched_priv' in a:
  99. sched_dir = os.path.dirname(sched_priv)
  100. self.scheds[sched_id].create_scheduler(sched_dir)
  101. self.scheds[sched_id].start(sched_dir)
  102. else:
  103. self.scheds[sched_id].create_scheduler()
  104. self.scheds[sched_id].start()
  105. self.server.manager(MGR_CMD_SET, SCHED,
  106. {'scheduling': 'True'}, id=sched_id, expect=True)
  107. def setup_queues_nodes(self, num_partitions):
  108. """
  109. Given a no. of partitions, create equal no. of associated queues
  110. and nodes
  111. :param num_partitions: number of partitions
  112. :type num_partitions: int
  113. :return a tuple of lists of queue and node ids:
  114. ([q1, q1, ..], [n1, n2, ..])
  115. """
  116. queues = []
  117. nodes = []
  118. a_q = {"queue_type": "execution",
  119. "started": "True",
  120. "enabled": "True"}
  121. a_n = {"resources_available.ncpus": 2}
  122. self.server.create_vnodes("vnode", a_n, (num_partitions + 1),
  123. self.mom)
  124. for i in range(num_partitions):
  125. partition_id = "P" + str(i + 1)
  126. # Create queue i + 1 with partition i + 1
  127. id_q = "wq" + str(i + 1)
  128. queues.append(id_q)
  129. a_q["partition"] = partition_id
  130. self.server.manager(MGR_CMD_CREATE, QUEUE, a_q, id=id_q)
  131. # Set the partition i + 1 on node i
  132. id_n = "vnode[" + str(i) + "]"
  133. nodes.append(id_n)
  134. a = {"partition": partition_id}
  135. self.server.manager(MGR_CMD_SET, NODE, a, id=id_n, expect=True)
  136. return (queues, nodes)
  137. def take_snapshot(self, acct_logs=None, daemon_logs=None,
  138. obfuscate=None, with_sudo=True, hosts=None):
  139. """
  140. Take a snapshot using pbs_snapshot command
  141. :param acct_logs: Number of accounting logs to capture
  142. :type acct_logs: int
  143. :param daemon_logs: Number of daemon logs to capture
  144. :type daemon_logs: int
  145. :param obfuscate: Obfuscate information?
  146. :type obfuscate: bool
  147. :param with_sudo: use the --with-sudo option?
  148. :type with_sudo: bool
  149. :param hosts: list of additional hosts to capture information from
  150. :type list
  151. :return a tuple of name of tarball and snapshot directory captured:
  152. (tarfile, snapdir)
  153. """
  154. if self.pbs_snapshot_path is None:
  155. self.skip_test("pbs_snapshot not found")
  156. snap_cmd = [self.pbs_snapshot_path, "-o", self.parent_dir]
  157. if acct_logs is not None:
  158. snap_cmd.append("--accounting-logs=" + str(acct_logs))
  159. if daemon_logs is not None:
  160. snap_cmd.append("--daemon-logs=" + str(daemon_logs))
  161. if obfuscate:
  162. snap_cmd.append("--obfuscate")
  163. if with_sudo:
  164. snap_cmd.append("--with-sudo")
  165. if hosts is not None:
  166. hosts_str = ",".join(hosts)
  167. snap_cmd.append("--additional-hosts=" + hosts_str)
  168. ret = self.du.run_cmd(cmd=snap_cmd, logerr=False)
  169. self.assertEquals(ret['rc'], 0)
  170. # Get the name of the tarball that was created
  171. # pbs_snapshot prints to stdout only the following:
  172. # "Snapshot available at: <path to tarball>"
  173. self.assertTrue(len(ret['out']) > 0)
  174. snap_out = ret['out'][0]
  175. output_tar = snap_out.split(":")[1]
  176. output_tar = output_tar.strip()
  177. # Check that the output tarball was created
  178. self.assertTrue(os.path.isfile(output_tar),
  179. "%s not found" % (output_tar))
  180. # Unwrap the tarball
  181. tar = tarfile.open(output_tar)
  182. tar.extractall(path=self.parent_dir)
  183. tar.close()
  184. # snapshot directory name = <snapshot>.tgz[:-4]
  185. snap_dir = output_tar[:-4]
  186. # Check that the directory exists
  187. self.assertTrue(os.path.isdir(snap_dir))
  188. self.snapdirs.append(snap_dir)
  189. self.snaptars.append(output_tar)
  190. return (output_tar, snap_dir)
  191. def test_capture_server(self):
  192. """
  193. Test the 'capture_server' interface of PBSSnapUtils
  194. """
  195. # Set something on the server so we can match it later
  196. job_hist_duration = "12:00:00"
  197. attr_list = {"job_history_enable": "True",
  198. "job_history_duration": job_hist_duration}
  199. self.server.manager(MGR_CMD_SET, SERVER, attr_list)
  200. num_daemon_logs = 2
  201. num_acct_logs = 5
  202. with PBSSnapUtils(out_dir=self.parent_dir, acct_logs=num_acct_logs,
  203. daemon_logs=num_daemon_logs,
  204. with_sudo=True) as snap_obj:
  205. snap_dir = snap_obj.capture_server(True, True)
  206. # Go through the snapshot and perform certain checks
  207. # Check 1: the snapshot exists
  208. self.assertTrue(os.path.isdir(snap_dir))
  209. # Check 2: all directories except the 'server' directory have no
  210. # files
  211. svr_fullpath = os.path.join(snap_dir, "server")
  212. for root, _, files in os.walk(snap_dir):
  213. for filename in files:
  214. file_fullpath = os.path.join(root, filename)
  215. # Find the common paths between 'server' & the file
  216. common_path = os.path.commonprefix([file_fullpath,
  217. svr_fullpath])
  218. self.assertEquals(os.path.basename(common_path), "server")
  219. # Check 3: qstat_Bf.out exists
  220. qstat_bf_out = os.path.join(snap_obj.snapdir, QSTAT_BF_PATH)
  221. self.assertTrue(os.path.isfile(qstat_bf_out))
  222. # Check 4: qstat_Bf.out has 'job_history_duration' set to 24:00:00
  223. with open(qstat_bf_out, "r") as fd:
  224. for line in fd:
  225. if "job_history_duration" in line:
  226. # Remove whitespaces
  227. line = "".join(line.split())
  228. # Split it up by '='
  229. key_val = line.split("=")
  230. self.assertEquals(key_val[1], job_hist_duration)
  231. # Cleanup
  232. if os.path.isdir(snap_dir):
  233. self.du.rm(path=snap_dir, recursive=True, force=True)
  234. def test_capture_all(self):
  235. """
  236. Test the 'capture_all' interface of PBSSnapUtils
  237. WARNING: Assumes that the test is being run on type - 1 PBS install
  238. """
  239. num_daemon_logs = 2
  240. num_acct_logs = 5
  241. # Check that all PBS daemons are up and running
  242. all_daemons_up = self.server.isUp()
  243. all_daemons_up = all_daemons_up and self.mom.isUp()
  244. all_daemons_up = all_daemons_up and self.comm.isUp()
  245. all_daemons_up = all_daemons_up and self.scheduler.isUp()
  246. if not all_daemons_up:
  247. # Skip the test
  248. self.skipTest("Type 1 installation not present or " +
  249. "all daemons are not running")
  250. with PBSSnapUtils(out_dir=self.parent_dir, acct_logs=num_acct_logs,
  251. daemon_logs=num_daemon_logs,
  252. with_sudo=True) as snap_obj:
  253. snap_dir = snap_obj.capture_all()
  254. snap_obj.finalize()
  255. # Test that all the expected information has been captured
  256. # PBSSnapUtils has various dictionaries which store metadata
  257. # for various objects. Create a list of these dicts
  258. all_info = [snap_obj.server_info, snap_obj.job_info,
  259. snap_obj.node_info, snap_obj.comm_info,
  260. snap_obj.hook_info, snap_obj.sched_info,
  261. snap_obj.resv_info, snap_obj.core_info,
  262. snap_obj.sys_info]
  263. skip_list = [ACCT_LOGS, QMGR_LPBSHOOK_OUT, "reservation", "job",
  264. QMGR_PR_OUT, PG_LOGS, "core_file_bt",
  265. "pbs_snapshot.log"]
  266. platform = self.du.get_platform()
  267. if not platform.startswith("linux"):
  268. skip_list.extend([ETC_HOSTS, ETC_NSSWITCH_CONF, LSOF_PBS_OUT,
  269. VMSTAT_OUT, DF_H_OUT, DMESG_OUT])
  270. for item_info in all_info:
  271. for key, info in item_info.iteritems():
  272. info_path = info[0]
  273. if info_path is None:
  274. continue
  275. # Check if we should skip checking this info
  276. skip_item = False
  277. for item in skip_list:
  278. if isinstance(item, int):
  279. if item == key:
  280. skip_item = True
  281. break
  282. else:
  283. if item in info_path:
  284. skip_item = True
  285. break
  286. if skip_item:
  287. continue
  288. # Check if this information was captured
  289. info_full_path = os.path.join(snap_dir, info_path)
  290. self.assertTrue(os.path.exists(info_full_path),
  291. msg=info_full_path + " was not captured")
  292. # Cleanup
  293. if os.path.isdir(snap_dir):
  294. self.du.rm(path=snap_dir, recursive=True, force=True)
  295. def test_capture_pbs_logs(self):
  296. """
  297. Test the 'capture_pbs_logs' interface of PBSSnapUtils
  298. """
  299. num_daemon_logs = 2
  300. num_acct_logs = 5
  301. # Check which PBS daemons are up on this machine.
  302. # We'll only check for logs from the daemons which were up
  303. # when the snapshot was taken.
  304. server_up = self.server.isUp()
  305. mom_up = self.mom.isUp()
  306. comm_up = self.comm.isUp()
  307. sched_up = self.scheduler.isUp()
  308. if not (server_up or mom_up or comm_up or sched_up):
  309. # Skip the test
  310. self.skipTest("No PBSPro daemons found on the system," +
  311. " skipping the test")
  312. with PBSSnapUtils(out_dir=self.parent_dir, acct_logs=num_acct_logs,
  313. daemon_logs=num_daemon_logs,
  314. with_sudo=True) as snap_obj:
  315. snap_dir = snap_obj.capture_pbs_logs()
  316. # Perform some checks
  317. # Check that the snapshot exists
  318. self.assertTrue(os.path.isdir(snap_dir))
  319. if server_up:
  320. # Check that 'server_logs' were captured
  321. log_path = os.path.join(snap_dir, SVR_LOGS_PATH)
  322. self.assertTrue(os.path.isdir(log_path))
  323. # Check that 'accounting_logs' were captured
  324. log_path = os.path.join(snap_dir, ACCT_LOGS_PATH)
  325. self.assertTrue(os.path.isdir(log_path))
  326. if mom_up:
  327. # Check that 'mom_logs' were captured
  328. log_path = os.path.join(snap_dir, MOM_LOGS_PATH)
  329. self.assertTrue(os.path.isdir(log_path))
  330. if comm_up:
  331. # Check that 'comm_logs' were captured
  332. log_path = os.path.join(snap_dir, COMM_LOGS_PATH)
  333. self.assertTrue(os.path.isdir(log_path))
  334. if sched_up:
  335. # Check that 'sched_logs' were captured
  336. log_path = os.path.join(snap_dir, DFLT_SCHED_LOGS_PATH)
  337. self.assertTrue(os.path.isdir(log_path))
  338. if os.path.isdir(snap_dir):
  339. self.du.rm(path=snap_dir, recursive=True, force=True)
  340. def test_snapshot_basic(self):
  341. """
  342. Test capturing a snapshot via the pbs_snapshot program
  343. """
  344. if self.pbs_snapshot_path is None:
  345. self.skip_test("pbs_snapshot not found")
  346. output_tar, _ = self.take_snapshot()
  347. # Check that the output tarball was created
  348. self.assertTrue(os.path.isfile(output_tar))
  349. def test_snapshot_without_logs(self):
  350. """
  351. Test capturing a snapshot via the pbs_snapshot program
  352. Capture no logs
  353. """
  354. if self.pbs_snapshot_path is None:
  355. self.skip_test("pbs_snapshot not found")
  356. (_, snap_dir) = self.take_snapshot(0, 0)
  357. # Check that 'server_logs' were not captured
  358. log_path = os.path.join(snap_dir, SVR_LOGS_PATH)
  359. self.assertTrue(not os.path.isdir(log_path))
  360. # Check that 'mom_logs' were not captured
  361. log_path = os.path.join(snap_dir, MOM_LOGS_PATH)
  362. self.assertTrue(not os.path.isdir(log_path))
  363. # Check that 'comm_logs' were not captured
  364. log_path = os.path.join(snap_dir, COMM_LOGS_PATH)
  365. self.assertTrue(not os.path.isdir(log_path))
  366. # Check that 'sched_logs' were not captured
  367. log_path = os.path.join(snap_dir, DFLT_SCHED_LOGS_PATH)
  368. self.assertTrue(not os.path.isdir(log_path))
  369. # Check that 'accounting_logs' were not captured
  370. log_path = os.path.join(snap_dir, ACCT_LOGS_PATH)
  371. self.assertTrue(not os.path.isdir(log_path))
  372. def test_obfuscate_resv_user_groups(self):
  373. """
  374. Test obfuscation of user & group related attributes while capturing
  375. snapshots via pbs_snapshot
  376. """
  377. if self.pbs_snapshot_path is None:
  378. self.skip_test("pbs_snapshot not found")
  379. now = int(time.time())
  380. # Let's submit a reservation with Authorized_Users and
  381. # Authorized_Groups set
  382. attribs = {ATTR_auth_u: TEST_USER1, ATTR_auth_g: TSTGRP0,
  383. ATTR_l + ".ncpus": 2, 'reserve_start': now + 25,
  384. 'reserve_end': now + 45}
  385. resv_obj = Reservation(attrs=attribs)
  386. resv_id = self.server.submit(resv_obj)
  387. attribs = {'reserve_state': (MATCH_RE, 'RESV_CONFIRMED|2')}
  388. self.server.expect(RESV, attribs, id=resv_id)
  389. # Now, take a snapshot with --obfuscate
  390. (_, snap_dir) = self.take_snapshot(0, 0, True)
  391. # Make sure that the pbs_rstat -f output captured doesn't have the
  392. # Authorized user and group names
  393. pbsrstat_path = os.path.join(snap_dir, PBS_RSTAT_F_PATH)
  394. self.assertTrue(os.path.isfile(pbsrstat_path))
  395. with open(pbsrstat_path, "r") as rstatfd:
  396. all_content = rstatfd.read()
  397. self.assertFalse(str(TEST_USER1) in all_content)
  398. self.assertFalse(str(TSTGRP0) in all_content)
  399. def test_multisched_support(self):
  400. """
  401. Test that pbs_snapshot can capture details of all schedulers
  402. """
  403. if self.pbs_snapshot_path is None:
  404. self.skip_test("pbs_snapshot not found")
  405. # Setup 3 schedulers
  406. sched_ids = ["sc1", "sc2", "sc3", "default"]
  407. self.setup_sc(sched_ids[0], "P1", "15050")
  408. self.setup_sc(sched_ids[1], "P2", "15051")
  409. # Setup scheduler at non-default location
  410. dir_path = os.path.join(os.sep, 'var', 'spool', 'pbs', 'sched_dir')
  411. if not os.path.exists(dir_path):
  412. self.du.mkdir(path=dir_path, sudo=True)
  413. sched_priv = os.path.join(dir_path, 'sched_priv_sc3')
  414. sched_log = os.path.join(dir_path, 'sched_logs_sc3')
  415. self.setup_sc(sched_ids[2], "P3", "15052", sched_priv, sched_log)
  416. # Add 3 partitions, each associated with a queue and a node
  417. (q_ids, _) = self.setup_queues_nodes(3)
  418. # Submit some jobs to fill the system up and get the multiple
  419. # schedulers busy
  420. for q_id in q_ids:
  421. for _ in range(2):
  422. attr = {"queue": q_id, "Resource_List.ncpus": "1"}
  423. j = Job(TEST_USER1, attrs=attr)
  424. self.server.submit(j)
  425. # Capture a snapshot of the system with multiple schedulers
  426. (_, snapdir) = self.take_snapshot()
  427. # Check that sched priv and sched logs for all schedulers was captured
  428. for sched_id in sched_ids:
  429. if (sched_id == "default"):
  430. schedi_priv = os.path.join(snapdir, DFLT_SCHED_PRIV_PATH)
  431. schedi_logs = os.path.join(snapdir, DFLT_SCHED_LOGS_PATH)
  432. else:
  433. schedi_priv = os.path.join(snapdir, "sched_priv_" + sched_id)
  434. schedi_logs = os.path.join(snapdir, "sched_logs_" + sched_id)
  435. self.assertTrue(os.path.isdir(schedi_priv))
  436. self.assertTrue(os.path.isdir(schedi_logs))
  437. # Make sure that these directories are not empty
  438. self.assertTrue(len(os.listdir(schedi_priv)) > 0)
  439. self.assertTrue(len(os.listdir(schedi_logs)) > 0)
  440. # Check that qmgr -c "l sched" captured information about all scheds
  441. lschedpath = os.path.join(snapdir, QMGR_LSCHED_PATH)
  442. with open(lschedpath, "r") as fd:
  443. scheds_found = 0
  444. for line in fd:
  445. if line.startswith("Sched "):
  446. sched_id = line.split("Sched ")[1]
  447. sched_id = sched_id.strip()
  448. self.assertTrue(sched_id in sched_ids)
  449. scheds_found += 1
  450. self.assertEqual(scheds_found, 4)
  451. def test_snapshot_from_hook(self):
  452. """
  453. Test that pbs_snapshot can be called from inside a hook
  454. """
  455. logmsg = "pbs_snapshot was successfully run"
  456. hook_body = """
  457. import pbs
  458. import os
  459. import subprocess
  460. import time
  461. pbs_snap_exec = os.path.join(pbs.pbs_conf['PBS_EXEC'], "sbin", "pbs_snapshot")
  462. if not os.path.isfile(pbs_snap_exec):
  463. raise ValueError("pbs_snapshot executable not found")
  464. ref_time = time.time()
  465. snap_cmd = [pbs_snap_exec, "-o", "."]
  466. assert(not subprocess.call(snap_cmd))
  467. # Check that the snapshot was captured
  468. snapshot_found = False
  469. for filename in os.listdir("."):
  470. if filename.startswith("snapshot") and filename.endswith(".tgz"):
  471. # Make sure the mtime on this file is recent enough
  472. mtime_file = os.path.getmtime(filename)
  473. if mtime_file > ref_time:
  474. snapshot_found = True
  475. break
  476. assert(snapshot_found)
  477. pbs.logmsg(pbs.EVENT_DEBUG,"%s")
  478. """ % (logmsg)
  479. hook_name = "snapshothook"
  480. attr = {"event": "periodic", "freq": 5}
  481. rv = self.server.create_import_hook(hook_name, attr, hook_body,
  482. overwrite=True)
  483. self.assertTrue(rv)
  484. self.server.log_match(logmsg)
  485. def snapshot_multi_mom_basic(self, obfuscate=False):
  486. """
  487. Test capturing data from a multi-mom system
  488. :param obfuscate: take snapshot with --obfuscate?
  489. :type obfuscate: bool
  490. """
  491. # Skip test if number of moms is not equal to two
  492. if len(self.moms) != 2:
  493. self.skipTest("test requires atleast two moms as input, "
  494. "use -p moms=<mom 1>:<mom 2>")
  495. mom1 = self.moms.values()[0]
  496. mom2 = self.moms.values()[1]
  497. host1 = mom1.shortname
  498. host2 = mom2.shortname
  499. self.server.manager(MGR_CMD_DELETE, NODE, None, "")
  500. self.server.manager(MGR_CMD_CREATE, NODE, id=host1)
  501. self.server.manager(MGR_CMD_CREATE, NODE, id=host2)
  502. # Give the moms a chance to contact the server.
  503. self.server.expect(NODE, {'state': 'free'}, id=host1)
  504. self.server.expect(NODE, {'state': 'free'}, id=host2)
  505. # Capture a snapshot with details from the remote moms
  506. (_, snapdir) = self.take_snapshot(hosts=[host1, host2],
  507. obfuscate=obfuscate)
  508. # Check that snapshots for the 2 hosts were captured
  509. host1_outtar = os.path.join(snapdir, host1 + "_snapshot.tgz")
  510. host2_outtar = os.path.join(snapdir, host2 + "_snapshot.tgz")
  511. self.assertTrue(os.path.isfile(host1_outtar),
  512. "Failed to capture snapshot on %s" % (host1))
  513. self.assertTrue(os.path.isfile(host2_outtar),
  514. "Failed to capture snapshot on %s" % (host2))
  515. # Unwrap the host snapshots
  516. host1_snapdir = host1 + "_snapshot"
  517. host2_snapdir = host2 + "_snapshot"
  518. os.mkdir(host1_snapdir)
  519. self.snapdirs.append(host1_snapdir)
  520. os.mkdir(host2_snapdir)
  521. self.snapdirs.append(host2_snapdir)
  522. tar = tarfile.open(host1_outtar)
  523. tar.extractall(path=host1_snapdir)
  524. tar.close()
  525. tar = tarfile.open(host2_outtar)
  526. tar.extractall(path=host2_snapdir)
  527. tar.close()
  528. # Determine the name of the child snapshots
  529. snap1_path = self.du.listdir(path=host1_snapdir, fullpath=True)
  530. snap2_path = self.du.listdir(path=host2_snapdir, fullpath=True)
  531. snap1_path = snap1_path[0]
  532. snap2_path = snap2_path[0]
  533. # Check that at least pbs.conf was captured on all of these hosts
  534. self.assertTrue(os.path.isfile(os.path.join(snapdir, "pbs.conf")),
  535. "Main snapshot didn't capture all expected"
  536. " information")
  537. self.assertTrue(os.path.isfile(os.path.join(snap1_path, "pbs.conf")),
  538. "%s snapshot didn't capture all expected"
  539. " information" % (host1))
  540. self.assertTrue(os.path.isfile(os.path.join(snap2_path, "pbs.conf")),
  541. "%s snapshot didn't capture all expected"
  542. " information" % (host2))
  543. def test_multi_mom_basic(self):
  544. """
  545. Test running pbs_snapshot on a multi-mom setup
  546. """
  547. self.snapshot_multi_mom_basic()
  548. def test_multi_mom_basic_obfuscate(self):
  549. """
  550. Test running pbs_snapshot on a multi-mom setup with obfuscation
  551. """
  552. self.snapshot_multi_mom_basic(obfuscate=True)
  553. def test_no_sudo(self):
  554. """
  555. Test that running pbs_snapshot without sudo doesn't fail
  556. """
  557. output_tar, _ = self.take_snapshot(with_sudo=False)
  558. # Check that the output tarball was created
  559. self.assertTrue(os.path.isfile(output_tar))
  560. def test_snapshot_json(self):
  561. """
  562. Test that pbs_snapshot captures job and vnode info in json
  563. """
  564. _, snap_dir = self.take_snapshot()
  565. # Verify that qstat json was captured
  566. jsonpath = os.path.join(snap_dir, QSTAT_F_JSON_PATH)
  567. self.assertTrue(os.path.isfile(jsonpath))
  568. with open(jsonpath, "r") as fd:
  569. json.load(fd) # this will fail if file is not a valid json
  570. # Verify that pbsnodes json was captured
  571. jsonpath = os.path.join(snap_dir, PBSNODES_AVFJSON_PATH)
  572. self.assertTrue(os.path.isfile(jsonpath))
  573. with open(jsonpath, "r") as fd:
  574. json.load(fd)
  575. @classmethod
  576. def tearDownClass(self):
  577. # Delete the snapshot directories and tarballs created
  578. for snap_dir in self.snapdirs:
  579. self.du.rm(path=snap_dir, recursive=True, force=True)
  580. for snap_tar in self.snaptars:
  581. self.du.rm(path=snap_tar, sudo=True, force=True)
  582. TestFunctional.tearDownClass()