#!/usr/bin/env python
# coding: utf-8

# Copyright (C) 1994-2018 Altair Engineering, Inc.
# For more information, contact Altair at www.altair.com.
#
# This file is part of the PBS Professional ("PBS Pro") software.
#
# Open Source License Information:
#
# PBS Pro is free software. You can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Commercial License Information:
#
# For a copy of the commercial license terms and conditions,
# go to: (http://www.pbspro.com/UserArea/agreement.html)
# or contact the Altair Legal Department.
#
# Altair’s dual-license business model allows companies, individuals, and
# organizations to create proprietary derivative works of PBS Pro and
# distribute them - whether embedded or bundled with other software -
# under a commercial license agreement.
#
# Use of Altair’s trademarks, including but not limited to "PBS™",
# "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
# trademark licensing policies.

import os
import sys
import getopt
import errno
import logging
import ptl
import time
import tarfile
from getopt import GetoptError
from threading import Thread
from ptl.lib.pbs_testlib import PtlConfig
from ptl.utils.pbs_snaputils import PBSSnapUtils
from ptl.utils.pbs_cliutils import CliUtils
from ptl.utils.pbs_dshutils import DshUtils

# Text this script prints on stdout right before the path of the snapshot.
# The parent parses the same marker out of each child's stdout, so the
# printing side and the parsing side must agree on it.
SNAPSHOT_MARKER = "Snapshot available at: "


def trap_exceptions(etype, value, tb):
    """
    Trap SIGINT and SIGPIPE

    Installed as sys.excepthook: KeyboardInterrupt and IOError with
    errno EPIPE are silently dropped, everything else is handed to the
    default hook.

    :param etype - class of the uncaught exception
    :type etype - type
    :param value - the uncaught exception instance
    :type value - Exception
    :param tb - traceback associated with the exception
    :type tb - traceback
    """
    # This is done so that any exceptions created by this method itself
    # are caught by the default excepthook to prevent endless recursion
    sys.excepthook = sys.__excepthook__

    if issubclass(etype, KeyboardInterrupt):
        pass
    elif issubclass(etype, IOError) and value.errno == errno.EPIPE:
        pass
    else:
        sys.__excepthook__(etype, value, tb)

    # Set sys.excepthook back to trap_exceptions to catch future exceptions
    sys.excepthook = trap_exceptions


sys.excepthook = trap_exceptions


def usage():
    """
    Print the command line help of pbs_snapshot on stdout
    """
    msg = """
Usage: pbs_snapshot -o <path to existing output directory> [OPTION]

    Take snapshot of a PBS system and optionally capture logs for diagnostics

    -H <hostname>                     primary hostname to operate on
                                      Defaults to local host
    -l <loglevel>                     set log level to one of INFO, INFOCLI,
                                      INFOCLI2, DEBUG, DEBUG2, WARNING, ERROR
                                      or FATAL
    -h, --help                        display this usage message
    --daemon-logs=<num days>          number of daemon logs to collect
    --accounting-logs=<num days>      number of accounting logs to collect
    --additional-hosts=<hostname>     collect data from additional hosts
                                      'hostname' is a comma separated list
    --map=<file>                      file to store the map of obfuscated data
    --obfuscate                       obfuscates sensitive data
    --with-sudo                       Uses sudo to capture privileged data
    --version                         print version number and exit
"""

    print(msg)


def _find_snapshot_path(stdout_lines):
    """
    Extract the snapshot path announced in a child pbs_snapshot's stdout

    :param stdout_lines - lines of stdout produced by the child command
    :type stdout_lines - list of str or None
    :returns - text following the "Snapshot available at: " marker on the
               last line that carries it, or None when no line does
    """
    # The announcement is the final thing pbs_snapshot prints, so look
    # from the end; tolerate trailing noise and empty output
    for line in reversed(stdout_lines or []):
        if SNAPSHOT_MARKER in line:
            return line.split(SNAPSHOT_MARKER, 1)[1].strip()
    return None


def childsnap_thread(logger, host):
    """
    Thread routine for each child snapshot being run on a remote host

    Runs pbs_snapshot on 'host' inside a scratch directory, copies the
    resulting tar file (and the map file, if any) into the local output
    directory and removes the scratch directory again.

    Reads the settings assigned in the __main__ section of this file:
    out_dir, daemon_logs, acct_logs, anonymize, map_file and with_sudo.

    :param logger - Logging object
    :type logger - logging.Logger
    :param host - the hostname for remote host
    :type host - str
    """
    logger.info("Capturing snapshot from host %s", host)

    du = DshUtils()

    # Get path to pbs_snapshot on remote host
    host_pbsconf = du.parse_pbs_config(hostname=host)
    try:
        pbs_exec_path = host_pbsconf["PBS_EXEC"]
    except (KeyError, TypeError):
        # TypeError covers a config that could not be parsed into a
        # mapping at all - NOTE(review): confirm what parse_pbs_config
        # returns for an unreachable host
        logger.error("Couldn't find PBS_EXEC on host %s"
                     ", won't capture snapshot on this host", host)
        return
    host_pbssnappath = os.path.join(pbs_exec_path, "sbin", "pbs_snapshot")

    # Create a directory on the remote host with a unique name
    # We will create the snapshot here
    timestamp = str(int(time.time()))
    snap_home = "host_" + timestamp
    du.mkdir(hostname=host, path=snap_home)

    try:
        # Run pbs_snapshot on the remote host
        cmd = [host_pbssnappath, "-o", snap_home,
               "--daemon-logs=" + str(daemon_logs),
               "--accounting-logs=" + str(acct_logs)]
        if anonymize:
            cmd.extend(["--obfuscate", "--map=" + map_file])
        if with_sudo:
            cmd.append("--with-sudo")

        ret = du.run_cmd(hosts=host, cmd=cmd, logerr=False)
        if ret['rc'] != 0:
            logger.error("Error capturing snapshot from host %s", host)
            print(ret['err'])
            return

        # Get the snapshot tar filename from stdout
        snaptarname = _find_snapshot_path(ret['out'])
        if snaptarname is None:
            logger.error("Couldn't determine the snapshot file name"
                         " from the output of host %s", host)
            return

        # Copy over the snapshot tar file as <hostname>_snapshot.tgz
        dest_path = os.path.join(out_dir, host + "_snapshot.tgz")
        src_path = host + ":" + snaptarname
        ret = du.run_copy(src=src_path, dest=dest_path)
        if ret['rc'] != 0:
            logger.error("Error copying child snapshot from host %s", host)

        # Copy over map file if any as '<host>_<map filename>'
        if map_file is not None:
            # map_file may carry a directory part (the default one is an
            # absolute path); only its file name may be used to build the
            # local name, otherwise the destination is a non-existent
            # nested path and the copy cannot succeed
            map_name = os.path.basename(map_file)
            dest_path = os.path.join(out_dir, host + "_" + map_name)
            src_path = os.path.join(snap_home, map_file)
            src_path = host + ":" + src_path
            ret = du.run_copy(src=src_path, dest=dest_path)
            if ret['rc'] != 0:
                logger.error("Error copying map file from host %s", host)
    finally:
        # Delete the snapshot home from remote host, also when one of the
        # steps above bailed out early
        du.rm(hostname=host, path=snap_home, recursive=True, force=True)


if __name__ == '__main__':

    # Arguments to PBSSnapUtils
    out_dir = None
    primary_host = None
    log_level = "INFOCLI2"
    acct_logs = 30  # Capture 30 days of accounting logs by default
    daemon_logs = 5  # Capture 5 days of daemon logs by default
    additional_hosts = None
    map_file = None
    anonymize = False
    log_file = "pbs_snapshot.log"
    with_sudo = False

    PtlConfig()

    # Parse the options provided to pbs_snapshot
    try:
        sopt = "d:H:l:o:h"
        lopt = ["accounting-logs=", "daemon-logs=", "help",
                "additional-hosts=", "map=", "obfuscate", "with-sudo",
                "version"]
        opts, args = getopt.getopt(sys.argv[1:], sopt, lopt)
    except GetoptError:
        usage()
        sys.exit(1)

    for o, val in opts:
        if o == "-o":
            out_dir = val
        elif o == "-H":
            primary_host = val
        elif o == "-l":
            log_level = val
        elif o == "-h" or o == "--help":
            usage()
            sys.exit(0)
        elif o == "--accounting-logs":
            try:
                acct_logs = int(val)
            except ValueError:
                raise ValueError("Invalid value for --accounting-logs"
                                 " option, should be an integer")
        elif o == "--daemon-logs":
            try:
                daemon_logs = int(val)
            except ValueError:
                raise ValueError("Invalid value for --daemon-logs"
                                 " option, should be an integer")
        elif o == "--additional-hosts":
            additional_hosts = val
        elif o == "--map":
            map_file = val
        elif o == "--obfuscate":
            anonymize = True
        elif o == "--with-sudo":
            with_sudo = True
        elif o == "--version":
            print(ptl.__version__)
            sys.exit(0)
        else:
            # Reached for options getopt accepts but this script does not
            # act upon (e.g. -d)
            sys.stderr.write("Unrecognized option\n")
            usage()
            sys.exit(1)

    # -o is a mandatory option, so make sure that it was provided
    if out_dir is None:
        sys.stderr.write("-o option not provided\n")
        usage()
        sys.exit(1)
    elif not os.path.isdir(out_dir):
        sys.stderr.write("-o path should exist,"
                         " this is where the snapshot is captured\n")
        usage()
        sys.exit(1)

    # Log to <out_dir>/pbs_snapshot.log and mirror the 'ptl' logger on
    # the console with the same level and format
    fmt = '%(asctime)-15s %(levelname)-8s %(message)s'
    level_int = CliUtils.get_logging_level(log_level)
    log_path = os.path.join(out_dir, log_file)
    logging.basicConfig(filename=log_path, filemode='w+',
                        level=level_int, format=fmt)
    stream_hdlr = logging.StreamHandler()
    stream_hdlr.setLevel(level_int)
    stream_hdlr.setFormatter(logging.Formatter(fmt))
    ptl_logger = logging.getLogger('ptl')
    ptl_logger.addHandler(stream_hdlr)
    ptl_logger.setLevel(level_int)

    if anonymize is True:
        # find the parent directory of the snapshot
        # This will be used to store the map file
        out_abspath = os.path.abspath(out_dir)
        if map_file is None:
            map_file = os.path.join(out_abspath, "obfuscate.map")

    if additional_hosts is not None:
        du = DshUtils()

        # Run child pbs_snapshot commands on those hosts
        # Blank entries (e.g. from a trailing comma) are not hosts
        hostnames = [name.strip() for name in additional_hosts.split(",")
                     if name.strip()]
        childsnap_threads = {}
        for host in hostnames:
            thread = Thread(target=childsnap_thread,
                            args=(ptl_logger, host))
            thread.start()
            childsnap_threads[host] = thread

        # Capture snapshot on the main host in the meantime
        with PBSSnapUtils(out_dir, primary_host=primary_host,
                          acct_logs=acct_logs, daemon_logs=daemon_logs,
                          map_file=map_file, anonymize=anonymize,
                          create_tar=False, log_path=log_path,
                          with_sudo=with_sudo) as snap_utils:
            main_snap = snap_utils.capture_all()

        # Let's reconcile the child snapshots
        for host, thread in childsnap_threads.items():
            thread.join()
            host_snappath = os.path.join(out_dir, host + "_snapshot.tgz")
            if os.path.isfile(host_snappath):
                # Move the tar file to the main snapshot
                du.run_copy(src=host_snappath, dest=main_snap)

                # Remove the tar file
                du.rm(host_snappath, force=True)

        # Finally, create a tar of the whole snapshot
        outtar = main_snap + ".tgz"
        with tarfile.open(outtar, "w:gz") as tar:
            tar.add(main_snap, arcname=os.path.basename(main_snap))

        # Delete the snapshot directory itself
        du.rm(path=main_snap, recursive=True, force=True)
    else:
        # No additional hosts to capture
        with PBSSnapUtils(out_dir, primary_host=primary_host,
                          acct_logs=acct_logs, daemon_logs=daemon_logs,
                          map_file=map_file, anonymize=anonymize,
                          create_tar=True, log_path=log_path,
                          with_sudo=with_sudo) as snap_utils:
            outtar = snap_utils.capture_all()

    if outtar is not None:
        print(SNAPSHOT_MARKER + outtar)