123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319 |
- #!/usr/bin/env python
- # coding: utf-8
- # Copyright (C) 1994-2018 Altair Engineering, Inc.
- # For more information, contact Altair at www.altair.com.
- #
- # This file is part of the PBS Professional ("PBS Pro") software.
- #
- # Open Source License Information:
- #
- # PBS Pro is free software. You can redistribute it and/or modify it under the
- # terms of the GNU Affero General Public License as published by the Free
- # Software Foundation, either version 3 of the License, or (at your option) any
- # later version.
- #
- # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
- # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- # FOR A PARTICULAR PURPOSE.
- # See the GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #
- # Commercial License Information:
- #
- # For a copy of the commercial license terms and conditions,
- # go to: (http://www.pbspro.com/UserArea/agreement.html)
- # or contact the Altair Legal Department.
- #
- # Altair’s dual-license business model allows companies, individuals, and
- # organizations to create proprietary derivative works of PBS Pro and
- # distribute them - whether embedded or bundled with other software -
- # under a commercial license agreement.
- #
- # Use of Altair’s trademarks, including but not limited to "PBS™",
- # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
- # trademark licensing policies.
- import os
- import sys
- import getopt
- import errno
- import logging
- import ptl
- import time
- import tarfile
- from getopt import GetoptError
- from threading import Thread
- from ptl.lib.pbs_testlib import PtlConfig
- from ptl.utils.pbs_snaputils import PBSSnapUtils
- from ptl.utils.pbs_cliutils import CliUtils
- from ptl.utils.pbs_dshutils import DshUtils
def trap_exceptions(etype, value, tb):
    """
    Exception hook that silences Ctrl-C and broken-pipe errors

    KeyboardInterrupt, and IOError whose errno is EPIPE, are dropped without
    any output. Every other exception is handed to the interpreter's default
    hook, sys.__excepthook__.

    :param etype - class of the uncaught exception
    :type etype - type
    :param value - the uncaught exception instance
    :type value - Exception
    :param tb - traceback of the uncaught exception
    :type tb - traceback
    """
    # While this hook is running, fall back to the default hook so that an
    # exception raised in here cannot re-enter this function recursively
    sys.excepthook = sys.__excepthook__

    interrupted = issubclass(etype, KeyboardInterrupt)
    # value.errno is only looked at for IOError, and only when the exception
    # was not already identified as a keyboard interrupt
    broken_pipe = (not interrupted and
                   issubclass(etype, IOError) and
                   value.errno == errno.EPIPE)

    if not (interrupted or broken_pipe):
        sys.__excepthook__(etype, value, tb)

    # Re-arm this hook for any exception that comes later
    sys.excepthook = trap_exceptions


# Install the hook for the whole process
sys.excepthook = trap_exceptions
def usage():
    """
    Print the pbs_snapshot command-line help text to stdout

    The text describes -o as an existing directory because the option
    handling in this script rejects any -o value that is not a directory.
    """
    msg = """
Usage: pbs_snapshot -o <path to existing output directory> [OPTION]

    Take snapshot of a PBS system and optionally capture logs for diagnostics

    -H <hostname>                     primary hostname to operate on
                                      Defaults to local host
    -l <loglevel>                     set log level to one of INFO, INFOCLI,
                                      INFOCLI2, DEBUG, DEBUG2, WARNING, ERROR
                                      or FATAL
    -h, --help                        display this usage message
    --daemon-logs=<num days>          days of daemon logs to collect
    --accounting-logs=<num days>      days of accounting logs to collect
    --additional-hosts=<hostname>     collect data from additional hosts
                                      'hostname' is a comma separated list
    --map=<file>                      file to store the map of obfuscated data
    --obfuscate                       obfuscates sensitive data
    --with-sudo                       Uses sudo to capture privileged data
    --version                         print version number and exit
"""
    # Parenthesised single-argument form prints identically on Python 2
    # and is also valid on Python 3
    print(msg)
def childsnap_thread(logger, host):
    """
    Thread routine for each child snapshot being run on a remote host

    Runs pbs_snapshot on 'host' inside a freshly created directory, copies
    the resulting tar file to <out_dir>/<host>_snapshot.tgz, copies the map
    file (when one is configured) to <out_dir>/<host>_<map file name>, and
    removes the remote directory again, whether or not the capture worked.

    Besides its parameters, this routine reads the module-level settings
    out_dir, daemon_logs, acct_logs, anonymize, map_file and with_sudo that
    are assigned by the script's main section.

    :param logger - Logging object
    :type logger - logging.Logger
    :param host - the hostname for remote host
    :type host - str
    """
    logger.info("Capturing snapshot from host %s", host)

    du = DshUtils()

    # Get path to pbs_snapshot on remote host
    host_pbsconf = du.parse_pbs_config(hostname=host)
    try:
        pbs_exec_path = host_pbsconf["PBS_EXEC"]
    except KeyError:
        logger.error("Couldn't find PBS_EXEC on host %s"
                     ", won't capture snapshot on this host", host)
        return
    host_pbssnappath = os.path.join(pbs_exec_path, "sbin", "pbs_snapshot")

    # Name of the directory on the remote host in which the child snapshot
    # is created; the timestamp keeps it distinct between runs
    snap_home = "host_" + str(int(time.time()))

    # The child prints this marker followed by the tar file path as its
    # last line of output (see the end of this script)
    marker = "Snapshot available at: "

    try:
        du.mkdir(hostname=host, path=snap_home)

        # Run pbs_snapshot on the remote host
        cmd = [host_pbssnappath, "-o", snap_home,
               "--daemon-logs=" + str(daemon_logs),
               "--accounting-logs=" + str(acct_logs)]
        if anonymize:
            cmd.extend(["--obfuscate", "--map=" + map_file])
        if with_sudo:
            cmd.append("--with-sudo")

        ret = du.run_cmd(hosts=host, cmd=cmd, logerr=False)
        if ret['rc'] != 0:
            logger.error("Error capturing snapshot from host %s", host)
            print(ret['err'])
            return

        # Get the snapshot tar filename from stdout, guarding against
        # empty output or a last line that lacks the marker
        out_lines = ret['out']
        last_line = out_lines[-1] if out_lines else ""
        if marker not in last_line:
            logger.error("Couldn't find snapshot path in output from "
                         "host %s", host)
            return
        snaptarname = last_line.split(marker)[1]

        # Copy over the snapshot tar file as <hostname>_snapshot.tgz
        dest_path = os.path.join(out_dir, host + "_snapshot.tgz")
        src_path = host + ":" + snaptarname
        ret = du.run_copy(src=src_path, dest=dest_path)
        if ret['rc'] != 0:
            logger.error("Error copying child snapshot from host %s", host)

        # Copy over map file if any as 'host_<map filename>'
        if map_file is not None:
            # Only the file name goes into the local destination: map_file
            # may carry directory components (it is an absolute path when
            # the main section picks the default), and joining those onto
            # out_dir would not yield a path inside out_dir
            dest_path = os.path.join(
                out_dir, host + "_" + os.path.basename(map_file))
            src_path = host + ":" + os.path.join(snap_home, map_file)
            ret = du.run_copy(src=src_path, dest=dest_path)
            if ret['rc'] != 0:
                logger.error("Error copying map file from host %s", host)
    finally:
        # Delete the snapshot home from remote host on every exit path so
        # that a failed capture does not leave the directory behind
        du.rm(hostname=host, path=snap_home, recursive=True, force=True)
if __name__ == '__main__':
    # Script entry point: parse the command line, set up logging, capture
    # the snapshot on the primary host (and, in parallel, on any additional
    # hosts), then print where the resulting tar file is.
    #
    # The settings below are deliberately plain module-level names:
    # childsnap_thread reads several of them as globals.

    # Arguments to PBSSnapUtils
    out_dir = None
    primary_host = None
    log_level = "INFOCLI2"
    acct_logs = 30  # Capture 30 days of accounting logs by default
    daemon_logs = 5  # Capture 5 days of daemon logs by default
    additional_hosts = None
    map_file = None
    anonymize = False
    log_file = "pbs_snapshot.log"
    with_sudo = False

    # NOTE(review): presumably initialises PTL configuration before any PTL
    # utility is used - confirm against ptl.lib.pbs_testlib
    PtlConfig()

    # Parse the options provided to pbs_snapshot
    try:
        sopt = "d:H:l:o:h"
        lopt = ["accounting-logs=", "daemon-logs=", "help",
                "additional-hosts=", "map=", "obfuscate", "with-sudo",
                "version"]
        opts, args = getopt.getopt(sys.argv[1:], sopt, lopt)
    except GetoptError as err:
        # Tell the user which option was wrong before showing the usage
        sys.stderr.write(str(err) + "\n")
        usage()
        sys.exit(1)

    for o, val in opts:
        if o == "-o":
            out_dir = val
        elif o == "-H":
            primary_host = val
        elif o == "-l":
            log_level = val
        elif o in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif o == "--accounting-logs":
            try:
                acct_logs = int(val)
            except ValueError:
                raise ValueError("Invalid value for --accounting-logs "
                                 "option, should be an integer")
        elif o == "--daemon-logs":
            try:
                daemon_logs = int(val)
            except ValueError:
                raise ValueError("Invalid value for --daemon-logs "
                                 "option, should be an integer")
        elif o == "--additional-hosts":
            additional_hosts = val
        elif o == "--map":
            map_file = val
        elif o == "--obfuscate":
            anonymize = True
        elif o == "--with-sudo":
            with_sudo = True
        elif o == "--version":
            print(ptl.__version__)
            sys.exit(0)
        else:
            sys.stderr.write("Unrecognized option\n")
            usage()
            sys.exit(1)

    # -o is a mandatory option, so make sure that it was provided
    if out_dir is None:
        sys.stderr.write("-o option not provided\n")
        usage()
        sys.exit(1)
    elif not os.path.isdir(out_dir):
        sys.stderr.write("-o path should exist,"
                         " this is where the snapshot is captured\n")
        usage()
        sys.exit(1)

    # Log to <out_dir>/pbs_snapshot.log and mirror 'ptl' records to the
    # console through a stream handler
    fmt = '%(asctime)-15s %(levelname)-8s %(message)s'
    level_int = CliUtils.get_logging_level(log_level)
    log_path = os.path.join(out_dir, log_file)
    logging.basicConfig(filename=log_path, filemode='w+',
                        level=level_int, format=fmt)
    stream_hdlr = logging.StreamHandler()
    stream_hdlr.setLevel(level_int)
    stream_hdlr.setFormatter(logging.Formatter(fmt))
    ptl_logger = logging.getLogger('ptl')
    ptl_logger.addHandler(stream_hdlr)
    ptl_logger.setLevel(level_int)

    if anonymize and map_file is None:
        # No map file was named: default to obfuscate.map inside the
        # (absolute) output directory
        map_file = os.path.join(os.path.abspath(out_dir), "obfuscate.map")

    if additional_hosts is not None:
        du = DshUtils()

        # Run child pbs_snapshot commands on those hosts
        childsnap_threads = {}
        for host in additional_hosts.split(","):
            host = host.strip()
            # Skip empty entries (e.g. a trailing comma) and repeated
            # hosts; a repeat would replace the earlier thread in the
            # dict, leaving that thread un-joined
            if not host or host in childsnap_threads:
                continue
            thread = Thread(target=childsnap_thread,
                            args=(ptl_logger, host))
            thread.start()
            childsnap_threads[host] = thread

        # Capture snapshot on the main host in the meantime
        with PBSSnapUtils(out_dir, primary_host=primary_host,
                          acct_logs=acct_logs,
                          daemon_logs=daemon_logs, map_file=map_file,
                          anonymize=anonymize, create_tar=False,
                          log_path=log_path,
                          with_sudo=with_sudo) as snap_utils:
            main_snap = snap_utils.capture_all()

        # Let's reconcile the child snapshots
        for host, thread in childsnap_threads.items():
            thread.join()
            host_snappath = os.path.join(out_dir, host + "_snapshot.tgz")
            if os.path.isfile(host_snappath):
                # Move the tar file to the main snapshot
                du.run_copy(src=host_snappath, dest=main_snap)

                # Remove the tar file
                du.rm(host_snappath, force=True)

        # Finally, create a tar of the whole snapshot
        outtar = main_snap + ".tgz"
        with tarfile.open(outtar, "w:gz") as tar:
            tar.add(main_snap, arcname=os.path.basename(main_snap))

        # Delete the snapshot directory itself
        du.rm(path=main_snap, recursive=True, force=True)
    else:
        # No additional hosts to capture
        with PBSSnapUtils(out_dir, primary_host=primary_host,
                          acct_logs=acct_logs,
                          daemon_logs=daemon_logs, map_file=map_file,
                          anonymize=anonymize, create_tar=True,
                          log_path=log_path,
                          with_sudo=with_sudo) as snap_utils:
            outtar = snap_utils.capture_all()

    if outtar is not None:
        # childsnap_thread parses this exact prefix from a child's output,
        # so the text must not change
        print("Snapshot available at: " + outtar)
|