# coding: utf-8
# Copyright (C) 1994-2018 Altair Engineering, Inc.
# For more information, contact Altair at www.altair.com.
#
# This file is part of the PBS Professional ("PBS Pro") software.
#
# Open Source License Information:
#
# PBS Pro is free software. You can redistribute it and/or modify it under the
# terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Commercial License Information:
#
# For a copy of the commercial license terms and conditions,
# go to: (http://www.pbspro.com/UserArea/agreement.html)
# or contact the Altair Legal Department.
#
# Altair’s dual-license business model allows companies, individuals, and
# organizations to create proprietary derivative works of PBS Pro and
# distribute them - whether embedded or bundled with other software -
# under a commercial license agreement.
#
# Use of Altair’s trademarks, including but not limited to "PBS™",
# "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
# trademark licensing policies.
import os
import time
import tarfile
import logging
import socket
from subprocess import STDOUT
from ptl.lib.pbs_testlib import Server, Scheduler, SCHED
from ptl.lib.pbs_ifl_mock import *
from ptl.utils.pbs_dshutils import DshUtils
from ptl.utils.pbs_logutils import PBSLogUtils
from ptl.utils.pbs_anonutils import PBSAnonymizer
# Enum-style integer labels (0..56) identifying each piece of information
# that a snapshot can capture (command outputs, config files, logs, priv
# directories, core files).
# NOTE: order is significant -- the values come from range(57), so
# inserting a name mid-list renumbers every label after it; always append
# new labels just before CTIME and bump the range() argument.
( # qstat outputs
    QSTAT_B_OUT,
    QSTAT_BF_OUT,
    QSTAT_OUT,
    QSTAT_F_OUT,
    QSTAT_T_OUT,
    QSTAT_TF_OUT,
    QSTAT_X_OUT,
    QSTAT_XF_OUT,
    QSTAT_NS_OUT,
    QSTAT_FX_DSV_OUT,
    QSTAT_F_DSV_OUT,
    QSTAT_F_JSON_OUT,
    QSTAT_Q_OUT,
    QSTAT_QF_OUT,
    # qmgr outputs
    QMGR_PS_OUT,
    QMGR_PH_OUT,
    QMGR_LPBSHOOK_OUT,
    QMGR_LSCHED_OUT,
    QMGR_PN_OUT,
    QMGR_PR_OUT,
    # pbsnodes outputs
    PBSNODES_VA_OUT,
    PBSNODES_A_OUT,
    PBSNODES_AVSJ_OUT,
    PBSNODES_ASJ_OUT,
    PBSNODES_AVS_OUT,
    PBSNODES_AS_OUT,
    PBSNODES_AFDSV_OUT,
    PBSNODES_AVFDSV_OUT,
    PBSNODES_AVFJSON_OUT,
    # pbs_rstat outputs
    PBS_RSTAT_OUT,
    PBS_RSTAT_F_OUT,
    # PBS config related outputs
    PBS_CONF,
    PBS_PROBE_OUT,
    PBS_HOSTN_OUT,
    PBS_ENVIRONMENT,
    # System related outputs
    OS_INFO,
    PROCESS_INFO,
    LSOF_PBS_OUT,
    ETC_HOSTS,
    ETC_NSSWITCH_CONF,
    VMSTAT_OUT,
    DF_H_OUT,
    DMESG_OUT,
    PS_LEAF_OUT,
    # Logs
    ACCT_LOGS,
    SVR_LOGS,
    SCHED_LOGS,
    MOM_LOGS,
    PG_LOGS,
    COMM_LOGS,
    # Daemon priv directories
    SVR_PRIV,
    MOM_PRIV,
    SCHED_PRIV,
    # Core file information
    CORE_SCHED,
    CORE_SERVER,
    CORE_MOM,
    # Miscellaneous
    CTIME) = range(57)
# Define paths to various files/directories, all relative to the root of
# the snapshot directory being captured.
# server/ -- server-level command outputs
SERVER_DIR = "server"
QSTAT_B_PATH = os.path.join(SERVER_DIR, "qstat_B.out")
QSTAT_BF_PATH = os.path.join(SERVER_DIR, "qstat_Bf.out")
QMGR_PS_PATH = os.path.join(SERVER_DIR, "qmgr_ps.out")
QSTAT_Q_PATH = os.path.join(SERVER_DIR, "qstat_Q.out")
QSTAT_QF_PATH = os.path.join(SERVER_DIR, "qstat_Qf.out")
QMGR_PR_PATH = os.path.join(SERVER_DIR, "qmgr_pr.out")
# server_priv/ -- copy of PBS_HOME/server_priv (incl. accounting logs)
SVR_PRIV_PATH = "server_priv"
ACCT_LOGS_PATH = os.path.join("server_priv", "accounting")
# server_logs/ -- copy of PBS_HOME/server_logs
SVR_LOGS_PATH = "server_logs"
# job/ -- job-related qstat outputs
JOB_DIR = "job"
QSTAT_PATH = os.path.join(JOB_DIR, "qstat.out")
QSTAT_F_PATH = os.path.join(JOB_DIR, "qstat_f.out")
QSTAT_T_PATH = os.path.join(JOB_DIR, "qstat_t.out")
QSTAT_TF_PATH = os.path.join(JOB_DIR, "qstat_tf.out")
QSTAT_X_PATH = os.path.join(JOB_DIR, "qstat_x.out")
QSTAT_XF_PATH = os.path.join(JOB_DIR, "qstat_xf.out")
QSTAT_NS_PATH = os.path.join(JOB_DIR, "qstat_ns.out")
QSTAT_FX_DSV_PATH = os.path.join(JOB_DIR, "qstat_fx_F_dsv.out")
QSTAT_F_DSV_PATH = os.path.join(JOB_DIR, "qstat_f_F_dsv.out")
QSTAT_F_JSON_PATH = os.path.join(JOB_DIR, "qstat_f_F_json.out")
# node/ -- pbsnodes outputs and default-node qmgr output
NODE_DIR = "node"
PBSNODES_VA_PATH = os.path.join(NODE_DIR, "pbsnodes_va.out")
PBSNODES_A_PATH = os.path.join(NODE_DIR, "pbsnodes_a.out")
PBSNODES_AVSJ_PATH = os.path.join(NODE_DIR, "pbsnodes_avSj.out")
PBSNODES_ASJ_PATH = os.path.join(NODE_DIR, "pbsnodes_aSj.out")
PBSNODES_AVS_PATH = os.path.join(NODE_DIR, "pbsnodes_avS.out")
PBSNODES_AS_PATH = os.path.join(NODE_DIR, "pbsnodes_aS.out")
PBSNODES_AFDSV_PATH = os.path.join(NODE_DIR, "pbsnodes_aFdsv.out")
PBSNODES_AVFDSV_PATH = os.path.join(NODE_DIR, "pbsnodes_avFdsv.out")
PBSNODES_AVFJSON_PATH = os.path.join(NODE_DIR, "pbsnodes_avFjson.out")
QMGR_PN_PATH = os.path.join(NODE_DIR, "qmgr_pn_default.out")
# mom_priv/ -- copy of PBS_HOME/mom_priv
MOM_PRIV_PATH = "mom_priv"
# mom_logs/ -- copy of PBS_HOME/mom_logs
MOM_LOGS_PATH = "mom_logs"
# comm_logs/ -- copy of PBS_HOME/comm_logs
COMM_LOGS_PATH = "comm_logs"
# hook/ -- hook-related qmgr outputs
HOOK_DIR = "hook"
QMGR_PH_PATH = os.path.join(HOOK_DIR, "qmgr_ph_default.out")
QMGR_LPBSHOOK_PATH = os.path.join(HOOK_DIR, "qmgr_lpbshook.out")
# scheduler/ -- scheduler-related qmgr outputs
SCHED_DIR = "scheduler"
QMGR_LSCHED_PATH = os.path.join(SCHED_DIR, "qmgr_lsched.out")
# sched_priv/ -- default scheduler's priv directory
DFLT_SCHED_PRIV_PATH = "sched_priv"
# sched_logs/ -- default scheduler's logs
DFLT_SCHED_LOGS_PATH = "sched_logs"
# reservation/ -- pbs_rstat outputs
RESV_DIR = "reservation"
PBS_RSTAT_PATH = os.path.join(RESV_DIR, "pbs_rstat.out")
PBS_RSTAT_F_PATH = os.path.join(RESV_DIR, "pbs_rstat_f.out")
# datastore/ -- PostgreSQL datastore logs
DATASTORE_DIR = "datastore"
PG_LOGS_PATH = os.path.join(DATASTORE_DIR, "pg_log")
# core_file_bt/ -- backtraces of core files found in the priv directories
CORE_DIR = "core_file_bt"
CORE_SERVER_PATH = os.path.join(CORE_DIR, "server_priv")
CORE_SCHED_PATH = os.path.join(CORE_DIR, "sched_priv")
CORE_MOM_PATH = os.path.join(CORE_DIR, "mom_priv")
# system/ -- host/system level information
SYS_DIR = "system"
PBS_PROBE_PATH = os.path.join(SYS_DIR, "pbs_probe_v.out")
PBS_HOSTN_PATH = os.path.join(SYS_DIR, "pbs_hostn_v.out")
PBS_ENV_PATH = os.path.join(SYS_DIR, "pbs_environment")
OS_PATH = os.path.join(SYS_DIR, "os_info")
PROCESS_PATH = os.path.join(SYS_DIR, "process_info")
ETC_HOSTS_PATH = os.path.join(SYS_DIR, "etc_hosts")
ETC_NSSWITCH_PATH = os.path.join(SYS_DIR, "etc_nsswitch_conf")
LSOF_PBS_PATH = os.path.join(SYS_DIR, "lsof_pbs.out")
VMSTAT_PATH = os.path.join(SYS_DIR, "vmstat.out")
DF_H_PATH = os.path.join(SYS_DIR, "df_h.out")
DMESG_PATH = os.path.join(SYS_DIR, "dmesg.out")
PS_LEAF_PATH = os.path.join(SYS_DIR, "ps_leaf.out")
# top-level files of the snapshot
PBS_CONF_PATH = "pbs.conf"
CTIME_PATH = "ctime"
# Paths to the PBS commands used to capture data, relative to PBS_EXEC
QSTAT_CMD = os.path.join("bin", "qstat")
PBSNODES_CMD = os.path.join("bin", "pbsnodes")
QMGR_CMD = os.path.join("bin", "qmgr")
PBS_RSTAT_CMD = os.path.join("bin", "pbs_rstat")
PBS_PROBE_CMD = os.path.join("sbin", "pbs_probe")
PBS_HOSTN_CMD = os.path.join("bin", "pbs_hostn")
# Basenames of captured files whose contents are in tabular form
# (used downstream, e.g. by the anonymizer, to parse them column-wise)
FILE_TABULAR = ["qstat.out", "qstat_t.out", "qstat_x.out", "qstat_ns.out",
                "pbsnodes_aS.out", "pbsnodes_aSj.out", "pbsnodes_avS.out",
                "pbsnodes_avSj.out", "qstat_Q.out", "qstat_B.out",
                "pbs_rstat.out"]
class PBSSnapUtils(object):
    """
    Context-manager wrapper around _PBSSnapUtils.

    Using this class in a ``with`` statement guarantees that the
    worker object's finalize() runs when the block exits, so the
    necessary cleanup happens before the object is discarded.
    """

    def __init__(self, out_dir, primary_host=None, acct_logs=None,
                 daemon_logs=None, map_file=None, anonymize=None,
                 create_tar=False, log_path=None, with_sudo=False):
        # Only record the arguments here; the worker _PBSSnapUtils
        # object is created lazily in __enter__()
        self.out_dir = out_dir
        self.primary_host = primary_host
        self.acct_logs = acct_logs
        self.srvc_logs = daemon_logs
        self.map_file = map_file
        self.anonymize = anonymize
        self.create_tar = create_tar
        self.log_path = log_path
        self.with_sudo = with_sudo
        # Set by __enter__(); None until the context is entered
        self.utils_obj = None

    def __enter__(self):
        # Build the worker object and hand it to the 'with' body
        self.utils_obj = _PBSSnapUtils(self.out_dir,
                                       primary_host=self.primary_host,
                                       acct_logs=self.acct_logs,
                                       daemon_logs=self.srvc_logs,
                                       map_file=self.map_file,
                                       anonymize=self.anonymize,
                                       create_tar=self.create_tar,
                                       log_path=self.log_path,
                                       with_sudo=self.with_sudo)
        return self.utils_obj

    def __exit__(self, exc_type, exc_value, traceback):
        # Always run cleanup; returning False lets any exception
        # raised inside the 'with' block propagate to the caller
        self.utils_obj.finalize()
        return False
class _PBSSnapUtils(object):
"""
PBS snapshot utilities
"""
def __init__(self, out_dir, primary_host=None, acct_logs=None,
daemon_logs=None, map_file=None, anonymize=False,
create_tar=False, log_path=None, with_sudo=False):
"""
Initialize a PBSSnapUtils object with the arguments specified
:param out_dir: path to the directory where snapshot will be created
:type out_dir: str
:param primary_host: Name of the primary host to capture
:type primary_host: str or None
:param acct_logs: number of accounting logs to capture
:type acct_logs: int or None
:param daemon_logs: number of daemon logs to capture
:type daemon_logs: int or None
:param map_file: Path to map file for anonymization map
:type map_file str or None
:param anonymize: anonymize data?
:type anonymize: bool
:param create_tar: Create a tarball of the output snapshot?
:type create_tar: bool or None
:param log_path: Path to pbs_snapshot's log file
:type log_path: str or None
:param with_sudo: Capture relevant information with sudo?
:type with_sudo: bool
"""
self.logger = logging.getLogger(__name__)
self.du = DshUtils()
self.server_info = {}
self.job_info = {}
self.node_info = {}
self.comm_info = {}
self.hook_info = {}
self.sched_info = {}
self.resv_info = {}
self.sys_info = {}
self.core_info = {}
self.anon_obj = None
self.all_hosts = []
self.server = None
self.mom = None
self.comm = None
self.scheduler = None
self.log_utils = PBSLogUtils()
self.outtar_path = None
self.outtar_fd = None
self.create_tar = create_tar
self.snapshot_name = None
self.with_sudo = with_sudo
self.log_path = log_path
self.server_up = False
self.server_info_avail = False
self.mom_info_avail = False
self.comm_info_avail = False
self.sched_info_avail = False
if self.log_path is not None:
self.log_filename = os.path.basename(self.log_path)
else:
self.log_filename = None
# finalize() is called by the context's __exit__() automatically
# however, finalize() is non-reenterant, so set a flag to keep
# track of whether it has been called or not.
self.finalized = False
# Parse the input arguments
timestamp_str = time.strftime("%Y%m%d_%H_%M_%S")
self.snapshot_name = "snapshot_" + timestamp_str
# Make sure that the target directory exists
dir_path = os.path.abspath(out_dir)
if not os.path.isdir(dir_path):
raise ValueError("Target directory either doesn't exist" +
"or not accessible. Quitting.")
self.snapdir = os.path.join(dir_path, self.snapshot_name)
self.num_acct_logs = int(acct_logs) if acct_logs is not None else 0
if daemon_logs is not None:
self.num_daemon_logs = int(daemon_logs)
else:
self.num_daemon_logs = 0
self.mapfile = map_file
if primary_host is None:
primary_host = socket.gethostname()
# Check which of the PBS daemons' information is available
self.server = Server(primary_host)
self.scheduler = Scheduler(server=self.server)
daemon_status = self.server.pi.status()
if len(daemon_status) > 0 and daemon_status['rc'] == 0 and \
len(daemon_status['err']) == 0:
for d_stat in daemon_status['out']:
if d_stat.startswith("pbs_server"):
self.server_info_avail = True
if "not running" not in d_stat:
self.server_up = True
elif d_stat.startswith("pbs_sched"):
self.sched_info_avail = True
elif d_stat.startswith("pbs_mom"):
self.mom_info_avail = True
elif d_stat.startswith("pbs_comm"):
self.comm_info_avail = True
self.custom_rscs = None
if self.server_up:
self.custom_rscs = self.server.parse_resources()
# Store paths to PBS_HOME and PBS_EXEC
self.pbs_home = self.server.pbs_conf["PBS_HOME"]
self.pbs_exec = self.server.pbs_conf["PBS_EXEC"]
# Add self.primary_host to the list of hosts
self.primary_host = self.server.hostname
# If output needs to be a tarball, create the tarfile name
# tarfile name =