pbs_testlib.py 507 KB


  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. import sys
  37. import os
  38. import socket
  39. import pwd
  40. import grp
  41. import logging
  42. import time
  43. import re
  44. import random
  45. import string
  46. import tempfile
  47. import cPickle
  48. import copy
  49. import datetime
  50. import traceback
  51. import threading
  52. from operator import itemgetter
  53. from collections import OrderedDict
  54. from distutils.version import LooseVersion
  55. try:
  56. import psycopg2
  57. PSYCOPG = True
  58. except:
  59. PSYCOPG = False
  60. try:
  61. from ptl.lib.pbs_ifl import *
  62. API_OK = True
  63. except:
  64. try:
  65. from ptl.lib.pbs_ifl_mock import *
  66. except:
  67. sys.stderr.write("failed to import pbs_ifl, run pbs_swigify " +
  68. "to make it\n")
  69. raise ImportError
  70. API_OK = False
  71. from ptl.lib.pbs_api_to_cli import api_to_cli
  72. from ptl.utils.pbs_dshutils import DshUtils
  73. from ptl.utils.pbs_procutils import ProcUtils
  74. from ptl.utils.pbs_cliutils import CliUtils
  75. from ptl.utils.pbs_fileutils import FileUtils, FILE_TAIL
  76. # suppress logging exceptions
  77. logging.raiseExceptions = False
  78. # Various mappings and aliases
  79. MGR_OBJ_VNODE = MGR_OBJ_NODE
  80. VNODE = MGR_OBJ_VNODE
  81. NODE = MGR_OBJ_NODE
  82. HOST = MGR_OBJ_HOST
  83. JOB = MGR_OBJ_JOB
  84. RESV = MGR_OBJ_RESV
  85. SERVER = MGR_OBJ_SERVER
  86. QUEUE = MGR_OBJ_QUEUE
  87. SCHED = MGR_OBJ_SCHED
  88. HOOK = MGR_OBJ_HOOK
  89. RSC = MGR_OBJ_RSC
  90. PBS_HOOK = MGR_OBJ_PBS_HOOK
  91. # the order of these symbols matters, see pbs_ifl.h
  92. (SET, UNSET, INCR, DECR, EQ, NE, GE, GT,
  93. LE, LT, MATCH, MATCH_RE, NOT, DFLT) = range(14)
  94. (PTL_OR, PTL_AND) = [0, 1]
  95. (IFL_SUBMIT, IFL_SELECT, IFL_TERMINATE, IFL_ALTER,
  96. IFL_MSG, IFL_DELETE, IFL_RALTER) = [0, 1, 2, 3, 4, 5, 6]
  97. (PTL_API, PTL_CLI) = ['api', 'cli']
  98. (PTL_COUNTER, PTL_FILTER) = [0, 1]
  99. PTL_STR_TO_OP = {
  100. '<': LT,
  101. '<=': LE,
  102. '=': EQ,
  103. '>=': GE,
  104. '>': GT,
  105. '!=': NE,
  106. ' set ': SET,
  107. ' unset ': UNSET,
  108. ' match ': MATCH,
  109. '~': MATCH_RE,
  110. '!': NOT
  111. }
  112. PTL_OP_TO_STR = {
  113. LT: '<',
  114. LE: '<=',
  115. EQ: '=',
  116. GE: '>=',
  117. GT: '>',
  118. SET: ' set ',
  119. NE: '!=',
  120. UNSET: ' unset ',
  121. MATCH: ' match ',
  122. MATCH_RE: '~',
  123. NOT: 'is not'
  124. }
  125. PTL_ATTROP_TO_STR = {PTL_AND: '&&', PTL_OR: '||'}
  126. (RESOURCES_AVAILABLE, RESOURCES_TOTAL) = [0, 1]
  127. EXPECT_MAP = {
  128. UNSET: 'Unset',
  129. SET: 'Set',
  130. EQ: 'Equal',
  131. NE: 'Not Equal',
  132. LT: 'Less Than',
  133. GT: 'Greater Than',
  134. LE: 'Less Equal Than',
  135. GE: 'Greater Equal Than',
  136. MATCH_RE: 'Matches regexp',
  137. MATCH: 'Matches',
  138. NOT: 'Not'
  139. }
  140. PBS_CMD_MAP = {
  141. MGR_CMD_CREATE: 'create',
  142. MGR_CMD_SET: 'set',
  143. MGR_CMD_DELETE: 'delete',
  144. MGR_CMD_UNSET: 'unset',
  145. MGR_CMD_IMPORT: 'import',
  146. MGR_CMD_EXPORT: 'export',
  147. MGR_CMD_LIST: 'list',
  148. }
  149. PBS_CMD_TO_OP = {
  150. MGR_CMD_SET: SET,
  151. MGR_CMD_UNSET: UNSET,
  152. MGR_CMD_DELETE: UNSET,
  153. MGR_CMD_CREATE: SET,
  154. }
  155. PBS_OBJ_MAP = {
  156. MGR_OBJ_NONE: 'none',
  157. SERVER: 'server',
  158. QUEUE: 'queue',
  159. JOB: 'job',
  160. NODE: 'node',
  161. RESV: 'reservation',
  162. RSC: 'resource',
  163. SCHED: 'sched',
  164. HOST: 'host',
  165. HOOK: 'hook',
  166. VNODE: 'node',
  167. PBS_HOOK: 'pbshook'
  168. }
  169. PTL_TRUE = ('1', 'true', 't', 'yes', 'y', 'enable', 'enabled', 'True', True)
  170. PTL_FALSE = ('0', 'false', 'f', 'no', 'n', 'disable', 'disabled', 'False',
  171. False)
  172. PTL_NONE = ('None', None)
  173. PTL_FORMULA = '__formula__'
  174. PTL_NOARG = '__noarg__'
  175. PTL_ALL = '__ALL__'
  176. CMD_ERROR_MAP = {
  177. 'alterjob': 'PbsAlterError',
  178. 'holdjob': 'PbsHoldError',
  179. 'sigjob': 'PbsSignalError',
  180. 'msgjob': 'PbsMessageError',
  181. 'rlsjob': 'PbsReleaseError',
  182. 'rerunjob': 'PbsRerunError',
  183. 'orderjob': 'PbsOrderError',
  184. 'runjob': 'PbsRunError',
  185. 'movejob': 'PbsMoveError',
  186. 'delete': 'PbsDeleteError',
  187. 'deljob': 'PbsDeljobError',
  188. 'delresv': 'PbsDelresvError',
  189. 'status': 'PbsStatusError',
  190. 'manager': 'PbsManagerError',
  191. 'submit': 'PbsSubmitError',
  192. 'terminate': 'PbsQtermError',
  193. 'alterresv': 'PbsResvAlterError'
  194. }
  195. class PtlConfig(object):
  196. """
  197. Holds configuration options
  198. The options can be stored in a file as well as in the OS environment
  199. variables.When set, the environment variables will override
  200. definitions in the file.By default, on Unix like systems, the file
  201. read is ``/etc/ptl.conf``, the environment variable ``PTL_CONF_FILE``
  202. can be used to set the path to the file to read.
  203. The format of the file is a series of ``<key> = <value>`` properties.
  204. A line that starts with a '#' is ignored and can be used for comments
  205. :param conf: Path to PTL configuration file
  206. :type conf: str or None
  207. """
  208. logger = logging.getLogger(__name__)
  209. def __init__(self, conf=None):
  210. self.options = {
  211. 'PTL_SUDO_CMD': 'sudo -H',
  212. 'PTL_RSH_CMD': 'ssh',
  213. 'PTL_CP_CMD': 'scp -p',
  214. 'PTL_EXPECT_MAX_ATTEMPTS': 60,
  215. 'PTL_EXPECT_INTERVAL': 0.5,
  216. 'PTL_UPDATE_ATTRIBUTES': True,
  217. }
  218. self.handlers = {
  219. 'PTL_SUDO_CMD': DshUtils.set_sudo_cmd,
  220. 'PTL_RSH_CMD': DshUtils.set_rsh_cmd,
  221. 'PTL_CP_CMD': DshUtils.set_copy_cmd,
  222. 'PTL_EXPECT_MAX_ATTEMPTS': Server.set_expect_max_attempts,
  223. 'PTL_EXPECT_INTERVAL': Server.set_expect_interval,
  224. 'PTL_UPDATE_ATTRIBUTES': Server.set_update_attributes
  225. }
  226. if conf is None:
  227. conf = os.environ.get('PTL_CONF_FILE', '/etc/ptl.conf')
  228. try:
  229. lines = open(conf).readlines()
  230. except IOError:
  231. lines = []
  232. for line in lines:
  233. line = line.strip()
  234. if (line.startswith('#') or (line == '')):
  235. continue
  236. try:
  237. k, v = line.split('=', 1)
  238. k = k.strip()
  239. v = v.strip()
  240. self.options[k] = v
  241. except:
  242. self.logger.error('Error parsing line ' + line)
  243. for k, v in self.options.items():
  244. if k in os.environ:
  245. v = os.environ[k]
  246. else:
  247. os.environ[k] = str(v)
  248. if k in self.handlers:
  249. self.handlers[k](v)
  250. class PtlException(Exception):
  251. """
  252. Generic errors raised by PTL operations.
  253. Sets a ``return value``, a ``return code``, and a ``message``
  254. A post function and associated positional and named arguments
  255. are available to perform any necessary cleanup.
  256. :param rv: Return value set for the error occured during PTL
  257. operation
  258. :type rv: int or None.
  259. :param rc: Return code set for the error occured during PTL
  260. operation
  261. :type rc: int or None.
  262. :param msg: Message set for the error occured during PTL operation
  263. :type msg: str or None.
  264. :param post: Execute given post callable function if not None.
  265. :type post: callable or None.
  266. :raises: PTL exceptions
  267. """
  268. def __init__(self, rv=None, rc=None, msg=None, post=None, *args, **kwargs):
  269. self.rv = rv
  270. self.rc = rc
  271. self.msg = msg
  272. if post is not None:
  273. post(*args, **kwargs)
  274. def __str__(self):
  275. return ('rc=' + str(self.rc) + ', rv=' + str(self.rv) +
  276. ', msg=' + str(self.msg))
  277. def __repr__(self):
  278. return (self.__class__.__name__ + '(rc=' + str(self.rc) + ', rv=' +
  279. str(self.rv) + ', msg=' + str(self.msg) + ')')
  280. class PtlFailureException(AssertionError):
  281. """
  282. Generic failure exception raised by PTL operations.
  283. Sets a ``return value``, a ``return code``, and a ``message``
  284. A post function and associated positional and named arguments
  285. are available to perform any necessary cleanup.
  286. :param rv: Return value set for the failure occured during PTL
  287. operation
  288. :type rv: int or None.
  289. :param rc: Return code set for the failure occured during PTL
  290. operation
  291. :type rc: int or None.
  292. :param msg: Message set for the failure occured during PTL operation
  293. :type msg: str or None.
  294. :param post: Execute given post callable function if not None.
  295. :type post: callable or None.
  296. :raises: PTL exceptions
  297. """
  298. def __init__(self, rv=None, rc=None, msg=None, post=None, *args, **kwargs):
  299. self.rv = rv
  300. self.rc = rc
  301. self.msg = msg
  302. if post is not None:
  303. post(*args, **kwargs)
  304. def __str__(self):
  305. return ('rc=' + str(self.rc) + ', rv=' + str(self.rv) +
  306. ', msg=' + str(self.msg))
  307. def __repr__(self):
  308. return (self.__class__.__name__ + '(rc=' + str(self.rc) + ', rv=' +
  309. str(self.rv) + ', msg=' + str(self.msg) + ')')
  310. class PbsServiceError(PtlException):
  311. pass
  312. class PbsConnectError(PtlException):
  313. pass
  314. class PbsStatusError(PtlException):
  315. pass
  316. class PbsSubmitError(PtlException):
  317. pass
  318. class PbsManagerError(PtlException):
  319. pass
  320. class PbsDeljobError(PtlException):
  321. pass
  322. class PbsDelresvError(PtlException):
  323. pass
  324. class PbsDeleteError(PtlException):
  325. pass
  326. class PbsRunError(PtlException):
  327. pass
  328. class PbsSignalError(PtlException):
  329. pass
  330. class PbsMessageError(PtlException):
  331. pass
  332. class PbsHoldError(PtlException):
  333. pass
  334. class PbsReleaseError(PtlException):
  335. pass
  336. class PbsOrderError(PtlException):
  337. pass
  338. class PbsRerunError(PtlException):
  339. pass
  340. class PbsMoveError(PtlException):
  341. pass
  342. class PbsAlterError(PtlException):
  343. pass
  344. class PbsResourceError(PtlException):
  345. pass
  346. class PbsSelectError(PtlException):
  347. pass
  348. class PbsSchedConfigError(PtlException):
  349. pass
  350. class PbsMomConfigError(PtlException):
  351. pass
  352. class PbsFairshareError(PtlException):
  353. pass
  354. class PbsQdisableError(PtlException):
  355. pass
  356. class PbsQenableError(PtlException):
  357. pass
  358. class PbsQstartError(PtlException):
  359. pass
  360. class PbsQstopError(PtlException):
  361. pass
  362. class PtlExpectError(PtlFailureException):
  363. pass
  364. class PbsInitServicesError(PtlException):
  365. pass
  366. class PbsQtermError(PtlException):
  367. pass
  368. class PtlLogMatchError(PtlFailureException):
  369. pass
  370. class PbsResvAlterError(PtlException):
  371. pass
  372. class PbsTypeSize(str):
  373. """
  374. Descriptor class for memory as a numeric entity.
  375. Units can be one of ``b``, ``kb``, ``mb``, ``gb``, ``tb``, ``pt``
  376. :param unit: The unit type associated to the memory value
  377. :type unit: str
  378. :param value: The numeric value of the memory
  379. :type value: int or None
  380. :raises: ValueError and TypeError
  381. """
  382. def __init__(self, value=None):
  383. if value is None:
  384. return
  385. if len(value) < 2:
  386. raise ValueError
  387. if value[-1:] in ('b', 'B') and value[:-1].isdigit():
  388. self.unit = 'b'
  389. self.value = int(int(value[:-1]) / 1024)
  390. return
  391. # lower() applied to ignore case
  392. unit = value[-2:].lower()
  393. self.value = value[:-2]
  394. if not self.value.isdigit():
  395. raise ValueError
  396. if unit == 'kb':
  397. self.value = int(self.value)
  398. elif unit == 'mb':
  399. self.value = int(self.value) * 1024
  400. elif unit == 'gb':
  401. self.value = int(self.value) * 1024 * 1024
  402. elif unit == 'tb':
  403. self.value = int(self.value) * 1024 * 1024 * 1024
  404. elif unit == 'pb':
  405. self.value = int(self.value) * 1024 * 1024 * 1024 * 1024
  406. else:
  407. raise TypeError
  408. self.unit = 'kb'
  409. def encode(self, value=None, valtype='kb', precision=1):
  410. """
  411. Encode numeric memory input in kilobytes to a string, including
  412. unit
  413. :param value: The numeric value of memory to encode
  414. :type value: int or None.
  415. :param valtype: The unit of the input value, defaults to kb
  416. :type valtype: str
  417. :param precision: Precision of the encoded value, defaults to 1
  418. :type precision: int
  419. :returns: Encoded memory in kb to string
  420. """
  421. if value is None:
  422. value = self.value
  423. if valtype == 'b':
  424. val = value
  425. elif valtype == 'kb':
  426. val = value * 1024
  427. elif valtype == 'mb':
  428. val = value * 1024 * 1024
  429. elif valtype == 'gb':
  430. val = value * 1024 * 1024 * 1024 * 1024
  431. elif valtype == 'tb':
  432. val = value * 1024 * 1024 * 1024 * 1024 * 1024
  433. elif valtype == 'pt':
  434. val = value * 1024 * 1024 * 1024 * 1024 * 1024 * 1024
  435. m = (
  436. (1 << 50, 'pb'),
  437. (1 << 40, 'tb'),
  438. (1 << 30, 'gb'),
  439. (1 << 20, 'mb'),
  440. (1 << 10, 'kb'),
  441. (1, 'b')
  442. )
  443. for factor, suffix in m:
  444. if val >= factor:
  445. break
  446. return '%.*f%s' % (precision, float(val) / factor, suffix)
  447. def __cmp__(self, other):
  448. if self.value < other.value:
  449. return -1
  450. if self.value == other.value:
  451. return 0
  452. return 1
  453. def __lt__(self, other):
  454. if self.value < other.value:
  455. return True
  456. return False
  457. def __le__(self, other):
  458. if self.value <= other.value:
  459. return True
  460. return False
  461. def __gt__(self, other):
  462. if self.value > other.value:
  463. return True
  464. return False
  465. def __ge__(self, other):
  466. if self.value < other.value:
  467. return True
  468. return False
  469. def __eq__(self, other):
  470. if self.value == other.value:
  471. return True
  472. return False
  473. def __get__(self):
  474. return self.value
  475. def __add__(self, other):
  476. if isinstance(other, int):
  477. self.value += other
  478. else:
  479. self.value += other.value
  480. return self
  481. def __mul__(self, other):
  482. if isinstance(other, int):
  483. self.value *= other
  484. else:
  485. self.value *= other.value
  486. return self
  487. def __floordiv__(self, other):
  488. self.value /= other.value
  489. return self
  490. def __sub__(self, other):
  491. self.value -= other.value
  492. return self
  493. def __repr__(self):
  494. return self.__str__()
  495. def __str__(self):
  496. return self.encode(valtype=self.unit)
  497. class PbsTypeDuration(str):
  498. """
  499. Descriptor class for a duration represented as ``hours``,
  500. ``minutes``, and ``seconds``,in the form of ``[HH:][MM:]SS``
  501. :param as_seconds: HH:MM:SS represented in seconds
  502. :type as_seconds: int
  503. :param as_str: duration represented in HH:MM:SS
  504. :type as_str: str
  505. """
  506. def __init__(self, val):
  507. if isinstance(val, str):
  508. if ':' in val:
  509. s = val.split(':')
  510. l = len(s)
  511. if l > 3:
  512. raise ValueError
  513. hr = mn = sc = 0
  514. if l >= 2:
  515. sc = s[l - 1]
  516. mn = s[l - 2]
  517. if l == 3:
  518. hr = s[0]
  519. self.duration = int(hr) * 3600 + int(mn) * 60 + int(sc)
  520. elif val.isdigit():
  521. self.duration = int(val)
  522. elif isinstance(val, int) or isinstance(val, float):
  523. self.duration = val
  524. def __add__(self, other):
  525. self.duration += other.duration
  526. return self
  527. def __sub__(self, other):
  528. self.duration -= other.duration
  529. return self
  530. def __cmp__(self, other):
  531. if self.duration < other.duration:
  532. return -1
  533. if self.duration == other.duration:
  534. return 0
  535. return 1
  536. def __lt__(self, other):
  537. if self.duration < other.duration:
  538. return True
  539. return False
  540. def __le__(self, other):
  541. if self.duration <= other.duration:
  542. return True
  543. return False
  544. def __gt__(self, other):
  545. if self.duration > other.duration:
  546. return True
  547. return False
  548. def __ge__(self, other):
  549. if self.duration < other.duration:
  550. return True
  551. return False
  552. def __eq__(self, other):
  553. if self.duration == other.duration:
  554. return True
  555. return False
  556. def __get__(self):
  557. return self.as_str
  558. def __repr__(self):
  559. return self.__str__()
  560. def __int__(self):
  561. return int(self.duration)
  562. def __str__(self):
  563. return str(datetime.timedelta(seconds=self.duration))
  564. class PbsTypeArray(list):
  565. """
  566. Descriptor class for a PBS array list type, e.g. String array
  567. :param value: Array value to be passed
  568. :param sep: Separator for two array elements
  569. :type sep: str
  570. :returns: List
  571. """
  572. def __init__(self, value=None, sep=','):
  573. self.separator = sep
  574. self = list.__init__(self, value.split(sep))
  575. def __str__(self):
  576. return self.separator.join(self)
  577. class PbsTypeList(dict):
  578. """
  579. Descriptor class for a generic PBS list that are key/value pairs
  580. delimited
  581. :param value: List value to be passed
  582. :param sep: Separator for two key/value pair
  583. :type sep: str
  584. :param kvsep: Separator for key and value
  585. :type kvsep: str
  586. :returns: Dictionary
  587. """
  588. def __init__(self, value=None, sep=',', kvsep='='):
  589. self.kvsep = kvsep
  590. self.separator = sep
  591. d = {}
  592. as_list = map(lambda v: v.split(kvsep), value.split(sep))
  593. if as_list:
  594. for k, v in as_list:
  595. d[k] = v
  596. del as_list
  597. dict.__init__(self, d)
  598. def __str__(self):
  599. s = []
  600. for k, v in self.items():
  601. s += [str(k) + self.kvsep + str(v)]
  602. return self.separator.join(s)
  603. class PbsTypeLicenseCount(PbsTypeList):
  604. """
  605. Descriptor class for a PBS license_count attribute.
  606. It is a specialized list where key/values are ':' delimited, separated
  607. by a ' ' (space)
  608. :param value: PBS license_count attribute value
  609. :returns: Specialized list
  610. """
  611. def __init__(self, value=None):
  612. super(PbsTypeLicenseCount, self).__init__(value, sep=' ', kvsep=':')
  613. class PbsTypeVariableList(PbsTypeList):
  614. """
  615. Descriptor class for a PBS Variable_List attribute
  616. It is a specialized list where key/values are '=' delimited, separated
  617. by a ',' (space)
  618. :param value: PBS Variable_List attribute value
  619. :returns: Specialized list
  620. """
  621. def __init__(self, value=None):
  622. super(PbsTypeVariableList, self).__init__(value, sep=',', kvsep='=')
  623. class PbsTypeSelect(list):
  624. """
  625. Descriptor class for PBS select/schedselect specification.
  626. Select is of the form:
  627. ``<select> ::= <m>":"<chunk> | <select>"+"<select>``
  628. ``<m> ::= <digit> | <digit><m>``
  629. ``<chunk> ::= <resc_name>":"<resc_value> | <chunk>":"<chunk>``
  630. ``<m>`` is a multiplying factor for each chunk requested
  631. ``<chunk>`` are resource key/value pairs
  632. The type populates a list of single chunk of resource
  633. ``key/value`` pairs, the list can be walked by iterating over
  634. the type itself.
  635. :param num_chunks: The total number of chunks in the select
  636. :type num_chunk: int
  637. :param resources: A dictionary of all resource counts in the select
  638. :type resources: Dictionary
  639. """
  640. def __init__(self, s=None):
  641. if s is not None:
  642. self._as_str = s
  643. self.resources = {}
  644. self.num_chunks = 0
  645. nc = s.split('+')
  646. for chunk in nc:
  647. self._parse_chunk(chunk)
  648. def _parse_chunk(self, chunk):
  649. d = chunk.split(':')
  650. # number of chunks
  651. _num_chunks = int(d[0])
  652. self.num_chunks += _num_chunks
  653. r = {}
  654. for e in d[1:]:
  655. k, v = e.split('=')
  656. r[k] = v
  657. if 'mem' in k:
  658. try:
  659. v = PbsTypeSize(v).value
  660. except:
  661. # failed so we guessed wrong on the type
  662. pass
  663. if isinstance(v, int) or v.isdigit():
  664. if k not in self.resources:
  665. self.resources[k] = _num_chunks * int(v)
  666. else:
  667. self.resources[k] += _num_chunks * int(v)
  668. else:
  669. if k not in self.resources:
  670. self.resources[k] = v
  671. else:
  672. self.resources[k] = [self.resources[k], v]
  673. # explicitly expose the multiplying factor
  674. for _ in range(_num_chunks):
  675. self.append(r)
  676. def __add__(self, chunk=None):
  677. if chunk is None:
  678. return self
  679. self._parse_chunk(chunk)
  680. self._as_str = self._as_str + "+" + chunk
  681. return self
  682. def __repr__(self):
  683. return str(self)
  684. def __str__(self):
  685. return self._as_str
  686. class PbsTypeChunk(dict):
  687. """
  688. Descriptor class for a PBS chunk associated to a
  689. ``PbsTypeExecVnode``.This type of chunk corresponds to
  690. a node solution to a resource request,not to the select
  691. specification.
  692. ``chunk ::= <subchk> | <chunk>"+"<chunk>``
  693. ``subchk ::= <node>":"<resource>``
  694. ``resource ::= <key>":"<val> | <resource>":"<resource>``
  695. A chunk expresses a solution to a specific select-chunk
  696. request. If multiple chunks are needed to solve a single
  697. select-chunk, e.g., on a shared memory system, the chunk
  698. will be extended into virtual chunk,vchunk.
  699. :param vnode: the vnode name corresponding to the chunk
  700. :type vnode: str or None
  701. :param resources: the key value pair of resources in
  702. dictionary form
  703. :type resources: Dictionary or None
  704. :param vchunk: a list of virtual chunks needed to solve
  705. the select-chunk, vchunk is only set if more
  706. than one vchunk are required to solve the
  707. select-chunk
  708. :type vchunk: list
  709. """
  710. def __init__(self, vnode=None, resources=None, chunkstr=None):
  711. self.vnode = vnode
  712. if resources is not None:
  713. self.resources = resources
  714. else:
  715. self.resources = {}
  716. self.vchunk = []
  717. self.as_str = chunkstr
  718. self.__parse_chunk(chunkstr)
  719. def __parse_chunk(self, chunkstr=None):
  720. if chunkstr is None:
  721. return
  722. vchunks = chunkstr.split('+')
  723. if len(vchunks) == 1:
  724. entities = chunkstr.split(':')
  725. self.vnode = entities[0]
  726. if len(entities) > 1:
  727. for e in entities[1:]:
  728. (r, v) = e.split('=')
  729. self.resources[r] = v
  730. self[self.vnode] = self.resources
  731. else:
  732. for sc in vchunks:
  733. chk = PbsTypeChunk(chunkstr=sc)
  734. self.vchunk.append(chk)
  735. self[chk.vnode] = chk.resources
  736. def add(self, vnode, resources):
  737. """
  738. Add a chunk specificiation. If a chunk is already
  739. defined, add the chunk as a vchunk.
  740. :param vnode: The vnode to add
  741. :type vnode: str
  742. :param resources: The resources associated to the
  743. vnode
  744. :type resources: str
  745. :returns: Added chunk specification
  746. """
  747. if self.vnode == vnode:
  748. self.resources = dict(self.resources.items() + resources.items())
  749. return self
  750. elif len(self.vchunk) != 0:
  751. for chk in self.vchunk:
  752. if chk.vnode == vnode:
  753. chk.resources = dict(self.resources.items() +
  754. resources.items())
  755. return self
  756. chk = PbsTypeChunk(vnode, resources)
  757. self.vchunk.append(chk)
  758. return self
  759. def __repr__(self):
  760. return self.__str__()
  761. def __str__(self):
  762. _s = ["("]
  763. _s += [self.vnode, ":"]
  764. for resc_k, resc_v in self.resources.items():
  765. _s += [resc_k, "=", str(resc_v)]
  766. if self.vchunk:
  767. for _v in self.vchunk:
  768. _s += ["+", _v.vnode, ":"]
  769. for resc_k, resc_v in _v.resources.items():
  770. _s += [resc_k, "=", str(resc_v)]
  771. _s += [")"]
  772. return "".join(_s)
  773. class PbsTypeExecVnode(list):
  774. """
  775. Execvnode representation, expressed as a list of
  776. PbsTypeChunk
  777. :param vchunk: List of virtual chunks, only set when
  778. more than one vnode is allocated to a
  779. host satisfy a chunk requested
  780. :type vchunk: List
  781. :param num_chunks: The number of chunks satisfied by
  782. this execvnode
  783. :type num_chunks: int
  784. :param vnodes: List of vnode names allocated to the execvnode
  785. :type vnodes: List
  786. :param resource: method to return the amount of a named
  787. resource satisfied by this execvnode
  788. """
  789. def __init__(self, s=None):
  790. if s is None:
  791. return None
  792. self._as_str = s
  793. start = 0
  794. self.num_chunks = 0
  795. for c in range(len(s)):
  796. # must split on '+' between parens because '+' can occur within
  797. # paren for complex specs
  798. if s[c] == '(':
  799. start = c + 1
  800. if s[c] == ')':
  801. self.append(PbsTypeChunk(chunkstr=s[start:c]))
  802. self.num_chunks += 1
  803. def resource(self, name=None):
  804. """
  805. :param name: Name of the resource
  806. :type name: str or None
  807. """
  808. if name is None:
  809. return None
  810. _total = 0
  811. for _c in self:
  812. if _c.vchunk:
  813. for _v in _c.vchunk:
  814. if name in _v.resources:
  815. _total += int(_v.resources[name])
  816. if name in _c.resources:
  817. if name in _c.resources:
  818. _total += int(_c.resources[name])
  819. return _total
  820. @property
  821. def vnodes(self):
  822. vnodes = []
  823. for e in self:
  824. vnodes += [e.vnode]
  825. if e.vchunk:
  826. vnodes += map(lambda n: n.vnode, e.vchunk)
  827. return list(set(vnodes))
  828. def _str__(self):
  829. return self._as_str
  830. # below would be to verify that the converted type maps back correctly
  831. _s = []
  832. for _c in self:
  833. _s += [str(_c)]
  834. return "+".join(_s)
  835. class PbsTypeExecHost(str):
  836. """
  837. Descriptor class for exec_host attribute
  838. :param hosts: List of hosts in the exec_host. Each entry is
  839. a host info dictionary that maps the number of
  840. cpus and its task number
  841. :type hosts: List
  842. """
  843. def __init__(self, s=None):
  844. if s is None:
  845. return None
  846. self._as_str = s
  847. self.hosts = []
  848. hsts = s.split('+')
  849. for h in hsts:
  850. hi = {}
  851. ti = {}
  852. (host, task) = h.split('/',)
  853. d = task.split('*')
  854. if len(d) == 1:
  855. taskslot = d[0]
  856. ncpus = 1
  857. elif len(d) == 2:
  858. (taskslot, ncpus) = d
  859. else:
  860. (taskslot, ncpus) = (0, 1)
  861. ti['task'] = taskslot
  862. ti['ncpus'] = ncpus
  863. hi[host] = ti
  864. self.hosts.append(hi)
  865. def __repr__(self):
  866. return str(self.hosts)
  867. def __str__(self):
  868. return self._as_str
  869. class PbsTypeJobId(str):
  870. """
  871. Descriptor class for a Job identifier
  872. :param id: The numeric portion of a job identifier
  873. :type id: int
  874. :param server_name: The pbs server name
  875. :type server_name: str
  876. :param server_shortname: The first portion of a FQDN server
  877. name
  878. :type server_shortname: str
  879. """
  880. def __init__(self, value=None):
  881. if value is None:
  882. return
  883. self.value = value
  884. r = value.split('.', 1)
  885. if len(r) != 2:
  886. return
  887. self.id = int(r[0])
  888. self.server_name = r[1]
  889. self.server_shortname = r[1].split('.', 1)[0]
  890. def __str__(self):
  891. return str(self.value)
  892. class PbsUser(object):
  893. """
  894. The PbsUser type augments a PBS username to associate
  895. it to groups to which the user belongs
  896. :param name: The user name referenced
  897. :type name: str
  898. :param uid: uid of user
  899. :type uid: int or None
  900. :param groups: The list of PbsGroup objects the user
  901. belongs to
  902. :type groups: List or None
  903. """
  904. def __init__(self, name, uid=None, groups=None):
  905. self.name = name
  906. if uid is not None:
  907. self.uid = int(uid)
  908. else:
  909. self.uid = None
  910. self.home = None
  911. self.gid = None
  912. self.shell = None
  913. self.gecos = None
  914. try:
  915. _user = pwd.getpwnam(self.name)
  916. self.uid = _user.pw_uid
  917. self.home = _user.pw_dir
  918. self.gid = _user.pw_gid
  919. self.shell = _user.pw_shell
  920. self.gecos = _user.pw_gecos
  921. except:
  922. pass
  923. if groups is None:
  924. self.groups = []
  925. elif isinstance(groups, list):
  926. self.groups = groups
  927. else:
  928. self.groups = groups.split(",")
  929. for g in self.groups:
  930. if isinstance(g, str):
  931. self.groups.append(PbsGroup(g, users=[self]))
  932. elif self not in g.users:
  933. g.users.append(self)
  934. def __repr__(self):
  935. return str(self.name)
  936. def __str__(self):
  937. return self.__repr__()
  938. def __int__(self):
  939. return int(self.uid)
  940. class PbsGroup(object):
  941. """
  942. The PbsGroup type augments a PBS groupname to associate it
  943. to users to which the group belongs
  944. :param name: The group name referenced
  945. :type name: str
  946. :param gid: gid of group
  947. :type gid: int or None
  948. :param users: The list of PbsUser objects the group belongs to
  949. :type users: List or None
  950. """
  951. def __init__(self, name, gid=None, users=None):
  952. self.name = name
  953. if gid is not None:
  954. self.gid = int(gid)
  955. else:
  956. self.gid = None
  957. try:
  958. _group = grp.getgrnam(self.name)
  959. self.gid = _group.gr_gid
  960. except:
  961. pass
  962. if users is None:
  963. self.users = []
  964. elif isinstance(users, list):
  965. self.users = users
  966. else:
  967. self.users = users.split(",")
  968. for u in self.users:
  969. if isinstance(u, str):
  970. self.users.append(PbsUser(u, groups=[self]))
  971. elif self not in u.groups:
  972. u.groups.append(self)
  973. def __repr__(self):
  974. return str(self.name)
  975. def __str__(self):
  976. return self.__repr__()
  977. def __int__(self):
  978. return int(self.gid)
  979. class BatchUtils(object):
  980. """
  981. Utility class to create/convert/display various PBS
  982. data structures
  983. """
  984. legal = "\d\w:\+=\[\]~"
  985. chunks_tag = re.compile("(?P<chunk>\([\d\w:\+=\[\]~]\)[\+]?)")
  986. chunk_tag = re.compile("(?P<vnode>[\w\d\[\]]+):" +
  987. "(?P<resources>[\d\w:\+=\[\]~])+\)")
  988. array_tag = re.compile("(?P<jobid>[\d]+)\[(?P<subjobid>[0-9]*)\]*" +
  989. "[.]*[(?P<server>.*)]*")
  990. subjob_tag = re.compile("(?P<jobid>[\d]+)\[(?P<subjobid>[0-9]+)\]*" +
  991. "[.]*[(?P<server>.*)]*")
  992. pbsobjname_re = re.compile("^([\w\d][\d\w\s]*:?[\s]+)" +
  993. "*(?P<name>[\w@\.\d\[\]-]+)$")
  994. pbsobjattrval_re = re.compile(r"""
  995. [\s]*(?P<attribute>[\w\d\.-]+)
  996. [\s]*=[\s]*
  997. (?P<value>.*)
  998. [\s]*""",
  999. re.VERBOSE)
  1000. dt_re = '(?P<dt_from>\d\d/\d\d/\d\d\d\d \d\d:\d\d)' + \
  1001. '[\s]+' + \
  1002. '(?P<dt_to>\d\d/\d\d/\d\d\d\d \d\d:\d\d)'
  1003. dt_tag = re.compile(dt_re)
  1004. hms_tag = re.compile('(?P<hr>\d\d):(?P<mn>\d\d):(?P<sc>\d\d)')
  1005. lim_tag = re.compile("(?P<limtype>[a-z_]+)[\.]*(?P<resource>[\w\d-]*)"
  1006. "=[\s]*\[(?P<entity_type>[ugpo]):"
  1007. "(?P<entity_name>[\w\d-]+)"
  1008. "=(?P<entity_value>[\d\w]+)\][\s]*")
  1009. def __init__(self):
  1010. self.logger = logging.getLogger(__name__)
  1011. self.du = DshUtils()
  1012. def list_to_attrl(self, l):
  1013. """
  1014. Convert a list to a PBS attribute list
  1015. :param l: List to be converted
  1016. :type l: List
  1017. :returns: PBS attribute list
  1018. """
  1019. return self.list_to_attropl(l, None)
  1020. def list_to_attropl(self, l, op=SET):
  1021. """
  1022. Convert a list to a PBS attribute operation list
  1023. :param l: List to be converted
  1024. :type l: List
  1025. :returns: PBS attribute operation list
  1026. """
  1027. head = None
  1028. prev = None
  1029. for i in l:
  1030. a = self.str_to_attropl(i, op)
  1031. if prev is None:
  1032. head = a
  1033. else:
  1034. prev.next = a
  1035. prev = a
  1036. if op is not None:
  1037. a.op = op
  1038. return head
  1039. def str_to_attrl(self, s):
  1040. """
  1041. Convert a string to a PBS attribute list
  1042. :param s: String to be converted
  1043. :type s: str
  1044. :returns: PBS attribute list
  1045. """
  1046. return self.str_to_attropl(s, None)
  1047. def str_to_attropl(self, s, op=SET):
  1048. """
  1049. Convert a string to a PBS attribute operation list
  1050. :param s: String to be converted
  1051. :type s: str
  1052. :returns: PBS attribute operation list
  1053. """
  1054. if op is not None:
  1055. a = attropl()
  1056. else:
  1057. a = attrl()
  1058. if '.' in s:
  1059. (attribute, resource) = s.split('.')
  1060. a.name = attribute
  1061. a.resource = resource.strip()
  1062. else:
  1063. a.name = s
  1064. a.value = ''
  1065. a.next = None
  1066. if op:
  1067. a.op = op
  1068. return a
  1069. def dict_to_attrl(self, d={}):
  1070. """
  1071. Convert a dictionary to a PBS attribute list
  1072. :param d: Dictionary to be converted
  1073. :type d: Dictionary
  1074. :returns: PBS attribute list
  1075. """
  1076. return self.dict_to_attropl(d, None)
  1077. def dict_to_attropl(self, d={}, op=SET):
  1078. """
  1079. Convert a dictionary to a PBS attribute operation list
  1080. :param d: Dictionary to be converted
  1081. :type d: Dictionary
  1082. :returns: PBS attribute operation list
  1083. """
  1084. if len(d.keys()) == 0:
  1085. return None
  1086. prev = None
  1087. head = None
  1088. for k, v in d.items():
  1089. if isinstance(v, tuple):
  1090. op = v[0]
  1091. v = v[1]
  1092. if op is not None:
  1093. a = attropl()
  1094. else:
  1095. a = attrl()
  1096. if '.' in k:
  1097. (attribute, resource) = k.split('.')
  1098. a.name = attribute
  1099. a.resource = resource
  1100. else:
  1101. a.name = k
  1102. a.value = str(v)
  1103. if op is not None:
  1104. a.op = op
  1105. a.next = None
  1106. if prev is None:
  1107. head = a
  1108. else:
  1109. prev.next = a
  1110. prev = a
  1111. return head
  1112. def convert_to_attrl(self, attrib):
  1113. """
  1114. Generic call to convert Python type to PBS attribute list
  1115. :param attrib: Attributes to be converted
  1116. :type attrib: List or tuple or dictionary or str
  1117. :returns: PBS attribute list
  1118. """
  1119. return self.convert_to_attropl(attrib, None)
  1120. def convert_to_attropl(self, attrib, cmd=MGR_CMD_SET, op=None):
  1121. """
  1122. Generic call to convert Python type to PBS attribute
  1123. operation list
  1124. :param attrib: Attributes to be converted
  1125. :type attrib: List or tuple or dictionary or str
  1126. :returns: PBS attribute operation list
  1127. """
  1128. if op is None:
  1129. op = self.command_to_op(cmd)
  1130. if isinstance(attrib, (list, tuple)):
  1131. a = self.list_to_attropl(attrib, op)
  1132. elif isinstance(attrib, (dict, OrderedDict)):
  1133. a = self.dict_to_attropl(attrib, op)
  1134. elif isinstance(attrib, str):
  1135. a = self.str_to_attropl(attrib, op)
  1136. else:
  1137. a = None
  1138. return a
  1139. def command_to_op(self, cmd=None):
  1140. """
  1141. Map command to a ``SET`` or ``UNSET`` Operation. An unrecognized
  1142. command will return SET. No command will return None.
  1143. :param cmd: Command to be mapped
  1144. :type cmd: str
  1145. :returns: ``SET`` or ``UNSET`` operation for the command
  1146. """
  1147. if cmd is None:
  1148. return None
  1149. if cmd in (MGR_CMD_SET, MGR_CMD_EXPORT, MGR_CMD_IMPORT):
  1150. return SET
  1151. if cmd == MGR_CMD_UNSET:
  1152. return UNSET
  1153. return SET
  1154. def display_attrl(self, a=None, writer=sys.stdout):
  1155. """
  1156. Display an attribute list using writer, defaults to sys.stdout
  1157. :param a: Attributes
  1158. :type a: List
  1159. :returns: Displays attribute list
  1160. """
  1161. return self.display_attropl(a)
  1162. def display_attropl(self, attropl=None, writer=sys.stdout):
  1163. """
  1164. Display an attribute operation list with writer, defaults to
  1165. sys.stdout
  1166. :param attropl: Attribute operation list
  1167. :type attropl: List
  1168. :returns: Displays an attribute operation list
  1169. """
  1170. attrs = attropl
  1171. while attrs is not None:
  1172. if attrs.resource:
  1173. writer.write('\t' + attrs.name + '.' + attrs.resource + '= ' +
  1174. attrs.value + '\n')
  1175. else:
  1176. writer.write('\t' + attrs.name + '= ' + attrs.value + '\n')
  1177. attrs = attrs.next
  1178. def display_dict(self, d, writer=sys.stdout):
  1179. """
  1180. Display a dictionary using writer, defaults to sys.stdout
  1181. :param d: Dictionary
  1182. :type d: Dictionary
  1183. :returns: Displays a dictionary
  1184. """
  1185. if not d:
  1186. return
  1187. for k, v in d.items():
  1188. writer.write(k + ': ' + v + '\n')
  1189. def batch_status_to_dictlist(self, bs=None, attr_names=None, id=None):
  1190. """
  1191. Convert a batch status to a list of dictionaries.
  1192. version 0.1a6 added this conversion as a typemap(out) as
  1193. part of the swig wrapping itself so there are fewer uses
  1194. for this function.Returns a list of dictionary
  1195. representation of batch status
  1196. :param bs: Batch status
  1197. :param attr_names: Attribute names
  1198. :returns: List of dictionaries
  1199. """
  1200. attr_time = (
  1201. 'ctime', 'mtime', 'qtime', 'start', 'end', 'reserve_start',
  1202. 'reserve_end', 'estimated.start_time')
  1203. ret = []
  1204. while bs:
  1205. if id is not None and bs.name != id:
  1206. bs = bs.next
  1207. continue
  1208. d = {}
  1209. attrs = bs.attribs
  1210. while attrs is not None:
  1211. if attrs.resource:
  1212. key = attrs.name + '.' + attrs.resource
  1213. else:
  1214. key = attrs.name
  1215. if attr_names is not None:
  1216. if key not in attr_names:
  1217. attrs = attrs.next
  1218. continue
  1219. val = attrs.value
  1220. if attrs.name in attr_time:
  1221. val = self.convert_time(val)
  1222. # for attributes that may occur multiple times (e.g., max_run)
  1223. # append the value in a comma-separated representation
  1224. if key in d:
  1225. d[key] = d[key] + ',' + str(val)
  1226. else:
  1227. d[key] = str(val)
  1228. attrs = attrs.next
  1229. if len(d.keys()) > 0:
  1230. ret.append(d)
  1231. d['id'] = bs.name
  1232. bs = bs.next
  1233. return ret
  1234. def display_batch_status(self, bs=None, attr_names=None,
  1235. writer=sys.stdout):
  1236. """
  1237. Display a batch status using writer, defaults to sys.stdout
  1238. :param bs: Batch status
  1239. :param attr_name: Attribute name
  1240. :type attr_name: str
  1241. :returns: Displays batch status
  1242. """
  1243. if bs is None:
  1244. return
  1245. l = self.batch_status_to_dictlist(bs, attr_names)
  1246. self.display_batch_status_as_dictlist(l, writer)
  1247. def display_dictlist(self, l=[], writer=sys.stdout, fmt=None):
  1248. """
  1249. Display a list of dictionaries using writer, defaults to
  1250. sys.stdout
  1251. :param l: The list to display
  1252. :type l: List
  1253. :param writer: The stream on which to write
  1254. :param fmt: An optional formatting string
  1255. :type fmt: str or None
  1256. :returns: Displays list of dictionaries
  1257. """
  1258. self.display_batch_status_as_dictlist(l, writer, fmt)
  1259. def dictlist_to_file(self, l=[], filename=None, mode='w'):
  1260. """
  1261. write a dictlist to file
  1262. :param l: Dictlist
  1263. :type l: List
  1264. :param filename: File to which dictlist need to be written
  1265. :type filename: str
  1266. :param mode: Mode of file
  1267. :type mode: str
  1268. :raises: Exception writing to file
  1269. """
  1270. if filename is None:
  1271. self.logger.error('a filename is required')
  1272. return
  1273. d = os.path.dirname(filename)
  1274. if d != '' and not os.path.isdir(d):
  1275. os.makedirs(d)
  1276. try:
  1277. f = open(filename, mode)
  1278. self.display_dictlist(l, f)
  1279. f.close()
  1280. except:
  1281. self.logger.error('error writing to file ' + filename)
  1282. raise
  1283. def batch_status_as_dictlist_to_file(self, l=[], writer=sys.stdout):
  1284. """
  1285. Write a dictlist to file
  1286. :param l: Dictlist
  1287. :type l: List
  1288. :raises: Exception writing to file
  1289. """
  1290. return self.dictlist_to_file(l, writer)
  1291. def file_to_dictlist(self, file=None, attribs=None, id=None):
  1292. """
  1293. Convert a file to a batch dictlist format
  1294. :param file: File to be converted
  1295. :type file: str
  1296. :param attribs: Attributes
  1297. :returns: File converted to a batch dictlist format
  1298. """
  1299. if file is None:
  1300. return []
  1301. try:
  1302. f = open(file, 'r')
  1303. lines = f.readlines()
  1304. f.close()
  1305. except Exception, e:
  1306. self.logger.error('error converting list of dictionaries to ' +
  1307. 'file ' + str(e))
  1308. return []
  1309. return self.convert_to_dictlist(lines, attribs, id=id)
  1310. def file_to_vnodedef(self, file=None):
  1311. """
  1312. Convert a file output of pbsnodes -av to a vnode
  1313. definition format
  1314. :param file: File to be converted
  1315. :type sile: str
  1316. :returns: Vnode definition format
  1317. """
  1318. if file is None:
  1319. return None
  1320. try:
  1321. f = open(file, 'r')
  1322. lines = f.readlines()
  1323. f.close()
  1324. except:
  1325. self.logger.error('error converting nodes to vnode def')
  1326. return None
  1327. dl = self.convert_to_dictlist(lines)
  1328. return self.dictlist_to_vnodedef(dl)
  1329. def show(self, l=[], name=None, fmt=None):
  1330. """
  1331. Alias to display_dictlist with sys.stdout as writer
  1332. :param name: if specified only show the object of
  1333. that name
  1334. :type name: str
  1335. :param fmt: Optional formatting string, uses %n for
  1336. object name, %a for attributes, for example
  1337. a format of '%nE{\}nE{\}t%aE{\}n' will display
  1338. objects with their name starting on the first
  1339. column, a new line, and attributes indented by
  1340. a tab followed by a new line at the end.
  1341. :type fmt: str
  1342. """
  1343. if name:
  1344. i = 0
  1345. for obj in l:
  1346. if obj['id'] == name:
  1347. l = [l[i]]
  1348. break
  1349. i += 1
  1350. self.display_dictlist(l, fmt=fmt)
  1351. def get_objtype(self, d={}):
  1352. """
  1353. Get the type of a given object
  1354. :param d: Dictionary
  1355. :type d: Dictionary
  1356. :Returns: Type of the object
  1357. """
  1358. if 'Job_Name' in d:
  1359. return JOB
  1360. elif 'queue_type' in d:
  1361. return QUEUE
  1362. elif 'Reserve_Name' in d:
  1363. return RESV
  1364. elif 'server_state' in d:
  1365. return SERVER
  1366. elif 'Mom' in d:
  1367. return NODE
  1368. elif 'event' in d:
  1369. return HOOK
  1370. elif 'type' in d:
  1371. return RSC
  1372. return None
  1373. def display_batch_status_as_dictlist(self, l=[], writer=sys.stdout,
  1374. fmt=None):
  1375. """
  1376. Display a batch status as a list of dictionaries
  1377. using writer, defaults to sys.stdout
  1378. :param l: List
  1379. :type l: List
  1380. :param fmt: - Optional format string
  1381. :type fmt: str or None
  1382. :returns: Displays batch status as a list of dictionaries
  1383. """
  1384. if l is None:
  1385. return
  1386. for d in l:
  1387. self.display_batch_status_as_dict(d, writer, fmt)
  1388. def batch_status_as_dict_to_str(self, d={}, fmt=None):
  1389. """
  1390. Return a string representation of a batch status dictionary
  1391. :param d: Dictionary
  1392. :type d: Dictionary
  1393. :param fmt: Optional format string
  1394. :type fmt: str or None
  1395. :returns: String representation of a batch status dictionary
  1396. """
  1397. objtype = self.get_objtype(d)
  1398. if fmt is not None:
  1399. if '%1' in fmt:
  1400. _d1 = fmt['%1']
  1401. else:
  1402. _d1 = '\n'
  1403. if '%2' in fmt:
  1404. _d2 = fmt['%2']
  1405. else:
  1406. _d2 = ' '
  1407. if '%3' in fmt:
  1408. _d3 = fmt['%3']
  1409. else:
  1410. _d3 = ' = '
  1411. if '%4' in fmt:
  1412. _d4 = fmt['%4']
  1413. else:
  1414. _d4 = '\n'
  1415. if '%5' in fmt:
  1416. _d5 = fmt['%5']
  1417. else:
  1418. _d5 = '\n'
  1419. if '%6' in fmt:
  1420. _d6 = fmt['%6']
  1421. else:
  1422. _d6 = ''
  1423. else:
  1424. _d1 = '\n'
  1425. _d2 = ' '
  1426. _d3 = ' = '
  1427. _d4 = '\n'
  1428. _d5 = '\n'
  1429. _d6 = ''
  1430. if objtype == JOB:
  1431. _n = 'Job Id: ' + d['id'] + _d1
  1432. elif objtype == QUEUE:
  1433. _n = 'Queue: ' + d['id'] + _d1
  1434. elif objtype == RESV:
  1435. _n = 'Name: ' + d['id'] + _d1
  1436. elif objtype == SERVER:
  1437. _n = 'Server: ' + d['id'] + _d1
  1438. elif objtype == RSC:
  1439. _n = 'Resource: ' + d['id'] + _d1
  1440. elif 'id' in d:
  1441. _n = d['id'] + _d1
  1442. del d['id']
  1443. else:
  1444. _n = ''
  1445. _a = []
  1446. for k, v in sorted(d.items()):
  1447. if k == 'id':
  1448. continue
  1449. _a += [_d2 + k + _d3 + str(v)]
  1450. return _n + _d4.join(_a) + _d5 + _d6
  1451. def display_batch_status_as_dict(self, d={}, writer=sys.stdout, fmt=None):
  1452. """
  1453. Display a dictionary representation of a batch status
  1454. using writer, defaults to sys.stdout
  1455. :param d: Dictionary
  1456. :type d: Dictionary
  1457. :param fmt: Optional format string
  1458. :param fmt: str
  1459. :returns: Displays dictionary representation of a batch
  1460. status
  1461. """
  1462. writer.write(self.batch_status_as_dict_to_str(d, fmt))
  1463. def decode_dictlist(self, l=None, json=True):
  1464. """
  1465. decode a list of dictionaries
  1466. :param l: List of dictionaries
  1467. :type l: List
  1468. :param json: The target of the decode is meant for ``JSON``
  1469. formatting
  1470. :returns: Decoded list of dictionaries
  1471. """
  1472. if l is None:
  1473. return ''
  1474. _js = []
  1475. for d in l:
  1476. _jdict = {}
  1477. for k, v in d.items():
  1478. if ',' in v:
  1479. _jdict[k] = v.split(',')
  1480. else:
  1481. _jdict[k] = self.decode_value(v)
  1482. _js.append(_jdict)
  1483. return _js
  1484. def convert_to_dictlist(self, l, attribs=None, mergelines=True, id=None):
  1485. """
  1486. Convert a list of records into a dictlist format.
  1487. :param l: array of records to convert
  1488. :type l: List
  1489. :param mergelines: merge qstat broken lines into one
  1490. :returns: Record list converted into dictlist format
  1491. """
  1492. if mergelines:
  1493. lines = []
  1494. for i in range(len(l)):
  1495. if l[i].startswith('\t'):
  1496. _e = len(lines) - 1
  1497. lines[_e] = lines[_e].strip('\r\n\t') + \
  1498. l[i].strip('\r\n\t')
  1499. else:
  1500. lines.append(l[i])
  1501. else:
  1502. lines = l
  1503. objlist = []
  1504. d = {}
  1505. for l in lines:
  1506. l = l.strip()
  1507. m = self.pbsobjname_re.match(l)
  1508. if m:
  1509. if len(d.keys()) > 1:
  1510. if id is None or (id is not None and d['id'] == id):
  1511. objlist.append(d.copy())
  1512. d = {}
  1513. d['id'] = m.group('name')
  1514. else:
  1515. m = self.pbsobjattrval_re.match(l)
  1516. if m:
  1517. attr = m.group('attribute')
  1518. if attribs is None or attr in attribs:
  1519. if attr in d:
  1520. d[attr] = d[attr] + "," + m.group('value')
  1521. else:
  1522. d[attr] = m.group('value')
  1523. # add the last element
  1524. if len(d.keys()) > 1:
  1525. if id is None or (id is not None and d['id'] == id):
  1526. objlist.append(d.copy())
  1527. return objlist
  1528. def convert_to_batch(self, l, mergelines=True):
  1529. """
  1530. Convert a list of records into a batch format.
  1531. :param l: array of records to convert
  1532. :type l: List
  1533. :param mergelines: qstat breaks long lines over
  1534. multiple lines, merge them\
  1535. to one by default.
  1536. :type mergelines: bool
  1537. :returns: A linked list of batch status
  1538. """
  1539. if mergelines:
  1540. lines = []
  1541. for i in range(len(l)):
  1542. if l[i].startswith('\t'):
  1543. _e = len(lines) - 1
  1544. lines[_e] = lines[_e].strip('\r\t') + \
  1545. l[i].strip('\r\n')
  1546. else:
  1547. lines.append(l[i])
  1548. else:
  1549. lines = l
  1550. head_bs = None
  1551. prev_bs = None
  1552. prev_attr = None
  1553. for l in lines:
  1554. l = l.strip()
  1555. m = self.pbsobjname_re.match(l)
  1556. if m:
  1557. bs = batch_status()
  1558. bs.name = m.group('name')
  1559. bs.attribs = None
  1560. bs.next = None
  1561. if prev_bs:
  1562. prev_bs.next = bs
  1563. if head_bs is None:
  1564. head_bs = bs
  1565. prev_bs = bs
  1566. prev_attr = None
  1567. else:
  1568. m = self.pbsobjattrval_re.match(l)
  1569. if m:
  1570. attr = attrl()
  1571. attr.name = m.group('attribute')
  1572. attr.value = m.group('value')
  1573. attr.next = None
  1574. if bs.attribs is None:
  1575. bs.attribs = attr
  1576. if prev_attr:
  1577. prev_attr.next = attr
  1578. prev_attr = attr
  1579. return head_bs
  1580. def file_to_batch(self, file=None):
  1581. """
  1582. Convert a file to batch format
  1583. :param file: File to be converted
  1584. :type file: str or None
  1585. :returns: File converted into batch format
  1586. """
  1587. if file is None:
  1588. return None
  1589. try:
  1590. f = open(file, 'r')
  1591. l = f.readlines()
  1592. f.close()
  1593. except:
  1594. self.logger.error('error converting file ' + file + ' to batch')
  1595. return None
  1596. return self.convert_to_batch(l)
  1597. def batch_to_file(self, bs=None, file=None):
  1598. """
  1599. Write a batch object to file
  1600. :param bs: Batch status
  1601. :param file: File to which batch object is to be written
  1602. :type file: str
  1603. """
  1604. if bs is None or file is None:
  1605. return
  1606. try:
  1607. f = open(file, 'w')
  1608. self.display_batch_status(bs, writer=f)
  1609. f.close()
  1610. except:
  1611. self.logger.error('error converting batch status to file')
  1612. def batch_to_vnodedef(self, bs):
  1613. """
  1614. :param bs: Batch status
  1615. :returns: The vnode definition string representation
  1616. of nodes batch_status
  1617. """
  1618. out = ["$configversion 2\n"]
  1619. while bs is not None:
  1620. attr = bs.attribs
  1621. while attr is not None:
  1622. if attr.name.startswith("resources_available") or \
  1623. attr.name.startswith("sharing"):
  1624. out += [bs.name + ": "]
  1625. out += [attr.name + "=" + attr.value + "\n"]
  1626. attr = attr.next
  1627. bs = bs.next
  1628. return "".join(out)
  1629. def dictlist_to_vnodedef(self, dl=None):
  1630. """
  1631. :param dl: Dictionary list
  1632. :type dl: List
  1633. :returns: The vnode definition string representation
  1634. of a dictlist
  1635. """
  1636. if dl is None:
  1637. return ''
  1638. out = ["$configversion 2\n"]
  1639. for node in dl:
  1640. for k, v in node.items():
  1641. if (k.startswith("resources_available") or
  1642. k.startswith("sharing") or
  1643. k.startswith("provision_enable") or
  1644. k.startswith("queue")):
  1645. out += [node['id'] + ": "]
  1646. # MoM dislikes empty values reported in vnode defs so
  1647. # we substitute no value for an actual empty string
  1648. if not v:
  1649. v = '""'
  1650. out += [k + "=" + str(v) + "\n"]
  1651. return "".join(out)
  1652. def objlist_to_dictlist(self, objlist=None):
  1653. """
  1654. Convert a list of PBS/PTL objects ``(e.g. Server/Job...)``
  1655. into a dictionary list representation of the batch status
  1656. :param objlist: List of ``PBS/PTL`` objects
  1657. :type objlist: List
  1658. :returns: Dictionary list representation of the batch status
  1659. """
  1660. if objlist is None:
  1661. return None
  1662. bsdlist = []
  1663. for obj in objlist:
  1664. newobj = self.obj_to_dict(obj)
  1665. bsdlist.append(newobj)
  1666. return bsdlist
  1667. def obj_to_dict(self, obj):
  1668. """
  1669. Convert a PBS/PTL object (e.g. Server/Job...) into a
  1670. dictionary format
  1671. :param obj: ``PBS/PTL`` object
  1672. :returns: Dictionary of ``PBS/PTL`` objects
  1673. """
  1674. newobj = dict(obj.attributes.items())
  1675. newobj[id] = obj.name
  1676. return newobj
  1677. def parse_execvnode(self, s=None):
  1678. """
  1679. Parse an execvnode string into chunk objects
  1680. :param s: Execvnode string
  1681. :type s: str or None
  1682. :returns: Chunk objects for parsed execvnode string
  1683. """
  1684. if s is None:
  1685. return None
  1686. chunks = []
  1687. start = 0
  1688. for c in range(len(s)):
  1689. if s[c] == '(':
  1690. start = c + 1
  1691. if s[c] == ')':
  1692. chunks.append(PbsTypeChunk(chunkstr=s[start:c]).info)
  1693. return chunks
  1694. def anupbs_exechost_numhosts(self, s=None):
  1695. """
  1696. :param s: Exechost string
  1697. :type s: str or None
  1698. """
  1699. n = 0
  1700. if '[' in s:
  1701. eh = re.sub(r'.*\[(.*)\].*', r'\1', s)
  1702. hosts = eh.split(',')
  1703. for hid in hosts:
  1704. elm = hid.split('-')
  1705. if len(elm) == 2:
  1706. n += int(elm[1]) - int(elm[0]) + 1
  1707. else:
  1708. n += 1
  1709. else:
  1710. n += 1
  1711. return n
  1712. def parse_exechost(self, s=None):
  1713. """
  1714. Parse an exechost string into a dictionary representation
  1715. :param s: String to be parsed
  1716. :type s: str or None
  1717. :returns: Dictionary format of the exechost string
  1718. """
  1719. if s is None:
  1720. return None
  1721. hosts = []
  1722. hsts = s.split('+')
  1723. for h in hsts:
  1724. hi = {}
  1725. ti = {}
  1726. (host, task) = h.split('/',)
  1727. d = task.split('*')
  1728. if len(d) == 1:
  1729. taskslot = d[0]
  1730. ncpus = 1
  1731. elif len(d) == 2:
  1732. (taskslot, ncpus) = d
  1733. else:
  1734. (taskslot, ncpus) = (0, 1)
  1735. ti['task'] = taskslot
  1736. ti['ncpus'] = ncpus
  1737. hi[host] = ti
  1738. hosts.append(hi)
  1739. return hosts
  1740. def parse_select(self, s=None):
  1741. """
  1742. Parse a ``select/schedselect`` string into a list
  1743. of dictionaries.
  1744. :param s: select/schedselect string
  1745. :type s: str or None
  1746. :returns: List of dictonaries
  1747. """
  1748. if s is None:
  1749. return
  1750. info = []
  1751. chunks = s.split('+')
  1752. for chunk in chunks:
  1753. d = chunk.split(':')
  1754. numchunks = int(d[0])
  1755. resources = {}
  1756. for e in d[1:]:
  1757. k, v = e.split('=')
  1758. resources[k] = v
  1759. for _ in range(numchunks):
  1760. info.append(resources)
  1761. return info
  1762. @classmethod
  1763. def isfloat(cls, value):
  1764. """
  1765. returns true if value is a float or a string representation
  1766. of a float returns false otherwise
  1767. :param value: value to be checked
  1768. :type value: str or int or float
  1769. :returns: True or False
  1770. """
  1771. if isinstance(value, float):
  1772. return True
  1773. if isinstance(value, str):
  1774. try:
  1775. float(value)
  1776. return True
  1777. except ValueError:
  1778. return False
  1779. @classmethod
  1780. def decode_value(cls, value):
  1781. """
  1782. Decode an attribute/resource value, if a value is
  1783. made up of digits only then return the numeric value
  1784. of it, if it is made of alphanumeric values only, return
  1785. it as a string, if it is of type size, i.e., with a memory
  1786. unit such as b,kb,mb,gb then return the converted size to
  1787. kb without the unit
  1788. :param value: attribute/resource value
  1789. :type value: str or int
  1790. :returns: int or float or string
  1791. """
  1792. if value is None or callable(value):
  1793. return value
  1794. if isinstance(value, (int, float)):
  1795. return value
  1796. if value.isdigit():
  1797. return int(value)
  1798. if value.isalpha() or value == '':
  1799. return value
  1800. if cls.isfloat(value):
  1801. return float(value)
  1802. if ':' in value:
  1803. try:
  1804. value = int(PbsTypeDuration(value))
  1805. except ValueError:
  1806. pass
  1807. return value
  1808. # TODO revisit: assume (this could be the wrong type, need a real
  1809. # data model anyway) that the remaining is a memory expression
  1810. try:
  1811. value = PbsTypeSize(value)
  1812. return value.value
  1813. except ValueError:
  1814. pass
  1815. except TypeError:
  1816. # if not then we pass to return the value as is
  1817. pass
  1818. return value
  1819. def convert_time(self, val, fmt='%a %b %d %H:%M:%S %Y'):
  1820. """
  1821. Convert a date time format into number of seconds
  1822. since epoch
  1823. :param val: date time value
  1824. :param fmt: date time format
  1825. :type fmt: str
  1826. :returns: seconds
  1827. """
  1828. # Tweak for NAS format that puts the number of seconds since epoch
  1829. # in between
  1830. if val.split()[0].isdigit():
  1831. val = int(val.split()[0])
  1832. elif not val.isdigit():
  1833. val = time.strptime(val, fmt)
  1834. val = int(time.mktime(val))
  1835. return val
  1836. def convert_duration(self, val):
  1837. """
  1838. Convert HH:MM:SS into number of seconds
  1839. If a number is fed in, that number is returned
  1840. If neither formatted data is fed in, returns 0
  1841. :param val: duration value
  1842. :type val: str
  1843. :raises: Incorrect format error
  1844. :returns: seconds
  1845. """
  1846. if val.isdigit():
  1847. return int(val)
  1848. hhmmss = val.split(':')
  1849. if len(hhmmss) != 3:
  1850. self.logger.error('Incorrect format, expected HH:MM:SS')
  1851. return 0
  1852. return int(hhmmss[0]) * 3600 + int(hhmmss[1]) * 60 + int(hhmmss[2])
  1853. def convert_seconds_to_datetime(self, tm, fmt=None, seconds=True):
  1854. """
  1855. Convert time format to number of seconds since epoch
  1856. :param tm: the time to convert
  1857. :type tm: str
  1858. :param fmt: optional format string. If used, the seconds
  1859. parameter is ignored.Defaults to ``%Y%m%d%H%M``
  1860. :type fmt: str or None
  1861. :param seconds: if True, convert time with seconds
  1862. granularity. Defaults to True.
  1863. :type seconds: bool
  1864. :returns: Number of seconds
  1865. """
  1866. if fmt is None:
  1867. fmt = "%Y%m%d%H%M"
  1868. if seconds:
  1869. fmt += ".%S"
  1870. return time.strftime(fmt, time.localtime(int(tm)))
  1871. def convert_stime_to_seconds(self, st):
  1872. """
  1873. Convert a time to seconds, if we fail we return the
  1874. original time
  1875. :param st: Time to be converted
  1876. :type st: str
  1877. :returns: Number of seconds
  1878. """
  1879. try:
  1880. ret = time.mktime(time.strptime(st, '%a %b %d %H:%M:%S %Y'))
  1881. except:
  1882. ret = st
  1883. return ret
  1884. def convert_dedtime(self, dtime):
  1885. """
  1886. Convert dedicated time string of form %m/%d/%Y %H:%M.
  1887. :param dtime: A datetime string, as an entry in the
  1888. dedicated_time file
  1889. :type dtime: str
  1890. :returns: A tuple of (from,to) of time since epoch
  1891. """
  1892. dtime_from = None
  1893. dtime_to = None
  1894. m = self.dt_tag.match(dtime.strip())
  1895. if m:
  1896. try:
  1897. _f = "%m/%d/%Y %H:%M"
  1898. dtime_from = self.convert_datetime_to_epoch(m.group('dt_from'),
  1899. fmt=_f)
  1900. dtime_to = self.convert_datetime_to_epoch(m.group('dt_to'),
  1901. fmt=_f)
  1902. except:
  1903. self.logger.error('error converting dedicated time')
  1904. return (dtime_from, dtime_to)
  1905. def convert_datetime_to_epoch(self, mdyhms, fmt="%m/%d/%Y %H:%M:%S"):
  1906. """
  1907. Convert the date time to epoch
  1908. :param mdyhms: date time
  1909. :type mdyhms: str
  1910. :param fmt: Format for date time
  1911. :type fmt: str
  1912. :returns: Epoch time
  1913. """
  1914. return int(time.mktime(time.strptime(mdyhms, fmt)))
  1915. def compare_versions(self, v1, v2, op=None):
  1916. """
  1917. Compare v1 to v2 with respect to operation op
  1918. :param v1: If not a looseversion, it gets converted
  1919. to it
  1920. :param v2: If not a looseversion, it gets converted
  1921. to it
  1922. :param op: An operation, one of ``LT``, ``LE``, ``EQ``,
  1923. ``GE``, ``GT``
  1924. :type op: str
  1925. :returns: True or False
  1926. """
  1927. if op is None:
  1928. self.logger.error('missing operator, one of LT,LE,EQ,GE,GT')
  1929. return None
  1930. if v1 is None or v2 is None:
  1931. return False
  1932. if isinstance(v1, str):
  1933. v1 = LooseVersion(v1)
  1934. if isinstance(v2, str):
  1935. v2 = LooseVersion(v2)
  1936. if op == GT:
  1937. if v1 > v2:
  1938. return True
  1939. elif op == GE:
  1940. if v1 >= v2:
  1941. return True
  1942. elif op == EQ:
  1943. if v1 == v2:
  1944. return True
  1945. elif op == LT:
  1946. if v1 < v2:
  1947. return True
  1948. elif op == LE:
  1949. if v1 <= v2:
  1950. return True
  1951. return False
  1952. def convert_arglist(self, attr):
  1953. """
  1954. strip the XML attributes from the argument list attribute
  1955. :param attr: Argument list attributes
  1956. :type attr: List
  1957. :returns: Stripped XML attributes
  1958. """
  1959. xmls = "<jsdl-hpcpa:Argument>"
  1960. xmle = "</jsdl-hpcpa:Argument>"
  1961. nattr = attr.replace(xmls, " ")
  1962. nattr = nattr.replace(xmle, " ")
  1963. return nattr.strip()
  def convert_to_cli(self, attrs, op=None, hostname=None, dflt_conf=True,
                     exclude_attrs=None):
      """
      Convert attributes into their CLI format counterpart. This
      method is far from complete, it grows as needs come by and
      could use a rewrite, especially going along with a rewrite
      of pbs_api_to_cli

      :param attrs: Attributes to convert
      :type attrs: List or str or dictionary
      :param op: The qualifier of the operation being performed,
                 such as ``IFL_SUBMIT``, ``IFL_DELETE``,
                 ``IFL_TERMINATE``...
      :type op: str or None
      :param hostname: The name of the host on which to operate
      :type hostname: str or None
      :param dflt_conf: Whether we are using the default PBS
                        configuration
      :type dflt_conf: bool
      :param exclude_attrs: Optional list of attributes to not
                            convert
      :type exclude_attrs: List
      :returns: CLI format of attributes, as a list of arguments
      """
      ret = []

      if op == IFL_SUBMIT:
          # collected while walking the attributes, emitted last
          executable = arglist = None

      elif op == IFL_DELETE:
          _c = []
          if isinstance(attrs, str):
              attrs = [attrs]

          if isinstance(attrs, list):
              for a in attrs:
                  if 'force' in a:
                      _c.append('-W')
                      _c.append('force')
                  if 'deletehist' in a:
                      _c.append('-x')
          return _c

      elif op == IFL_TERMINATE:
          _c = []
          if attrs is None:
              _c = []
          elif isinstance(attrs, str):
              _c = ['-t', attrs]
          else:
              # attrs is a bitmask of SHUT_* flags; when several shutdown
              # types are set the last one tested wins
              if ((attrs & SHUT_QUICK) == SHUT_QUICK):
                  _c = ['-t', 'quick']
              if ((attrs & SHUT_IMMEDIATE) == SHUT_IMMEDIATE):
                  _c = ['-t', 'immediate']
              if ((attrs & SHUT_DELAY) == SHUT_DELAY):
                  _c = ['-t', 'delay']
              if ((attrs & SHUT_WHO_SCHED) == SHUT_WHO_SCHED):
                  _c.append('-s')
              if ((attrs & SHUT_WHO_MOM) == SHUT_WHO_MOM):
                  _c.append('-m')
              if ((attrs & SHUT_WHO_SECDRY) == SHUT_WHO_SECDRY):
                  _c.append('-f')
              if ((attrs & SHUT_WHO_IDLESECDRY) == SHUT_WHO_IDLESECDRY):
                  _c.append('-F')
              if ((attrs & SHUT_WHO_SECDONLY) == SHUT_WHO_SECDONLY):
                  _c.append('-i')
          return _c

      if attrs is None or len(attrs) == 0:
          return ret

      # if a list, convert to a dictionary to fall into a single processing
      # of the attributes
      if (isinstance(attrs, list) and len(attrs) > 0 and
              not isinstance(attrs[0], tuple)):
          tmp_attrs = {}
          for each_attr in attrs:
              tmp_attrs[each_attr] = ''
          del attrs
          attrs = tmp_attrs
          del tmp_attrs

      if isinstance(attrs, (dict, OrderedDict)):
          attrs = attrs.items()

      for a, v in attrs:
          if exclude_attrs is not None and a in exclude_attrs:
              continue

          if op == IFL_SUBMIT:
              if a == ATTR_executable:
                  executable = v
                  continue
              if a == ATTR_Arglist:
                  if v is not None:
                      arglist = self.convert_arglist(v)
                      if len(arglist) == 0:
                          return []
                  continue
          if isinstance(v, list):
              v = ','.join(v)

          # when issuing remote commands, escape spaces in attribute values
          if (((hostname is not None) and
               (not self.du.is_localhost(hostname))) or
                  (not dflt_conf)):
              if ' ' in str(v):
                  # str() so that a non-string value cannot break the
                  # concatenation
                  v = '"' + str(v) + '"'

          if '.' in a:
              # split on the first dot only so that a resource name that
              # itself contains a dot does not break the unpacking
              (attribute, resource) = a.split('.', 1)
              ret.append('-' + api_to_cli[attribute])
              rv = resource
              if v is not None:
                  rv += '=' + str(v)
              ret.append(rv)
          else:
              try:
                  val = api_to_cli[a]
              except KeyError:
                  self.logger.error('error retrieving key ' + str(a))
                  # for unknown or junk options
                  ret.append(a)
                  if v is not None:
                      ret.append(str(v))
                  continue
              # on a remote job submit append the remote server name
              # to the queue name
              if ((op == IFL_SUBMIT) and (hostname is not None)):
                  if ((not self.du.is_localhost(hostname)) and
                          (val == 'q') and (v is not None) and
                          ('@' not in v) and (v != '')):
                      v += '@' + hostname
              val = '-' + val
              if '=' in val:
                  # options of the form "-Wname=" take their value glued on
                  if v is not None:
                      ret.append(val + str(v))
                  else:
                      ret.append(val)
              else:
                  ret.append(val)
                  if v is not None:
                      ret.append(str(v))

      # Executable and argument list must come last in a job submission
      if ((op == IFL_SUBMIT) and (executable is not None)):
          ret.append('--')
          ret.append(executable)
          if arglist is not None:
              ret.append(arglist)
      return ret
  def filter_batch_status(self, bs, attrib):
      """
      Filter out elements that don't have the attributes requested
      This is needed to adapt to the fact that requesting a
      resource attribute returns all ``'<resource-name>.*'``
      attributes so we need to ensure that the specific resource
      requested is present in the stat'ed object.

      This is needed especially when calling expect with an op=NE
      because we need to filter on objects that have exactly
      the attributes requested

      The batch status is modified in place and also returned.

      :param bs: Batch status, a list of dictionaries
      :param attrib: Requested attributes
      :type attrib: str or dictionary or list
      :returns: Filtered batch status
      """
      if isinstance(attrib, dict):
          keys = list(attrib.keys())
      elif isinstance(attrib, str):
          keys = attrib.split(',')
      else:
          keys = attrib

      if not keys:
          return bs

      for obj in bs:
          # iterate over a snapshot of the keys: entries are deleted while
          # looping, which is not allowed on a live key view
          for k in list(obj.keys()):
              # only dotted (resource) attributes are candidates for removal
              if '.' in k and k not in keys:
                  del obj[k]

      # if no matching resources are left (only one entry, normally the
      # 'id'), remove the object; slice assignment keeps this in place
      bs[:] = [obj for obj in bs if len(obj) != 1]
      return bs
  def convert_attributes_by_op(self, attributes, setattrs=False):
      """
      Convert attributes by operator, i.e. convert an attribute
      of the form

      ``<attr_name><op><value>`` (e.g. resources_available.ncpus>4)

      to

      ``<attr_name>: (<op>, <value>)``
      (e.g. resources_available.ncpus: (GT, 4))

      The attribute is split on the first occurrence of the operator
      only, so a value that contains the operator again (for example
      ``Resource_List.select=1:ncpus=2``) is kept whole.

      :param attributes: the attributes to convert
      :type attributes: List
      :param setattrs: if True, set the attributes with no operator
                       as (SET, '')
      :type setattrs: bool
      :returns: Converted attributes by operator
      """
      # the order of operator matters because they are tested in turn, so
      # the longer strings to search must come first
      operators = ('<=', '>=', '!=', '=', '>', '<', '~')
      converted = {}
      for attr in attributes:
          for op in operators:
              if op in attr:
                  name, _, value = attr.partition(op)
                  converted[name] = (PTL_STR_TO_OP[op], value)
                  break
          else:
              # no operator found in this attribute
              if setattrs:
                  converted[attr] = (SET, '')
      return converted
  def operator_in_attribute(self, attrib):
      """
      Tell whether any element of ``attrib`` contains one of the
      operator strings known to ``PTL_STR_TO_OP``

      :param attrib: Attribute name(s); each element obtained by
                     iterating over it is searched for an operator
      :type attrib: str
      :returns: True or False
      """
      operators = PTL_STR_TO_OP.keys()
      return any(op in a for a in attrib for op in operators)
  def list_resources(self, objtype=None, objs=None):
      """
      Lists the resources

      Resources are the attributes named ``resources_available.<res>``
      for vnode, node, server, queue and scheduler objects and
      ``Resource_List.<res>`` for jobs and reservations.

      :param objtype: Type of the object
      :type objtype: str
      :param objs: Object list, each object being a dictionary of
                   attributes
      :type objs: List
      :returns: List of resource names, without duplicates and in order
                of first appearance, or None when ``objtype`` is not
                one of the supported types
      """
      if objtype in (VNODE, NODE, SERVER, QUEUE, SCHED):
          prefix = 'resources_available.'
      elif objtype in (JOB, RESV):
          prefix = 'Resource_List.'
      else:
          return None

      resources = []
      for o in objs or []:
          for a in o.keys():
              if a.startswith(prefix):
                  # drop only the leading prefix, not later occurrences
                  res = a[len(prefix):]
                  if res not in resources:
                      resources.append(res)
      return resources
  def compare(self, obj1, obj2, showdiff=False):
      """
      Compare two objects.

      Without ``showdiff`` the result is that of ``cmp(obj1, obj2)``.
      With ``showdiff`` the objects are walked (lists element by
      element, dictionaries key by key) and every difference is logged.

      :param obj1: first object to compare
      :param obj2: second object to compare
      :param showdiff: whether to print the specific differences,
                       defaults to False
      :type showdiff: bool
      :returns: 0 if objects are identical and non zero otherwise
      """
      if not showdiff:
          ret = cmp(obj1, obj2)
          if ret != 0:
              self.logger.info('objects differ')
          return ret

      if not isinstance(obj1, type(obj2)):
          self.logger.error('objects are of different type')
          return 1

      if isinstance(obj1, list):
          if len(obj1) != len(obj2):
              self.logger.info(
                  'comparing ' + str(obj1) + ' and ' + str(obj2))
              self.logger.info('objects are of different lengths')
              return 1
          ret = 0
          # compare every pair, even after a first difference, so that
          # all the differences get logged
          for elem1, elem2 in zip(obj1, obj2):
              if self.compare(elem1, elem2, showdiff=showdiff) != 0:
                  ret = 1
          return ret

      if isinstance(obj1, dict):
          self.logger.info('comparing ' + str(obj1) + ' and ' + str(obj2))
          onlyobj1 = []
          diffobjs = []
          onlyobj2 = []
          for k1, v1 in obj1.items():
              if k1 not in obj2:
                  onlyobj1.append(k1 + '=' + str(v1))
              elif obj2[k1] != v1:
                  diffobjs.append(
                      k1 + '=' + str(v1) + ' vs ' + k1 + '=' + str(obj2[k1]))

          for k2, v2 in obj2.items():
              if k2 not in obj1:
                  onlyobj2.append(k2 + '=' + str(v2))

          if onlyobj1:
              self.logger.info("only in first object: " + " ".join(onlyobj1))
          if onlyobj2:
              self.logger.info(
                  "only in second object: " + " ".join(onlyobj2))
          if diffobjs:
              self.logger.info("diff between objects: " + " ".join(diffobjs))
          if not (onlyobj1 or onlyobj2 or diffobjs):
              self.logger.info("objects are identical")
              return 0
          return 1

      # any other type (e.g. the elements of a list): plain equality
      if obj1 == obj2:
          return 0
      return 1
  @classmethod
  def random_str(cls, length=1, prefix=''):
      """
      Generates a random string of ASCII letters that was not returned
      by a previous call

      :param length: Number of random letters to generate
      :type length: int
      :param prefix: Prefix of the string
      :type prefix: str
      :returns: ``prefix`` followed by ``length`` random letters. The
                string is unique among those generated so far, unless
                no unused string could be found after a bounded number
                of draws, in which case a duplicate is returned.
      """
      # The registry deliberately uses a single leading underscore: a
      # double-underscore attribute is name-mangled inside the class body
      # while a name given as a string to hasattr()/getattr() is not, so
      # the two would never refer to the same attribute.
      seen = getattr(cls, '_uniq_rstr', None)
      if seen is None:
          seen = cls._uniq_rstr = set()

      letters = string.ascii_letters
      candidate = prefix
      # bounded so that an exhausted name space (e.g. every one-letter
      # string already handed out) cannot loop forever
      for _attempt in range(1000):
          candidate = prefix + ''.join(
              random.choice(letters) for _i in range(length))
          if candidate not in seen:
              break
      seen.add(candidate)
      return candidate
  def _make_template_formula(self, formula):
      """
      Build a template out of a formula by prefixing with ``$`` every
      word that starts with a letter

      A word starts at the first alphabetic character met after one of
      the separators ``+ - / * %`` or a space (or at the beginning of
      the formula).

      :param formula: Formula for which template is to be created
      :type formula: str
      :returns: Template
      """
      separators = ('+', '-', '/', ' ', '*', '%')
      pieces = []
      in_word = False
      for ch in formula:
          starts_word = ch.isalpha() and not in_word
          if ch in separators:
              in_word = False
          elif starts_word:
              in_word = True
          pieces.append('$' + ch if starts_word else ch)
      return "".join(pieces)
  def update_attributes_list(self, obj):
      """
      Copy the ``Resource_List.<resource>`` attributes of an object
      into its ``Resource_List`` dictionary, creating that dictionary
      when the object does not have one yet

      Objects without an ``attributes`` member are left untouched.

      :param obj: Object to update
      :returns: None
      """
      if not hasattr(obj, 'attributes'):
          return
      if not hasattr(obj, 'Resource_List'):
          obj.Resource_List = {}

      prefix = 'Resource_List.'
      for key, value in obj.attributes.items():
          if not key.startswith(prefix):
              continue
          _unused, resource = key.split('.')
          obj.Resource_List[resource] = value
  def parse_fgc_limit(self, limstr=None):
      """
      Parse an ``FGC`` limit entry, of the form:

      ``<limtype>[.<resource>]=[<entity_type>:<entity_name>
      =<entity_value>]``

      :param limstr: FGC limit string
      :type limstr: str or None
      :returns: Tuple ``(limtype, resource, entity_type, entity_name,
                entity_value)``, the entity value being the string form
                of the decoded value, or None when ``limstr`` is None
                or does not match the expected format
      """
      if limstr is None:
          # nothing to parse; matching None would raise a TypeError
          return None
      m = self.lim_tag.match(limstr)
      if m:
          _v = str(self.decode_value(m.group('entity_value')))
          return (m.group('limtype'), m.group('resource'),
                  m.group('entity_type'), m.group('entity_name'), _v)
      return None
  def is_job_array(self, jobid):
      """
      Tell whether a job identifier is that of a job array, i.e.
      whether it matches the ``array_tag`` pattern

      :param jobid: PBS jobid
      :returns: True or False
      """
      return self.array_tag.match(jobid) is not None
  def is_subjob(self, jobid):
      """
      Extract the subjob index out of a job identifier

      :param jobid: PBS job id
      :type jobid: str
      :returns: The ``subjobid`` group of the ``subjob_tag`` pattern
                when the job id matches it, False otherwise
      """
      found = self.subjob_tag.match(jobid)
      return found.group('subjobid') if found else False
  class PbsTypeFGCLimit(object):

      """
      One FGC limit entry, written as
      ``<limtype>[.<resource>]=[<entity_type>:<entity_name>=<entity_value>]``

      The attribute name is parsed with ``fgc_attr_pat`` and the
      bracketed value with ``fgc_val_pat``; the fields of a part that
      does not match are set to None.

      :param attr: FGC limit attribute
      :type attr: str
      :param val: Value of attribute, e.g. ``[u:bob=4]``
      :type val: str
      """

      fgc_attr_pat = re.compile("(?P<ltype>[a-z_]+)[\.]*(?P<resource>[\w\d-]*)")
      fgc_val_pat = re.compile("[\s]*\[(?P<etype>[ugpo]):(?P<ename>[\w\d-]+)"
                               "=(?P<eval>[\d]+)\][\s]*")
      utils = BatchUtils()

      def __init__(self, attr, val):
          self.attr = attr
          self.val = val

          attr_match = self.fgc_attr_pat.match(attr)
          if attr_match is None:
              self.limit_type = self.resource_name = None
          else:
              self.limit_type = attr_match.group('ltype')
              self.resource_name = attr_match.group('resource')

          val_match = self.fgc_val_pat.match(val)
          if val_match is None:
              self.lim_value = self.entity_type = self.entity_name = None
          else:
              self.lim_value = self.utils.decode_value(
                  val_match.group('eval'))
              self.entity_type = val_match.group('etype')
              self.entity_name = val_match.group('ename')

      def __val__(self):
          """
          Return the bracketed value part of the limit
          """
          fields = (str(self.entity_type), str(self.entity_name),
                    str(self.lim_value))
          return '[' + fields[0] + ':' + fields[1] + '=' + fields[2] + ']'

      def __str__(self):
          return ' = '.join([self.attr, self.__val__()])
  class PbsBatchStatus(list):

      """
      Wrapper class for Batch Status object
      Converts a batch status (as dictlist) into a list of
      PbsBatchObjects

      :param bs: Batch status
      :type bs: List or dictionary
      :raises TypeError: when ``bs`` is neither a list nor a dictionary
      """

      def __init__(self, bs):
          if not isinstance(bs, (list, dict)):
              raise TypeError("Expected a list or dictionary")
          if isinstance(bs, dict):
              # a single object's status
              self.append(PbsBatchObject(bs))
          else:
              for b in bs:
                  self.append(PbsBatchObject(b))

      def __str__(self):
          """
          Render every object as its name (when it has one) followed
          by one indented ``<attribute> = <value>`` line per attribute.

          The rendering is built from the wrapped objects themselves
          because this class stores no other copy of the batch status.
          """
          rv = []
          for obj in self:
              name = getattr(obj, 'name', None)
              if name is not None:
                  rv.append(str(name))
              for attr in obj:
                  if attr.name == 'id':
                      # already shown as the object's name
                      continue
                  rv.append('    ' + str(attr.name) + ' = ' +
                            str(attr.value))
          return "\n".join(rv)
  class PbsBatchObject(list):

      """
      List of ``PbsAttribute`` built out of the batch status of one
      object

      :param bs: Batch status of the object, as a dictionary
      """

      def __init__(self, bs):
          self.set_batch_status(bs)

      def set_batch_status(self, bs):
          """
          Sets the batch status: the ``id`` entry, when present, becomes
          the name of this object and one ``PbsAttribute`` is appended
          per entry

          :param bs: Batch status
          """
          if 'id' in bs:
              self.name = bs['id']
          self.extend(PbsAttribute(key, val) for key, val in bs.items())
  class PbsAttribute(object):

      """
      Descriptor class for PBS attribute

      :param name: PBS attribute name
      :type name: str
      :param value: Value for the attribute
      :type value: str or int or float
      """
      utils = BatchUtils()

      def __init__(self, name=None, value=None):
          self.set_name(name)
          self.set_value(value)

      def set_name(self, name):
          """
          Set PBS attribute name

          A dotted name is treated as a resource and split, on its
          first dot, into ``resource_type`` and ``resource_name``.

          :param name: PBS attribute
          :type name: str
          """
          self.name = name
          if name is not None and '.' in name:
              self.is_resource = True
              # split once so that a name with several dots cannot break
              # the unpacking
              self.resource_type, self.resource_name = name.split('.', 1)
          else:
              self.is_resource = False
              self.resource_type = self.resource_name = None

      def set_value(self, value):
          """
          Set PBS attribute value

          The attribute is flagged consumable when the value is a
          number or a string made of digits only.

          :param value: Value of PBS attribute
          :type value: str or int or float
          """
          self.value = value
          self.is_consumable = (isinstance(value, (int, float)) or
                                str(value).isdigit())

      def obfuscate_name(self, a=None):
          """
          Obfuscate PBS attribute name

          The original name is saved in ``decoded_name``. Only resource
          attributes are renamed.

          :param a: Replacement to use instead of a random string
          :type a: str or None
          """
          if a is not None:
              on = a
          else:
              on = self.utils.random_str(len(self.name))

          self.decoded_name = self.name
          if self.is_resource:
              # NOTE(review): the new name is built from resource_name,
              # not resource_type - kept as is, confirm this is intended
              self.set_name(self.resource_name + '.' + on)

      def obfuscate_value(self, v=None):
          """
          Obfuscate PBS attribute value

          The original value is saved in ``decoded_value``. Values that
          are not consumable are left unchanged.

          :param v: Replacement to use instead of a random string
          :type v: str or None
          """
          if not self.is_consumable:
              self.decoded_value = self.value
              return

          if v is not None:
              ov = v
          else:
              # str() because a consumable value may be an int or a float
              ov = self.utils.random_str(len(str(self.value)))

          self.decoded_value = self.value
          self.set_value(ov)
  class Entity(object):

      """
      Abstract representation of a PBS consumer that has an
      external relationship to the PBS system. For example, a
      user associated to an OS identifier (uid) maps to a PBS
      user entity.

      Entities may be subject to policies, such as limits, consume
      a certain amount of resource and/or fairshare usage.

      :param etype: Entity type
      :type etype: str or None
      :param name: Entity name
      :type name: str or None
      """

      def __init__(self, etype=None, name=None):
          self.type = etype
          self.name = name
          self.limits = []
          # {container: {resource: usage}}
          self.resource_usage = {}
          self.fairshare_usage = 0

      def set_limit(self, limit=None):
          """
          Add a limit to this entity unless a limit with the same
          string representation is already registered

          :param limit: Limit to be set
          :type limit: str or None
          """
          for l in self.limits:
              if str(limit) == str(l):
                  return
          self.limits.append(limit)

      def set_resource_usage(self, container=None, resource=None, usage=None):
          """
          Add ``usage`` to the usage already recorded for ``resource``
          in ``container``. Nothing is recorded for an entity that has
          no type.

          :param container: Container the usage is accounted against
          :param resource: PBS resource
          :type resource: str or None
          :param usage: Resource usage value
          :type usage: str or None
          """
          if not self.type:
              return
          per_container = self.resource_usage.setdefault(container, {})
          if resource in per_container:
              per_container[resource] += usage
          else:
              per_container[resource] = usage

      def set_fairshare_usage(self, usage=0):
          """
          Add to the fairshare usage of this entity

          :param usage: Fairshare usage value
          :type usage: int
          """
          self.fairshare_usage += usage

      def __repr__(self):
          return self.__str__()

      def __str__(self):
          return str(self.limits) + ' ' + str(self.resource_usage) + ' ' + \
              str(self.fairshare_usage)
  class Policy(object):

      """
      Abstract PBS policy. Can be one of ``limits``,
      ``access control``, ``scheduling policy``, etc...this
      class does not currently support any operations
      """

      def __init__(self):
          pass


  class Limit(Policy):

      """
      Representation of a PBS limit

      Limits apply to containers, are of a certain type
      (e.g., max_run_res.ncpus) associated to a given resource
      (e.g., resource), on a given entity (e.g.,user Bob) and
      have a certain value.

      :param limit_type: Type of the limit
      :type limit_type: str or None
      :param resource: PBS resource
      :type resource: str or None
      :param entity_obj: Entity object
      :param value: Limit value
      :type value: int
      :param container: Container the limit applies to
      :type container: str or None
      :param container_id: Container id
      """

      def __init__(self, limit_type=None, resource=None,
                   entity_obj=None, value=None, container=None,
                   container_id=None):
          self.set_container(container, container_id)
          self.soft_limit = False
          self.hard_limit = False
          self.set_limit_type(limit_type)
          self.set_resource(resource)
          self.set_value(value)
          self.entity = entity_obj

      def set_container(self, container, container_id):
          """
          Set the container

          :param container: Container which is to be set
          :type container: str
          :param container_id: Container id
          """
          self.container = container
          self.container_id = container_id

      def set_limit_type(self, t):
          """
          Set the limit type and flag the limit as soft when the type
          contains ``_soft``, as hard otherwise. When no type is given
          neither flag is changed.

          :param t: Limit type
          :type t: str or None
          """
          self.limit_type = t
          if t is None:
              # unknown type: neither soft nor hard can be decided
              return
          if '_soft' in t:
              self.soft_limit = True
          else:
              self.hard_limit = True

      def set_resource(self, resource):
          """
          Set the resource

          :param resource: resource value to set
          :type resource: str
          """
          self.resource = resource

      def set_value(self, value):
          """
          Set the resource value

          :param value: Resource value
          :type value: str
          """
          self.value = value

      def __eq__(self, value):
          # two limits are equal when they render to the same string
          return str(self) == str(value)

      def __ne__(self, value):
          # Python 2 does not derive != from ==
          return not self.__eq__(value)

      def __str__(self):
          return self.__repr__()

      def __repr__(self):
          parts = [self.container_id, self.limit_type, self.resource, '[',
                   getattr(self.entity, 'type', None), ':',
                   getattr(self.entity, 'name', None), '=', self.value, ']']
          # str() each part: values and ids are not necessarily strings
          return " ".join(str(p) for p in parts)
  class ExpectActions(object):

      """
      List of action handlers to run when Server's expect
      function does not get the expected result

      :param action: Action to run
      :type action: str
      :param level: Logging level
      """

      # registry of the actions by name; a class attribute, hence shared
      # by all the instances
      actions = {}

      def __init__(self, action=None, level=logging.INFO):
          self.logger = logging.getLogger(__name__)
          self.add_action(action, level=level)

      def add_action(self, action=None, hostname=None, level=logging.INFO):
          """
          Add an action, unless it has no name or an action of the same
          name is already registered

          :param action: Action to add
          :param hostname: Machine hostname
          :type hostname: str
          :param level: Logging level
          """
          if action is not None and action.name is not None and\
                  action.name not in self.actions:
              self.actions[action.name] = action
              msg = ['expect action: added action ' + action.name]
              if hostname:
                  msg += [' to server ' + hostname]
              if level >= logging.INFO:
                  self.logger.info("".join(msg))
              else:
                  self.logger.debug("".join(msg))

      def has_action(self, name):
          """
          check whether action exists or not

          :param name: Name of action
          :type name: str
          :returns: True or False
          """
          return name in self.actions

      def get_action(self, name):
          """
          Get an action if exists

          :param name: Name of action
          :type name: str
          :returns: The action, or None when there is none of that name
          """
          return self.actions.get(name)

      def list_actions(self, level=logging.INFO):
          """
          Log the list of actions

          :param level: Logging level
          """
          all_actions = list(self.get_all_actions())
          if level >= logging.INFO:
              self.logger.info(all_actions)
          else:
              self.logger.debug(all_actions)

      def get_all_actions(self):
          """
          Get all the action
          """
          return self.actions.values()

      def get_actions_by_type(self, atype=None):
          """
          Get an action by type

          :param atype: Action type
          :type atype: str
          :returns: List of the actions of that type, or None when no
                    type is given
          """
          if atype is None:
              return None

          return [action for action in self.actions.values()
                  if action.type is not None and action.type == atype]

      def _control_action(self, action=None, name=None, enable=None):
          """
          Enable or disable an action given either as an object or by
          name, the name ``ALL`` standing for every registered action

          :param action: Action to control
          :param name: Name of the action to control, used when no
                       action is given
          :type name: str or None
          :param enable: Whether to enable (True) or disable the action
          :type enable: bool
          """
          enabled = bool(enable)
          if action:
              action.enabled = enabled
              name = action.name
          elif name is not None:
              if name == 'ALL':
                  # iterate over the action objects, not over their names
                  for a in self.actions.values():
                      a.enabled = enabled
              else:
                  a = self.get_action(name)
                  if a is None:
                      self.logger.error('expect action: unknown action ' +
                                        str(name))
                      return
                  a.enabled = enabled
          else:
              return

          if enabled:
              msg = 'enabled'
          else:
              msg = 'disabled'

          self.logger.info('expect action: ' + str(name) + ' ' + msg)

      def disable_action(self, action=None, name=None):
          """
          Disable an action
          """
          self._control_action(action, name, enable=False)

      def enable_action(self, action=None, name=None):
          """
          Enable an action
          """
          self._control_action(action, name, enable=True)

      def disable_all_actions(self):
          """
          Disable all actions
          """
          for a in self.actions.values():
              a.enabled = False

      def enable_all_actions(self):
          """
          Enable all actions
          """
          for a in self.actions.values():
              a.enabled = True
  class ExpectAction(object):

      """
      Action function to run when Server's expect function does
      not get the expected result

      :param name: Action name
      :type name: str or None
      :param enabled: Whether the action is enabled
      :type enabled: bool
      :param atype: Action type
      :type atype: str
      :param action: The action to run
      :param level: Logging level
      """

      def __init__(self, name=None, enabled=True, atype=None, action=None,
                   level=logging.INFO):
          self.logger = logging.getLogger(__name__)
          self.set_name(name, level=level)
          self.set_enabled(enabled)
          self.set_type(atype)
          self.set_action(action)

      def set_name(self, name, level=logging.INFO):
          """
          Set the action name

          :param name: Action name
          :type name: str or None
          :param level: Logging level
          """
          # str() because the name defaults to None
          msg = 'expect action: created new action ' + str(name)
          if level >= logging.INFO:
              self.logger.info(msg)
          else:
              self.logger.debug(msg)
          self.name = name

      def set_enabled(self, enabled):
          """
          Enable or disable the action

          :param enabled: Whether the action is enabled
          :type enabled: bool
          """
          self.enabled = enabled

      def set_type(self, atype):
          """
          Set the action type

          :param atype: Action type
          :type atype: str
          """
          self.type = atype

      def set_action(self, action):
          """
          Set the action to run

          :param action: The action to run
          """
          self.action = action
  class PbsTypeAttribute(dict):

      """
      Experimental. This is a placeholder object that will be used
      in the future to map attribute information and circumvent
      the error-prone dynamic type detection that is currently done
      using ``decode_value()``
      """

      def __getitem__(self, name):
          # look the raw value up, then hand it over for decoding
          raw = super(PbsTypeAttribute, self).__getitem__(name)
          return BatchUtils.decode_value(raw)
  class PBSObject(object):

      """
      Generic PBS Object encapsulating attributes and defaults

      :param name: The name associated to the object
      :type name: str
      :param attrs: Dictionary of attributes to set on object
      :type attrs: Dictionary
      :param defaults: Dictionary of default attributes. Setting
                       this will override any other object's default
      :type defaults: Dictionary
      """

      utils = BatchUtils()
      platform = sys.platform

      def __init__(self, name, attrs=None, defaults=None):
          self.attributes = OrderedDict()
          self.name = name
          # a fresh dictionary per object rather than one shared default
          self.dflt_attributes = {} if defaults is None else defaults
          self.attropl = None
          self.custom_attrs = OrderedDict()
          self.ctime = int(time.time())
          self.set_attributes({} if attrs is None else attrs)

      def set_attributes(self, a=None):
          """
          set attributes and custom attributes on this object.
          custom attributes are used when converting attributes
          to CLI

          The attributes are the defaults, overridden by the attributes
          already set, overridden by ``a``.

          :param a: Attribute dictionary, or list of (name, value) pairs
          :type a: Dictionary
          """
          if a is None:
              a = {}
          elif isinstance(a, list):
              a = OrderedDict(a)

          # successive updates rather than concatenating items(): the
          # latter only works where items() returns a list
          merged = OrderedDict()
          for source in (self.dflt_attributes, self.attributes, a):
              merged.update(source.items())
          self.attributes = merged

          custom = OrderedDict(self.custom_attrs.items())
          custom.update(a.items())
          self.custom_attrs = custom

      def unset_attributes(self, attrl=None):
          """
          Unset attributes from object's attributes and custom
          attributes

          :param attrl: Attribute list
          :type attrl: List
          """
          for attr in attrl or []:
              if attr in self.attributes:
                  del self.attributes[attr]
              if attr in self.custom_attrs:
                  del self.custom_attrs[attr]

      def __str__(self):
          """
          Return a string representation of this PBSObject
          """
          if self.name is None:
              return ""

          s = []
          if isinstance(self, Job):
              s += ["Job Id: " + self.name + "\n"]
          elif isinstance(self, Queue):
              s += ["Queue: " + self.name + "\n"]
          elif isinstance(self, Server):
              s += ["Server: " + self.hostname + "\n"]
          elif isinstance(self, Reservation):
              # the header carries the reservation name, like the headers
              # of the other object types
              s += ["Name: " + self.name + "\n"]
          else:
              s += [self.name + "\n"]
          for k, v in self.attributes.items():
              s += ["    " + k + " = " + str(v) + "\n"]
          return "".join(s)

      def __repr__(self):
          return str(self.attributes)
  2835. class PBSService(PBSObject):
  2836. """
  2837. Generic PBS service object to hold properties of PBS daemons
  2838. :param name: The name associated to the object
  2839. :type name: str or None
  2840. :param attrs: Dictionary of attributes to set on object
  2841. :type attrs: Dictionary
  2842. :param defaults: Dictionary of default attributes. Setting
  2843. this will override any other object's default
  2844. :type defaults: Dictionary
  2845. :param pbsconf_file: Optional path to the pbs configuration
  2846. file
  2847. :type pbsconf_file: str or None
  2848. :param diagmap: A dictionary of PBS objects (node,server,etc)
  2849. to mapped files from PBS diag directory
  2850. :type diagmap: Dictionary
  2851. :param diag: path to PBS diag directory
  2852. (This will overrides diagmap)
  2853. :type diag: str or None
  2854. """
  2855. du = DshUtils()
  2856. pu = ProcUtils()
  2857. def __init__(self, name=None, attrs={}, defaults={}, pbsconf_file=None,
  2858. diagmap={}, diag=None):
  2859. if name is None:
  2860. self.hostname = socket.gethostname()
  2861. else:
  2862. self.hostname = name
  2863. if diag:
  2864. self.diagmap = self._load_from_diag(diag)
  2865. self.has_diag = True
  2866. self.diag = diag
  2867. elif len(diagmap) > 0:
  2868. self.diagmap = diagmap
  2869. self.diag = None
  2870. self.has_diag = True
  2871. else:
  2872. self.diagmap = {}
  2873. self.diag = None
  2874. self.has_diag = False
  2875. if not self.has_diag:
  2876. try:
  2877. self.fqdn = socket.gethostbyaddr(self.hostname)[0]
  2878. if self.hostname != self.fqdn:
  2879. self.logger.info('FQDN name ' + self.fqdn + ' differs '
  2880. 'from name provided ' + self.hostname)
  2881. self.hostname = self.fqdn
  2882. except:
  2883. pass
  2884. else:
  2885. self.fqdn = self.hostname
  2886. self.shortname = self.hostname.split('.')[0]
  2887. self.platform = self.du.get_platform()
  2888. self.logutils = None
  2889. self.logfile = None
  2890. self.acctlogfile = None
  2891. self.pid = None
  2892. self.pbs_conf = {}
  2893. self.pbs_env = {}
  2894. self._is_local = True
  2895. self.launcher = None
  2896. PBSObject.__init__(self, name, attrs, defaults)
  2897. if not self.has_diag:
  2898. if not self.du.is_localhost(self.hostname):
  2899. self._is_local = False
  2900. if pbsconf_file is None and not self.has_diag:
  2901. self.pbs_conf_file = self.du.get_pbs_conf_file(name)
  2902. else:
  2903. self.pbs_conf_file = pbsconf_file
  2904. if self.pbs_conf_file == '/etc/pbs.conf':
  2905. self.default_pbs_conf = True
  2906. elif (('PBS_CONF_FILE' not in os.environ) or
  2907. (os.environ['PBS_CONF_FILE'] != self.pbs_conf_file)):
  2908. self.default_pbs_conf = False
  2909. else:
  2910. self.default_pbs_conf = True
  2911. # default pbs_server_name to hostname, it will get set again once the
  2912. # config file is processed
  2913. self.pbs_server_name = self.hostname
  2914. # If diag is given then bypass parsing pbs.conf
  2915. if self.has_diag:
  2916. if diag is None:
  2917. t = 'pbs_diag_%s' % (time.strftime("%y%m%d_%H%M%S"))
  2918. self.diag = os.path.join(self.du.get_tempdir(), t)
  2919. self.pbs_conf['PBS_HOME'] = self.diag
  2920. self.pbs_conf['PBS_EXEC'] = self.diag
  2921. self.pbs_conf['PBS_SERVER'] = self.hostname
  2922. m = re.match('.*pbs_diag_(?P<datetime>\d{6,6}_\d{6,6}).*',
  2923. self.diag)
  2924. if m:
  2925. tm = time.strptime(m.group('datetime'), "%y%m%d_%H%M%S")
  2926. self.ctime = int(time.mktime(tm))
  2927. else:
  2928. self.pbs_conf = self.du.parse_pbs_config(self.hostname,
  2929. self.pbs_conf_file)
  2930. if self.pbs_conf is None or len(self.pbs_conf) == 0:
  2931. self.pbs_conf = {'PBS_HOME': "", 'PBS_EXEC': ""}
  2932. else:
  2933. ef = os.path.join(self.pbs_conf['PBS_HOME'], 'pbs_environment')
  2934. self.pbs_env = self.du.parse_pbs_environment(self.hostname, ef)
  2935. self.pbs_server_name = self.du.get_pbs_server_name(
  2936. self.pbs_conf)
  2937. self.init_logfile_path(self.pbs_conf)
  def _load_from_diag(self, diag):
      """
      Map each PBS object type to the file of a PBS diag directory
      that holds its status output

      :param diag: path to PBS diag directory
      :type diag: str
      :returns: Dictionary keyed by object type. ``HOOK`` maps to the
                list of ``.HK`` files found under
                ``server_priv/hooks``. ``SCHED`` maps to the
                transformed file ``ptl_qstat_Sched.out`` written into
                ``diag``, or to None when ``qmgr_psched.out`` is not
                a file in ``diag``
      """
      diagmap = {}
      diagmap[SERVER] = os.path.join(diag, 'qstat_Bf.out')
      diagmap[VNODE] = os.path.join(diag, 'pbsnodes_va.out')
      diagmap[QUEUE] = os.path.join(diag, 'qstat_Qf.out')
      diagmap[JOB] = os.path.join(diag, 'qstat_tf.out')
      if not os.path.isfile(diagmap[JOB]):
          diagmap[JOB] = os.path.join(diag, 'qstat_f.out')
      diagmap[RESV] = os.path.join(diag, 'pbs_rstat_f.out')
      diagmap[SCHED] = os.path.join(diag, 'qmgr_psched.out')
      diagmap[HOOK] = []
      # a directory at server_priv/hooks implies server_priv is a
      # directory as well, so a single check is enough
      hooks_dir = os.path.join(diag, 'server_priv', 'hooks')
      if os.path.isdir(hooks_dir):
          for f in os.listdir(hooks_dir):
              if f.endswith('.HK'):
                  diagmap[HOOK].append(os.path.join(hooks_dir, f))
      # Format of qmgr_psched.out differs from Batch Status, we transform
      # it to go through the common batch status parsing routines
      if os.path.isfile(diagmap[SCHED]):
          sched_out = os.path.join(diag, 'ptl_qstat_Sched.out')
          # context managers close both files even when a read or a
          # write fails part way through
          with open(diagmap[SCHED]) as src:
              with open(sched_out, 'w') as dst:
                  dst.write("Sched \n")
                  for line in src:
                      recs = line.split()
                      dst.write("".join(recs[2:5]) + "\n")
          diagmap[SCHED] = sched_out
      else:
          diagmap[SCHED] = None
      return diagmap
  def init_logfile_path(self, conf=None):
      """
      Set ``logfile`` and ``acctlogfile`` to today's log files of
      this service

      Nothing is set when this instance maps to no log directory, or
      when ``conf`` is None or has no ``PBS_HOME`` entry.

      :param conf: PBS conf file parameters
      :type conf: Dictionary
      """
      log_subdir = self._instance_to_logpath(self)
      if log_subdir is None or conf is None or 'PBS_HOME' not in conf:
          return
      pbs_home = conf['PBS_HOME']
      # log files are named after the current local day
      today = time.strftime("%Y%m%d", time.localtime())
      self.logfile = os.path.join(pbs_home, log_subdir, today)
      self.acctlogfile = os.path.join(pbs_home, 'server_priv',
                                      'accounting', today)
  def _instance_to_logpath(self, inst):
      """
      Map a service instance to the name of its log directory

      :param inst: the service instance to look up
      :type inst: object
      :returns: one of ``sched_logs``, ``server_logs``, ``mom_logs``
                or ``comm_logs``, or None when ``inst`` is none of
                the known service types
      """
      # the first matching type wins
      known = (
          (Scheduler, 'sched_logs'),
          (Server, 'server_logs'),
          (MoM, 'mom_logs'),
          (Comm, 'comm_logs'),
      )
      for svc_type, logval in known:
          if isinstance(inst, svc_type):
              return logval
      return None
  def _instance_to_cmd(self, inst):
      """
      Map a service instance to the name of its daemon command

      :param inst: the service instance to look up
      :type inst: object
      :returns: one of ``pbs_sched``, ``pbs_server``, ``pbs_mom`` or
                ``pbs_comm``, or None when ``inst`` is none of the
                known service types
      """
      # the first matching type wins
      known = (
          (Scheduler, 'pbs_sched'),
          (Server, 'pbs_server'),
          (MoM, 'pbs_mom'),
          (Comm, 'pbs_comm'),
      )
      for svc_type, cmd in known:
          if isinstance(inst, svc_type):
              return cmd
      return None
  def _instance_to_servicename(self, inst):
      """
      Map a service instance to its service name

      :param inst: the service instance to look up
      :type inst: object
      :returns: one of ``scheduler``, ``server``, ``mom`` or ``comm``,
                or the empty string when ``inst`` is none of the
                known service types
      """
      # the first matching type wins
      known = (
          (Scheduler, 'scheduler'),
          (Server, 'server'),
          (MoM, 'mom'),
          (Comm, 'comm'),
      )
      for svc_type, nm in known:
          if isinstance(inst, svc_type):
              return nm
      return ''
  def _instance_to_privpath(self, inst):
      """
      Map a service instance to the name of its priv directory

      :param inst: the service instance to look up
      :type inst: object
      :returns: ``sched_priv``, ``server_priv`` or ``mom_priv``, or
                None when ``inst`` is none of the known service types
      """
      # the first matching type wins; a Comm maps to server_priv
      known = (
          (Scheduler, 'sched_priv'),
          (Server, 'server_priv'),
          (MoM, 'mom_priv'),
          (Comm, 'server_priv'),
      )
      for svc_type, priv in known:
          if isinstance(inst, svc_type):
              return priv
      return None
  def _instance_to_lock(self, inst):
      """
      Map a service instance to the name of its lock file

      :param inst: the service instance to look up
      :type inst: object
      :returns: one of ``sched.lock``, ``server.lock``, ``mom.lock``
                or ``comm.lock``, or None when ``inst`` is none of
                the known service types
      """
      # the first matching type wins
      known = (
          (Scheduler, 'sched.lock'),
          (Server, 'server.lock'),
          (MoM, 'mom.lock'),
          (Comm, 'comm.lock'),
      )
      for svc_type, lock in known:
          if isinstance(inst, svc_type):
              return lock
      return None
  def set_launcher(self, execargs=None):
      """
      Record the launcher to store on this service

      :param execargs: the launcher to record, None clears it
      :type execargs: str or list or None
      """
      self.launcher = execargs
  def _isUp(self, inst):
      """
      Tell whether the service is up, that is whether the PID read
      through ``_get_pid`` is among the PIDs returned by
      ``_all_instance_pids``

      :param inst: the service instance to check
      :type inst: object
      :returns: True if service is up and False otherwise
      """
      running = self._all_instance_pids(inst)
      lock_pid = self._get_pid(inst)
      return running is not None and lock_pid in running
  def _signal(self, sig, inst=None, procname=None):
      """
      Send signal ``sig`` to service. sig is the signal name
      as it would be sent to the program kill, e.g. -HUP.

      When ``procname`` is given and process information is found for
      it, every process of the first entry found is signaled and the
      result of the last kill is returned. Otherwise the PID of
      ``inst`` is signaled.

      :param sig: signal name as passed to kill
      :type sig: str
      :param inst: Instance
      :type inst: object or None
      :param procname: Process name
      :type procname: str or None
      :returns: the ``out/err/rc`` from the command run to send the
                signal, see DshUtils.run_cmd. A result with rc 0 and
                out 'no pid to signal' when no PID could be found
      """
      target = None
      if inst is not None:
          # prefer the PID cached on the instance over the lock file
          if inst.pid is None:
              target = self._get_pid(inst)
          else:
              target = inst.pid
      if procname is not None:
          info = self.pu.get_proc_info(self.hostname, procname)
          if info is not None and info.values() and info.values()[0]:
              for proc in info.values()[0]:
                  ret = self.du.run_cmd(self.hostname,
                                        ['kill', sig, proc.pid],
                                        sudo=True)
              return ret
      if target is None:
          return {'rc': 0, 'err': '', 'out': 'no pid to signal'}
      return self.du.run_cmd(self.hostname, ['kill', sig, target],
                             sudo=True)
  def _all_instance_pids(self, inst):
      """
      Collect the ``PIDS`` of all processes whose name matches the
      command of the instance

      :param inst: the service instance to look up
      :type inst: object
      :returns: list of ``PIDS``, or None when no process information
                was collected
      """
      pattern = ".*" + self._instance_to_cmd(inst) + ".*"
      # get_proc_info fills self.pu.processes, its return is not used
      self.pu.get_proc_info(self.hostname, pattern, regexp=True)
      proc_lists = self.pu.processes.values()
      if not proc_lists:
          return None
      return [proc.pid for procs in proc_lists for proc in procs]
  def _get_pid(self, inst):
      """
      Get the ``PID`` associated to this instance.

      Implementation note, the pid is read from the daemon's lock
      file and is also stored in ``self.pid``. This is different than
      ``_all_instance_pids`` in that the PID of the last running
      instance can be retrieved with ``_get_pid`` but not with
      ``_all_instance_pids``

      :param inst: the service instance to look up
      :type inst: object
      :returns: the first line of the lock file, stripped, or None
                when the lock file could not be read or is empty
      """
      priv_dir = self._instance_to_privpath(inst)
      lock_name = self._instance_to_lock(inst)
      # a scheduler may carry its own sched_priv location
      own_priv = (isinstance(inst, Scheduler) and
                  'sched_priv' in inst.attributes)
      if own_priv:
          lock_path = os.path.join(inst.attributes['sched_priv'],
                                   lock_name)
      else:
          lock_path = os.path.join(self.pbs_conf['PBS_HOME'], priv_dir,
                                   lock_name)
      res = self.du.cat(self.hostname, lock_path, sudo=True,
                        logerr=False)
      found = None
      if res['rc'] == 0 and len(res['out']) > 0:
          found = res['out'][0].strip()
      self.pid = found
      return self.pid
  def _update_pid(self, inst):
      """
      Refresh ``inst.pid``, polling up to 30 times one second apart
      until the PID from ``_get_pid`` is among the PIDs returned by
      ``_all_instance_pids``. ``inst.pid`` is set to None when that
      never happens.

      :param inst: inst to update pid
      :type inst: object
      """
      attempts_left = 30
      while attempts_left > 0:
          running = self._all_instance_pids(inst)
          inst.pid = self._get_pid(inst)
          if running is not None and inst.pid in running:
              return
          time.sleep(1)
          attempts_left -= 1
      inst.pid = None
  def _start(self, inst=None, args=None, cmd_map=None, launcher=None):
      """
      Generic service startup

      :param inst: The instance to act upon
      :type inst: object
      :param args: Optional command-line arguments
      :type args: List
      :param cmd_map: Optional dictionary of command line
                      options to configuration variables
      :type cmd_map: Dictionary
      :param launcher: Optional utility to invoke the launch
                       of the service. The option can be a string or
                       a list. Options may be passed to the launcher,
                       for example to start a service through the
                       valgrind utility redirecting to a log file,
                       launcher could be set to e.g.
                       ``['valgrind', '--log-file=/tmp/vlgrd.out']``
                       or ``'valgrind --log-file=/tmp/vlgrd.out'``.
                       Defaults to the launcher stored on this
                       service, if any
      :type launcher: str or list or None
      :returns: the error output of the start command when there is
                any, True otherwise. None when ``inst`` maps to no
                command
      :raises PbsServiceError: when the start command returns non-zero
                               or when no PID is found afterwards
      """
      if launcher is None:
          launcher = self.launcher
      daemon = self._instance_to_cmd(inst)
      if daemon is None:
          return
      logmsg = ['service: starting', daemon]
      if args is not None:
          logmsg.append('with args: ')
          logmsg.extend(args)
      use_launcher = launcher is not None
      # a launched service is run as a script that is not waited on
      as_script = use_launcher
      wait_on = not use_launcher
      cmd = []
      if use_launcher:
          if isinstance(launcher, str):
              launcher = launcher.split()
          if daemon == 'pbs_server':
              # running the pbs server through valgrind requires a bit
              # of a dance because the pbs_server binary is
              # pbs_server.bin and to run it requires being able to
              # find libraries, so LD_LIBRARY_PATH is set and
              # pbs_server.bin is run as a script
              pexec = inst.pbs_conf['PBS_EXEC']
              libdirs = (os.path.join(pexec, 'lib'),
                         os.path.join(pexec, 'pgsql', 'lib'))
              cmd.append('LD_LIBRARY_PATH=' + ':'.join(libdirs))
              daemon = 'pbs_server.bin'
          cmd = cmd + launcher
      cmd.append(os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', daemon))
      if args is not None:
          cmd.extend(args)
      if not self.default_pbs_conf:
          # point the daemon at the non-default configuration file
          cmd.insert(0, 'PBS_CONF_FILE=' + inst.pbs_conf_file)
          as_script = True
      if cmd_map is not None:
          conf_cmd = self.du.map_pbs_conf_to_cmd(cmd_map,
                                                 pconf=self.pbs_conf)
          cmd.extend(conf_cmd)
          logmsg += conf_cmd
      self.logger.info(" ".join(logmsg))
      ret = self.du.run_cmd(self.hostname, cmd, sudo=True,
                            as_script=as_script, wait_on_script=wait_on,
                            level=logging.INFOCLI, logerr=False)
      if ret['rc'] != 0:
          raise PbsServiceError(rv=False, rc=ret['rc'], msg=ret['err'])
      ret_msg = ret['err'] or True
      self._update_pid(inst)
      if inst.pid is None:
          raise PbsServiceError(rv=False, rc=-1, msg="Could not find PID")
      return ret_msg
  def _stop(self, sig='-TERM', inst=None):
      """
      Generic service stop: signal the instance, then wait for the
      PID of its lock file to leave the list of running PIDs

      :param sig: signal name as passed to kill
      :type sig: str
      :param inst: The instance to act upon
      :type inst: object or None
      :returns: True
      :raises PbsServiceError: when the PID is still listed after
                               more than 60 one-second waits
      """
      if inst is None:
          return True
      self._signal(sig, inst)
      lock_pid = self._get_pid(inst)
      running = self._all_instance_pids(inst)
      if lock_pid is None or running is None:
          return True
      waited = 0
      while True:
          if running is None or str(lock_pid) not in running:
              break
          if waited > 60:
              m = (self.logprefix + 'could not stop service ' +
                   self._instance_to_servicename(inst))
              raise PbsServiceError(rv=False, rc=-1, msg=m)
          time.sleep(1)
          waited += 1
          running = self._all_instance_pids(inst)
      inst.pid = None
      return True
  def initialise_service(self):
      """
      Hook to initialise the service. It does nothing here and is
      meant to be overridden.
      """
      pass
  def log_lines(self, logtype, id=None, n=50, tail=True, starttime=None,
                endtime=None):
      """
      Return up to ``<n>`` lines of a PBS log, which can be one of
      ``server``, ``scheduler``, ``MoM``, ``accounting`` or
      ``tracejob``

      For log files, one file per day is read, from the day of
      ``starttime`` to the day of ``endtime``, until ``n`` lines were
      collected.

      :param logtype: The entity requested, an instance of a
                      Scheduler, Server or MoM object, or the
                      string 'tracejob' for tracejob or 'accounting'
                      for accounting logs
      :type logtype: str or object
      :param id: The id of the object to trace. Only used for
                 tracejob
      :param n: One of 'ALL' or the number of lines to
                process/display, defaults to 50.
      :type n: str or int
      :param tail: if True, parse log from the end to the start,
                   otherwise parse from the start to the end.
                   Defaults to True.
      :type tail: bool
      :param starttime: date timestamp to start matching, defaults
                        to ``self.ctime``
      :param endtime: date timestamp to end matching, defaults to
                      the current time
      :returns: list of lines. None when ``id`` is missing for
                tracejob or when any error occurred, in which case
                the error is logged and its traceback printed
      """
      logval = None
      lines = []
      sudo = False
      if endtime is None:
          endtime = int(time.time())
      if starttime is None:
          starttime = self.ctime
      try:
          if logtype == 'tracejob':
              if id is None:
                  return None
              cmd = [os.path.join(
                  self.pbs_conf['PBS_EXEC'],
                  'bin',
                  'tracejob')]
              cmd += [str(id)]
              lines = self.du.run_cmd(self.hostname, cmd)['out']
              if n != 'ALL':
                  lines = lines[-n:]
          else:
              daystart = time.strftime("%Y%m%d", time.localtime(starttime))
              dayend = time.strftime("%Y%m%d", time.localtime(endtime))
              firstday_obj = datetime.datetime.strptime(daystart, '%Y%m%d')
              lastday_obj = datetime.datetime.strptime(dayend, '%Y%m%d')
              if logtype == 'accounting':
                  logdir = os.path.join(self.pbs_conf['PBS_HOME'],
                                        'server_priv', 'accounting')
                  # accounting logs are read with sudo
                  sudo = True
              elif (isinstance(self, Scheduler) and
                      'sched_log' in self.attributes):
                  # if setup is multi-sched then get logdir from
                  # its attributes
                  logdir = self.attributes['sched_log']
              else:
                  logval = self._instance_to_logpath(logtype)
                  if logval is None:
                      m = 'Invalid logtype'
                      raise PtlLogMatchError(rv=False, rc=-1, msg=m)
                  logdir = os.path.join(self.pbs_conf['PBS_HOME'], logval)
              while firstday_obj <= lastday_obj:
                  day = firstday_obj.strftime("%Y%m%d")
                  filename = os.path.join(logdir, day)
                  if n == 'ALL':
                      if self._is_local and not sudo:
                          with open(filename) as f:
                              day_lines = f.readlines()
                      else:
                          day_lines = self.du.cat(
                              self.hostname, filename, sudo=sudo,
                              level=logging.DEBUG2)['out']
                  elif self._is_local and not sudo:
                      if tail:
                          futils = FileUtils(filename, FILE_TAIL)
                      else:
                          futils = FileUtils(filename)
                      day_lines = futils.next(n)
                  else:
                      if tail:
                          cmd = ['/usr/bin/tail']
                      else:
                          cmd = ['/usr/bin/head']
                      cmd += ['-n']
                      cmd += [str(n), filename]
                      day_lines = self.du.run_cmd(
                          self.hostname, cmd, sudo=sudo,
                          level=logging.DEBUG2)['out']
                  lines.extend(day_lines)
                  firstday_obj = firstday_obj + datetime.timedelta(days=1)
                  if n == 'ALL':
                      continue
                  # only ask the next day for the lines still missing
                  n = n - len(day_lines)
                  if n <= 0:
                      break
      except Exception:
          # best effort: any error is reported and turned into None, but
          # KeyboardInterrupt and SystemExit are left to propagate
          self.logger.error('error in log_lines ')
          traceback.print_exc()
          return None
      return lines
  def _log_match(self, logtype, msg, id=None, n=50, tail=True,
                 allmatch=False, regexp=False, max_attempts=None,
                 interval=None, starttime=None, endtime=None,
                 level=logging.INFO, existence=True):
      """
      Match given ``msg`` in given ``n`` lines of log file

      :param logtype: The entity requested, an instance of a
                      Scheduler, Server, or MoM object, or the
                      strings 'tracejob' for tracejob or
                      'accounting' for accounting logs.
      :type logtype: object
      :param msg: log message to match, can be regex also when
                  ``regexp`` is True
      :type msg: str
      :param id: The id of the object to trace. Only used for
                 tracejob
      :type id: str
      :param n: 'ALL' or the number of lines to search through,
                defaults to 50
      :type n: str or int
      :param tail: If true (default), starts from the end of
                   the file
      :type tail: bool
      :param allmatch: If True all matching lines out of the lines
                       parsed are returned as a list. Defaults
                       to False
      :type allmatch: bool
      :param regexp: If true msg is a Python regular expression.
                     Defaults to False
      :type regexp: bool
      :param max_attempts: the number of attempts to make to find
                           a matching entry, defaults to 60
      :type max_attempts: int
      :param interval: the interval between attempts, defaults
                       to 0.5
      :type interval: int
      :param starttime: If set ignore matches that occur before
                        specified time
      :type starttime: int
      :param endtime: If set ignore matches that occur after
                      specified time
      :type endtime: int
      :param level: The logging level, defaults to INFO
      :type level: int
      :param existence: If True (default), check for existence of
                        given msg, else check for non-existence of
                        given msg.
      :type existence: bool
      :return: (x,y) where x is the matching line
               number and y the line itself. If allmatch is True,
               a list of tuples is returned.
      :rtype: tuple
      :raises PtlLogMatchError:
              When ``existence`` is True and given
              ``msg`` is not found in ``n`` line
              Or
              When ``existence`` is False and given
              ``msg`` found in ``n`` line.

      .. note:: The matching line number is relative to the record
                number, not the absolute line number in the file.
      """
      # imported here rather than at module level; any failure to
      # import is reported as a log match error
      try:
          from ptl.utils.pbs_logutils import PBSLogUtils
      except:
          _msg = 'error loading ptl.utils.pbs_logutils'
          raise PtlLogMatchError(rc=1, rv=False, msg=_msg)
      # the log utilities object is created once and kept on the instance
      if self.logutils is None:
          self.logutils = PBSLogUtils()
      if max_attempts is None:
          max_attempts = 60
      if interval is None:
          interval = 0.5
      # NOTE(review): if the loop below never runs (max_attempts < 1),
      # rv keeps this non-None placeholder and the final check treats
      # it as a match
      rv = (None, None)
      attempt = 1
      lines = None
      name = self._instance_to_servicename(logtype)
      infomsg = (name + ' ' + self.shortname +
                 ' log match: searching for "' + msg + '"')
      if regexp:
          infomsg += ' - using regular expression '
      if allmatch:
          infomsg += ' - on all matches '
      attemptmsg = ' - No match'
      while attempt <= max_attempts:
          if attempt > 1:
              attemptmsg = ' - attempt ' + str(attempt)
          lines = self.log_lines(logtype, id, n=n, tail=tail,
                                 starttime=starttime, endtime=endtime)
          rv = self.logutils.match_msg(lines, msg, allmatch=allmatch,
                                       regexp=regexp, starttime=starttime,
                                       endtime=endtime)
          if rv:
              self.logger.log(level, infomsg + '... OK')
              break
          else:
              if ((starttime is not None or endtime is not None) and
                      n != 'ALL'):
                  # NOTE(review): this condition cannot hold inside a
                  # loop that runs while attempt <= max_attempts, so
                  # max_attempts is never extended here
                  if attempt > max_attempts:
                      # We will do one last attempt to match in case the
                      # number of lines that were provided did not capture
                      # the start or end time of interest
                      max_attempts += 1
                  # after a miss with a time window, later attempts
                  # search the whole log instead of n lines
                  n = 'ALL'
              self.logger.log(level, infomsg + attemptmsg)
          attempt += 1
          time.sleep(interval)
      try:
          # Depending on whether the hostname is local or remote and whether
          # sudo privileges were required, lines returned by log_lines can be
          # an open file descriptor, we close here but ignore errors in case
          # any were raised for all irrelevant cases
          lines.close()
      except:
          pass
      # presumably match_msg returns None when nothing matched; verify
      # against PBSLogUtils.match_msg
      if (rv is None and existence) or (rv is not None and not existence):
          _msg = infomsg + attemptmsg
          raise PtlLogMatchError(rc=1, rv=False, msg=_msg)
      return rv
  def accounting_match(self, msg, id=None, n=50, tail=True,
                       allmatch=False, regexp=False, max_attempts=None,
                       interval=None, starttime=None, endtime=None,
                       level=logging.INFO, existence=True):
      """
      Match given ``msg`` in given ``n`` lines of accounting log

      All the work is delegated to ``_log_match`` with the log type
      'accounting'.

      :param msg: log message to match, a Python regular expression
                  when ``regexp`` is True
      :type msg: str
      :param id: The id of the object to trace, passed through
      :type id: str
      :param n: 'ALL' or the number of lines to search through,
                defaults to 50
      :type n: str or int
      :param tail: If true (default), starts from the end of
                   the file
      :type tail: bool
      :param allmatch: If True all matching lines are returned as a
                       list. Defaults to False
      :type allmatch: bool
      :param regexp: If true msg is a Python regular expression.
                     Defaults to False
      :type regexp: bool
      :param max_attempts: the number of attempts to make to find
                           a matching entry
      :type max_attempts: int
      :param interval: the interval between attempts
      :type interval: int
      :param starttime: If set ignore matches that occur before
                        specified time
      :type starttime: int
      :param endtime: If set ignore matches that occur after
                      specified time
      :type endtime: int
      :param level: The logging level, defaults to INFO
      :type level: int
      :param existence: If True (default), check for existence of
                        given msg, else check for non-existence of
                        given msg.
      :type existence: bool
      :return: (x,y) where x is the matching line
               number and y the line itself. If allmatch is True,
               a list of tuples is returned.
      :rtype: tuple
      :raises PtlLogMatchError:
              When ``existence`` is True and ``msg`` is not found,
              or when ``existence`` is False and ``msg`` is found.

      .. note:: The matching line number is relative to the record
                number, not the absolute line number in the file.
      """
      return self._log_match('accounting', msg, id=id, n=n, tail=tail,
                             allmatch=allmatch, regexp=regexp,
                             max_attempts=max_attempts,
                             interval=interval, starttime=starttime,
                             endtime=endtime, level=level,
                             existence=existence)
  def tracejob_match(self, msg, id=None, n=50, tail=True,
                     allmatch=False, regexp=False, max_attempts=None,
                     interval=None, starttime=None, endtime=None,
                     level=logging.INFO, existence=True):
      """
      Match given ``msg`` in given ``n`` lines of tracejob log

      All the work is delegated to ``_log_match`` with the log type
      'tracejob'.

      :param msg: log message to match, a Python regular expression
                  when ``regexp`` is True
      :type msg: str
      :param id: The id of the object to trace.
      :type id: str
      :param n: 'ALL' or the number of lines to search through,
                defaults to 50
      :type n: str or int
      :param tail: If true (default), starts from the end of
                   the file
      :type tail: bool
      :param allmatch: If True all matching lines are returned as a
                       list. Defaults to False
      :type allmatch: bool
      :param regexp: If true msg is a Python regular expression.
                     Defaults to False
      :type regexp: bool
      :param max_attempts: the number of attempts to make to find
                           a matching entry
      :type max_attempts: int
      :param interval: the interval between attempts
      :type interval: int
      :param starttime: If set ignore matches that occur before
                        specified time
      :type starttime: int
      :param endtime: If set ignore matches that occur after
                      specified time
      :type endtime: int
      :param level: The logging level, defaults to INFO
      :type level: int
      :param existence: If True (default), check for existence of
                        given msg, else check for non-existence of
                        given msg.
      :type existence: bool
      :return: (x,y) where x is the matching line
               number and y the line itself. If allmatch is True,
               a list of tuples is returned.
      :rtype: tuple
      :raises PtlLogMatchError:
              When ``existence`` is True and ``msg`` is not found,
              or when ``existence`` is False and ``msg`` is found.

      .. note:: The matching line number is relative to the record
                number, not the absolute line number in the file.
      """
      return self._log_match('tracejob', msg, id=id, n=n, tail=tail,
                             allmatch=allmatch, regexp=regexp,
                             max_attempts=max_attempts,
                             interval=interval, starttime=starttime,
                             endtime=endtime, level=level,
                             existence=existence)
  def _save_config_file(self, dict_conf, fname):
      """
      Store the content of ``fname`` into ``dict_conf``, keyed by
      ``fname``. An error is logged when the file could not be read.

      :param dict_conf: the dictionary to store the content into
      :type dict_conf: Dictionary
      :param fname: path of the file to save
      :type fname: str
      """
      res = self.du.cat(self.hostname, fname, sudo=True)
      if res['rc'] != 0:
          self.logger.error('error saving configuration ' + fname)
          return
      dict_conf[fname] = res['out']
  def _load_configuration(self, infile, objtype=None):
      """
      Load configuration as was saved in infile

      Each saved entry is written to a temporary file. Entries whose
      key starts with ``qmgr_`` are fed to qmgr on its standard
      input, all others are copied over the file named by their key.

      :param infile: the file in which configuration
                     was saved
      :type infile: str
      :param objtype: the object type to load configuration
                      for, one of server, scheduler, mom or
                      if None, load all objects in infile
      :returns: True when ``infile`` is a file and was processed,
                False otherwise
      """
      if not os.path.isfile(infile):
          return False
      conf = {}
      # NOTE(review): unpickling can execute arbitrary code, infile must
      # come from a trusted source
      with open(infile, 'r') as f:
          # load all objects from the Pickled file, the last one loaded
          # is the one used
          while True:
              try:
                  conf = cPickle.load(f)
              except Exception:
                  break
      if objtype and objtype in conf:
          conf = conf[objtype]
      else:
          # load all object types that could be in infile
          newconf = {}
          for ky in [MGR_OBJ_SERVER, MGR_OBJ_SCHED, MGR_OBJ_NODE]:
              newconf.update(conf.get(ky, {}))
          conf = newconf
      for k, v in conf.items():
          fn = self.du.create_temp_file()
          try:
              is_qmgr = k.startswith('qmgr_')
              # write and close the temporary file first so that its
              # content is on disk before any command reads it
              with open(fn, 'w') as fd:
                  fd.write("\n".join(v))
                  if not is_qmgr:
                      # append the last line
                      fd.write("\n")
              if is_qmgr:
                  # handle server data saved as output of qmgr commands
                  # by piping data back into qmgr, through a handle
                  # opened for reading
                  qmgr = os.path.join(self.client_conf['PBS_EXEC'],
                                      'bin', 'qmgr')
                  with open(fn, 'r') as fd:
                      self.du.run_cmd(self.hostname, [qmgr], cstdin=fd,
                                      sudo=True)
              else:
                  self.du.run_cmd(self.hostname, ['cp', fn, k],
                                  sudo=True)
          finally:
              os.remove(fn)
      return True
  def get_tempdir(self):
      """
      Get a temporary directory for the host of this service, as
      returned by ``self.du.get_tempdir``

      :returns: the result of ``self.du.get_tempdir`` for this host
      """
      tmpdir = self.du.get_tempdir(self.hostname)
      return tmpdir
  def __str__(self):
      """
      Return ``<class name> <hostname> config <pbs conf file>``

      The parts are formatted rather than concatenated so that a
      ``pbs_conf_file`` of None does not raise a TypeError.
      """
      return '%s %s config %s' % (self.__class__.__name__, self.hostname,
                                  self.pbs_conf_file)
  def __repr__(self):
      """
      Return ``<class name>/<pbs conf file>@<hostname>``

      The parts are formatted rather than concatenated so that a
      ``pbs_conf_file`` of None does not raise a TypeError.
      """
      return '%s/%s@%s' % (self.__class__.__name__, self.pbs_conf_file,
                           self.hostname)
  3652. class Comm(PBSService):
  3653. """
  3654. PBS ``Comm`` configuration and control
  3655. """
  3656. """
  3657. :param name: The hostname of the Comm. Defaults to current hostname.
  3658. :type name: str
  3659. :param attrs: Dictionary of attributes to set, these will override
  3660. defaults.
  3661. :type attrs: dictionary
  3662. :param pbsconf_file: path to config file to parse for PBS_HOME,
  3663. PBS_EXEC, etc
  3664. :type pbsconf_file: str or None
  3665. :param diagmap: A dictionary of PBS objects (node,server,etc) to
  3666. mapped files from PBS diag directory
  3667. :type diagmap: dictionary
  3668. :param diag: path to PBS diag directory (This will override diagmap)
  3669. :type diag: str or None
  3670. :param server: A PBS server instance to which this Comm is associated
  3671. :type server: str
  3672. :param db_access: set to either file containing credentials to DB access or
  3673. dictionary containing {'dbname':...,'user':...,
  3674. 'port':...}
  3675. :type db_access: str or dictionary
  3676. """
  3677. dflt_attributes = {}
  def __init__(self, name=None, attrs={}, pbsconf_file=None, diagmap={},
               diag=None, server=None, db_access=None):
      """
      Set up the Comm, the server it is associated to, its log
      prefix, its configuration to command line map and its init
      services helper

      :param name: The hostname of the Comm
      :type name: str
      :param attrs: Dictionary of attributes to set
      :type attrs: dictionary
      :param pbsconf_file: path to config file
      :type pbsconf_file: str or None
      :param diagmap: A dictionary of PBS objects to mapped files
                      from PBS diag directory. When empty, the one
                      of ``server`` is used if it is not empty
      :type diagmap: dictionary
      :param diag: path to PBS diag directory. When None, the one
                   of ``server`` is used
      :type diag: str or None
      :param server: A PBS server instance to which this Comm is
                     associated. When None a Server is created
      :type server: object or None
      :param db_access: passed to the Server created when ``server``
                        is None
      :type db_access: str or dictionary
      """
      self.logger = logging.getLogger(__name__)
      if server is None:
          self.server = Server(name, pbsconf_file=pbsconf_file,
                               db_access=db_access, diag=diag,
                               diagmap=diagmap)
      else:
          self.server = server
          # inherit the diag settings of the server when none are given
          if diag is None:
              diag = self.server.diag
          if (len(diagmap) == 0) and (len(self.server.diagmap) != 0):
              diagmap = self.server.diagmap
      PBSService.__init__(self, name, attrs, self.dflt_attributes,
                          pbsconf_file, diagmap, diag)
      prefix = 'Comm ' + self.shortname
      if pbsconf_file is not None:
          prefix += '@' + pbsconf_file
      self.logprefix = prefix + ': '
      self.conf_to_cmd_map = {
          'PBS_COMM_ROUTERS': '-r',
          'PBS_COMM_THREADS': '-t'
      }
      self.pi = PBSInitServices(hostname=self.hostname,
                                conf=self.pbs_conf_file)
  def isUp(self):
      """
      Check whether this comm is up

      :returns: the result of the generic ``_isUp`` for this comm
      """
      up = super(Comm, self)._isUp(self)
      return up
  def signal(self, sig):
      """
      Log and send signal ``sig`` to this comm

      :param sig: signal name as passed to kill
      :type sig: str
      :returns: the result of the generic ``_signal`` for this comm
      """
      logmsg = self.logprefix + 'sent signal ' + sig
      self.logger.info(logmsg)
      return super(Comm, self)._signal(sig, inst=self)
  def get_pid(self):
      """
      Get the pid of this comm

      :returns: the result of the generic ``_get_pid`` for this comm
      """
      pid = super(Comm, self)._get_pid(inst=self)
      return pid
  def all_instance_pids(self):
      """
      Get all pids of this comm

      :returns: the result of the generic ``_all_instance_pids`` for
                this comm
      """
      pids = super(Comm, self)._all_instance_pids(inst=self)
      return pids
  def start(self, args=None, launcher=None):
      """
      Start the comm

      Without ``args`` and ``launcher`` the comm is started through
      the init services helper and its pid is refreshed. Otherwise
      the generic ``_start`` is used.

      :param args: Arguments to start the comm with
      :param launcher: Optional utility to invoke the launch of the
                       service
      :type launcher: str or list
      :returns: the result of the start call used
      :raises PbsServiceError: when the init services helper fails
      """
      if args is None and launcher is None:
          try:
              started = self.pi.start_comm()
              self._update_pid(self)
          except PbsInitServicesError as e:
              raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
          return started
      return super(Comm, self)._start(inst=self, args=args,
                                      cmd_map=self.conf_to_cmd_map,
                                      launcher=launcher)
  def stop(self, sig=None):
      """
      Stop the comm.

      Without ``sig`` the comm is stopped through the init services
      helper and ``self.pid`` is reset. Otherwise the generic
      ``_stop`` is used with that signal.

      :param sig: Signal to stop the comm
      :type sig: str
      :returns: True when stopped through the init services helper,
                otherwise the result of the generic ``_stop``
      :raises PbsServiceError: when the init services helper fails
      """
      if sig is None:
          try:
              self.pi.stop_comm()
              self.pid = None
          except PbsInitServicesError as e:
              raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
          return True
      self.logger.info(self.logprefix + 'stopping Comm on host ' +
                       self.hostname)
      return super(Comm, self)._stop(sig, inst=self)
  def restart(self):
      """
      Restart the comm: stop it if it is up, then start it

      :returns: False when the comm is up and could not be stopped,
                otherwise the result of ``start``
      """
      if self.isUp() and not self.stop():
          return False
      return self.start()
  def log_match(self, msg=None, id=None, n=50, tail=True, allmatch=False,
                regexp=False, max_attempts=None, interval=None,
                starttime=None, endtime=None, level=logging.INFO,
                existence=True):
      """
      Look for ``msg`` in ``n`` lines of the Comm log.

      All arguments are handed to ``self._log_match`` together with this
      object.

      :param msg: log message to match; treated as a regular expression
                  when ``regexp`` is True
      :type msg: str
      :param id: The id of the object to trace. Only used for tracejob
      :type id: str
      :param n: 'ALL' or the number of lines to search through,
                defaults to 50
      :type n: str or int
      :param tail: If True (default), start from the end of the file
      :type tail: bool
      :param allmatch: If True, all matching lines out of the ``n``
                       parsed are returned as a list. Defaults to False
      :type allmatch: bool
      :param regexp: If True, msg is a Python regular expression.
                     Defaults to False
      :type regexp: bool
      :param max_attempts: the number of attempts to make to find a
                           matching entry
      :type max_attempts: int
      :param interval: the interval between attempts
      :type interval: int
      :param starttime: If set, ignore matches that occur before the
                        specified time
      :type starttime: int
      :param endtime: If set, ignore matches that occur after the
                      specified time
      :type endtime: int
      :param level: The logging level, defaults to INFO
      :type level: int
      :param existence: If True (default), check for existence of the
                        given msg, else check for its non-existence
      :type existence: bool
      :return: (x, y) where x is the matching line number and y the
               line itself. If allmatch is True, a list of tuples is
               returned.
      :rtype: tuple
      :raises PtlLogMatchError:
          When ``existence`` is True and ``msg`` is not found in ``n``
          lines, or when ``existence`` is False and ``msg`` is found
          in ``n`` lines.

      .. note:: The matching line number is relative to the record
                number, not the absolute line number in the file.
      """
      found = self._log_match(self, msg, id, n, tail, allmatch, regexp,
                              max_attempts, interval, starttime, endtime,
                              level=level, existence=existence)
      return found
  3827. class Server(PBSService):
  3828. """
  3829. PBS server ``configuration`` and ``control``
  3830. The Server class is a container to PBS server attributes
  3831. and implements wrappers to the ``IFL API`` to perform
  3832. operations on the server. For example to submit, status,
  3833. delete, manage, etc... jobs, reservations and configurations.
  3834. This class also offers higher-level routines to ease testing,
  3835. see functions, for ``example: revert_to_defaults,
  3836. init_logging, expect, counter.``
  3837. The ptl_conf dictionary holds general configuration for the
  3838. framework's operations, specifically, one can control:
  3839. mode: set to ``PTL_CLI`` to operate in ``CLI`` mode or
  3840. ``PTL_API`` to operate in ``API`` mode
  3841. expect_max_attempts: the default maximum number of attempts
  3842. to be used\ by expect. Defaults to 60
  3843. expect_interval: the default time interval (in seconds)
  3844. between expect\ requests. Defaults to 0.5
  3845. update_attributes: the default on whether Object attributes
  3846. should be\ updated using a list of dictionaries. Defaults
  3847. to True
  3848. :param name: The hostname of the server. Defaults to
  3849. calling pbs_default()
  3850. :type name: str
  3851. :param attrs: Dictionary of attributes to set, these will
  3852. override defaults.
  3853. :type attrs: Dictionary
  3854. :param defaults: Dictionary of default attributes.
  3855. Default: dflt_attributes
  3856. :type defaults: Dictionary
  3857. :param pbsconf_file: path to config file to parse for PBS_HOME,
  3858. PBS_EXEC, etc
  3859. :type pbsconf_file: str
  3860. :param diagmap: A dictionary of PBS objects (node,server,etc)
  3861. to mapped files from PBS diag directory
  3862. :type diagmap: Dictionary
  3863. :param diag: path to PBS diag directory (this overrides
  3864. diagmap)
  3865. :type diag: str
  3866. :param client: The host to use as client for CLI queries.
  3867. Defaults to the local hostname.
  3868. :type client: str
  3869. :param client_pbsconf_file: The path to a custom PBS_CONF_FILE
  3870. on the client host. Defaults to
  3871. the same path as pbsconf_file.
  3872. :type client_pbsconf_file: str
  3873. :param db_access: set to either file containing credentials
  3874. to DB access or dictionary containing
  3875. {'dbname':...,'user':...,'port':...}
  3876. :param stat: if True, stat the server attributes
  3877. :type stat: bool
  3878. """
  logger = logging.getLogger(__name__)

  # Server attribute values used when the constructor is not given any
  # ``defaults``; revert_to_defaults() sets the server back to these.
  dflt_attributes = {
      ATTR_scheduling: "True",
      ATTR_dfltque: "workq",
      ATTR_logevents: "511",
      ATTR_mailfrom: "adm",
      ATTR_queryother: "True",
      ATTR_rescdflt + ".ncpus": "1",
      ATTR_DefaultChunk + ".ncpus": "1",
      ATTR_schedit: "600",
      ATTR_ResvEnable: "True",
      ATTR_nodefailrq: "310",
      ATTR_maxarraysize: "10000",
      ATTR_license_linger: "3600",
      ATTR_EligibleTimeEnable: "False",
      ATTR_max_concurrent_prov: "5",
      ATTR_FlatUID: 'True',
  }

  dflt_sched_name = 'default'

  # Framework-wide settings shared by all Server instances; changed
  # through set_op_mode() and the set_* class methods.
  ptl_conf = {
      'mode': PTL_API,
      'expect_max_attempts': 60,
      'expect_interval': 0.5,
      'update_attributes': True,
  }
  # this pattern is a bit relaxed to match common developer build numbers
  # (raw string so the regex escapes are not read as string escapes; the
  # pattern value is unchanged)
  version_tag = re.compile(r"[a-zA-Z_]*(?P<version>[\d\.]+.[\w\d\.]*)[\s]*")

  actions = ExpectActions()
  def __init__(self, name=None, attrs={}, defaults={}, pbsconf_file=None,
               diagmap={}, diag=None, client=None, client_pbsconf_file=None,
               db_access=None, stat=True):
      """
      Initialize the per-server caches and connection state, set up the
      client configuration and, when ``stat`` is True, query the server
      for its attributes.

      :param name: passed to ``PBSService.__init__``
      :param attrs: passed to ``PBSService.__init__``
      :type attrs: Dictionary
      :param defaults: default attributes; when empty, the class level
                       ``dflt_attributes`` is used
      :type defaults: Dictionary
      :param pbsconf_file: passed to ``PBSService.__init__``; also shown
                           in the log prefix when set
      :type pbsconf_file: str
      :param diagmap: passed to ``PBSService.__init__``
      :type diagmap: Dictionary
      :param diag: passed to ``PBSService.__init__``
      :type diag: str
      :param client: client host name, see ``set_client``
      :type client: str
      :param client_pbsconf_file: PBS configuration file on the client;
                                  when None it is looked up for the
                                  client host
      :type client_pbsconf_file: str
      :param db_access: forwarded to the initial ``status`` call
      :param stat: if True, stat the server attributes
      :type stat: bool
      """
      # Caches of PBS objects known to this server
      self.jobs = {}
      self.nodes = {}
      self.reservations = {}
      self.queues = {}
      self.resources = {}
      self.hooks = {}
      self.pbshooks = {}
      self.entities = {}
      self.schedulers = {}
      self.version = None
      self.default_queue = None
      self.last_error = []  # type: array. Set for CLI IFL errors. Not reset
      self.last_out = []  # type: array. Set for CLI IFL output. Not reset
      self.last_rc = None  # Set for CLI IFL return code. Not thread-safe
      self.moms = {}

      # default timeout on connect/disconnect set to 60s to mimic the qsub
      # buffer introduced in PBS 11
      self._conn_timeout = 60
      self._conn_timer = None
      self._conn = None
      self._db_conn = None
      self.current_user = pwd.getpwuid(os.getuid())[0]

      # No defaults supplied: fall back to the class level dictionary
      # (the same object, not a copy)
      if len(defaults.keys()) == 0:
          defaults = self.dflt_attributes

      self.pexpect_timeout = 15
      self.pexpect_sleep_time = .1

      PBSService.__init__(self, name, attrs, defaults, pbsconf_file, diagmap,
                          diag)
      # Log prefix: 'server <shortname>[@<pbsconf_file>]: '
      _m = ['server ', self.shortname]
      if pbsconf_file is not None:
          _m += ['@', pbsconf_file]
      _m += [': ']
      self.logprefix = "".join(_m)
      self.pi = PBSInitServices(hostname=self.hostname,
                                conf=self.pbs_conf_file)
      self.set_client(client)

      if client_pbsconf_file is None:
          self.client_pbs_conf_file = self.du.get_pbs_conf_file(self.client)
      else:
          self.client_pbs_conf_file = client_pbsconf_file

      self.client_conf = self.du.parse_pbs_config(
          self.client, file=self.client_pbs_conf_file)

      # The client configuration counts as the default one when it is
      # /etc/pbs.conf or matches the PBS_CONF_FILE environment variable
      if self.client_pbs_conf_file == '/etc/pbs.conf':
          self.default_client_pbs_conf = True
      elif (('PBS_CONF_FILE' not in os.environ) or
            (os.environ['PBS_CONF_FILE'] != self.client_pbs_conf_file)):
          self.default_client_pbs_conf = False
      else:
          self.default_client_pbs_conf = True

      # When running as root, acl_roots=root becomes part of the defaults.
      # NOTE(review): this updates the class level dflt_attributes, so the
      # change is visible to every Server instance.
      a = {}
      if os.getuid() == 0:
          a = {ATTR_aclroot: 'root'}
      self.dflt_attributes.update(a)

      if not API_OK:
          # mode must be set before the first stat call
          self.set_op_mode(PTL_CLI)

      if stat:
          # A failed connect/status is tolerated here: the attributes
          # are simply not refreshed from the server
          try:
              tmp_attrs = self.status(SERVER, level=logging.DEBUG,
                                      db_access=db_access)
          except (PbsConnectError, PbsStatusError):
              tmp_attrs = None

          if tmp_attrs is not None and len(tmp_attrs) > 0:
              self.attributes = tmp_attrs[0]

          if ATTR_dfltque in self.attributes:
              self.default_queue = self.attributes[ATTR_dfltque]

          self.update_version_info()
  def update_version_info(self):
      """
      Refresh ``self.version`` from the server's version attribute.

      When the version attribute is absent it is recorded as
      ``'unknown'``; otherwise the part matched by ``version_tag`` is
      stored as a ``LooseVersion``. The version attribute is logged in
      both cases.
      """
      if ATTR_version in self.attributes:
          matched = self.version_tag.match(self.attributes[ATTR_version])
          if matched:
              self.version = LooseVersion(matched.group('version'))
      else:
          self.attributes[ATTR_version] = 'unknown'

      self.logger.info(self.logprefix + 'version ' +
                       self.attributes[ATTR_version])
  3990. @classmethod
  3991. def set_update_attributes(cls, val):
  3992. """
  3993. Set update attributes
  3994. """
  3995. cls.logger.info('setting update attributes ' + str(val))
  3996. if val == 1 or val[0] in ('t', 'T'):
  3997. val = True
  3998. else:
  3999. val = False
  4000. cls.ptl_conf['update_attributes'] = val
  4001. @classmethod
  4002. def set_expect_max_attempts(cls, val):
  4003. """
  4004. Set expect max attempts
  4005. """
  4006. cls.logger.info('setting expect max attempts ' + str(val))
  4007. cls.ptl_conf['expect_max_attempts'] = int(val)
  4008. @classmethod
  4009. def set_expect_interval(cls, val):
  4010. """
  4011. Set expect interval
  4012. """
  4013. cls.logger.info('setting expect interval ' + str(val))
  4014. cls.ptl_conf['expect_interval'] = float(val)
  def set_client(self, name=None):
      """
      Set the client host of this server.

      :param name: Client name; the local host name is used when None
      :type name: str
      """
      self.client = socket.gethostname() if name is None else name
  def _connect(self, hostname, attempt=1):
      """
      Return a connection handle to the server on ``hostname``.

      A new connection is opened when there is no usable cached handle,
      when the connection timeout is 0 or when no connection timer is
      set; otherwise the cached handle is returned. A failed connect is
      retried after a one second sleep.

      :param hostname: host to connect to
      :type hostname: str
      :param attempt: number of the current attempt
      :type attempt: int
      :raises PbsConnectError: when the connect still fails and
                               ``attempt`` is greater than 5
      """
      no_handle = self._conn is None or self._conn < 0
      if no_handle or self._conn_timeout == 0 or self._conn_timer is None:
          self._conn = pbs_connect(hostname)
          self._conn_timer = time.time()

      if not (self._conn is None or self._conn < 0):
          return self._conn

      if attempt > 5:
          errmsg = self.logprefix + 'unable to connect'
          raise PbsConnectError(rv=None, rc=-1, msg=errmsg)

      self._disconnect(self._conn, force=True)
      time.sleep(1)
      return self._connect(hostname, attempt + 1)
  4039. def _disconnect(self, conn, force=False):
  4040. """
  4041. disconnect a connection to a Server.
  4042. For performance of the API calls, a connection is
  4043. maintained up to _conn_timer, unless the force parameter
  4044. is set to True
  4045. :param conn: Server connection
  4046. :param force: If true then diconnect forcefully
  4047. :type force: bool
  4048. """
  4049. if ((conn is not None and conn >= 0) and
  4050. (force or
  4051. (self._conn_timeout == 0 or
  4052. (self._conn_timer is not None and
  4053. (time.time() - self._conn_timer > self._conn_timeout))))):
  4054. pbs_disconnect(conn)
  4055. self._conn_timer = None
  4056. self._conn = None
  def set_connect_timeout(self, timeout=0):
      """
      Set the server connection timeout.

      The value is stored as ``_conn_timeout``, which ``_connect`` and
      ``_disconnect`` consult; 0 is the default.

      :param timeout: Timeout value
      :type timeout: int
      """
      self._conn_timeout = timeout
  def get_op_mode(self):
      """
      Return the operating mode used for calls to the PBS server.

      Two modes are supported, ``API`` and ``CLI``. ``CLI`` is reported
      when the API is not available or when the configured mode is
      ``PTL_CLI``; otherwise ``API``.
      """
      cli_only = (not API_OK) or (self.ptl_conf['mode'] == PTL_CLI)
      return PTL_CLI if cli_only else PTL_API
  def set_op_mode(self, mode):
      """
      Set the operating mode to either ``PTL_CLI`` or ``PTL_API``.

      The returned mode can differ from the requested one: when
      ``PTL_API`` is requested but the API is not available, ``PTL_CLI``
      is returned (the configured mode is left unchanged). When
      ``PTL_CLI`` is requested, there is no diag and no ``bin``
      directory under the client's ``PBS_EXEC``, None is returned. None
      is also returned for an unrecognized mode.

      :param mode: Operating mode
      :type mode: str
      :returns: the mode that is in effect for the request, or None
      """
      if mode == PTL_API:
          # Drop the cached connection handle. The previous test
          # ("is not None or < 0") was true for every value and compared
          # None with an int, so the reset is simply unconditional.
          self._conn = None
          if not API_OK:
              self.logger.error(self.logprefix +
                                'API submission is not available')
              return PTL_CLI
      elif mode == PTL_CLI:
          if ((not self.has_diag) and
              not os.path.isdir(os.path.join(self.client_conf['PBS_EXEC'],
                                             'bin'))):
              self.logger.error(self.logprefix +
                                'PBS commands are not available')
              return None
      else:
          self.logger.error(self.logprefix + "Unrecognized operating mode")
          return None

      self.ptl_conf['mode'] = mode
      self.logger.info(self.logprefix + 'server operating mode set to ' +
                       mode)
      return mode
  def add_expect_action(self, name=None, action=None):
      """
      Add an action handler to expect. Expect Actions are custom
      handlers that are triggered when an unexpected value is
      encountered.

      Nothing is added when no action is given, when neither ``name``
      nor ``action.name`` is set, or when an action is already
      registered under the resulting name.

      :param name: Action name; defaults to ``action.name``
      :type name: str or None
      :param action: Action to add
      """
      if action is None:
          # called with the default arguments: nothing to register
          return
      if name is None:
          if action.name is None:
              return
          name = action.name

      if not self.actions.has_action(name):
          self.actions.add_action(action, self.shortname)
  def set_attributes(self, a={}):
      """
      Set server attributes.

      The dictionary is passed to the parent class ``set_attributes``
      and its entries are also merged into this object's ``__dict__``.

      :param a: Attribute dictionary
      :type a: Dictionary
      """
      parent = super(Server, self)
      parent.set_attributes(a)
      self.__dict__.update(a)
  def isUp(self):
      """
      Return ``True`` if the server is up and ``False`` otherwise.

      With a diag the server is always reported up. Otherwise a status
      (CLI mode) or a connect/disconnect (API mode) is attempted. When
      that fails while a server process exists, the check is repeated
      after a one second sleep, up to 20 times.
      """
      if self.has_diag:
          return True

      op_mode = self.get_op_mode()
      # start from a fresh connection in API mode
      if ((op_mode == PTL_API) and (self._conn is not None)):
          self._disconnect(self._conn, force=True)

      attempts = 0
      while attempts < 20:
          try:
              if op_mode == PTL_CLI:
                  self.status(SERVER, level=logging.DEBUG, logerr=False)
              else:
                  c = self._connect(self.hostname)
                  self._disconnect(c, force=True)
              return True
          except (PbsConnectError, PbsStatusError):
              # if the status/connect operation fails then there might be
              # chances that server process is running but not responsive
              # so we wait until the server is reported operational.
              rv = self._isUp(self)
              # We really mean to check != False rather than just "rv"
              if str(rv) == 'False':
                  # status/connect failed + no server process means
                  # server is actually down
                  return False
              self.logger.warning('Server process started ' +
                                  'but not up yet')
              time.sleep(1)
              attempts += 1
      return False
  def signal(self, sig):
      """
      Send a signal to the server.

      The signal is logged and then delivered through the parent class
      ``_signal``.

      :param sig: Signal to send
      :type sig: str
      """
      note = 'server ' + self.shortname + ': sent signal ' + sig
      self.logger.info(note)
      return super(Server, self)._signal(sig, inst=self)
  def get_pid(self):
      """
      Return the pid of this server.

      The lookup is delegated to the parent class ``_get_pid`` with this
      object passed as the instance.
      """
      server_pid = super(Server, self)._get_pid(inst=self)
      return server_pid
  def all_instance_pids(self):
      """
      Return all pids belonging to this server instance.

      The lookup is delegated to the parent class ``_all_instance_pids``
      with this object passed as the instance.
      """
      pids = super(Server, self)._all_instance_pids(inst=self)
      return pids
  def start(self, args=None, launcher=None):
      """
      Start the PBS server.

      When neither ``args`` nor ``launcher`` is given the server is
      started through the init services object (``self.pi``) and the pid
      is refreshed; otherwise the parent class ``_start`` is used. The
      start only counts as successful once ``isUp()`` reports True.

      :param args: Argument required to start the server
      :type args: str
      :param launcher: Optional utility to invoke the launch of the service
      :type launcher: str or list
      :returns: the value returned by whichever start routine was used
      :raises PbsServiceError: when the init services start fails or the
                               server is not up after the start
      """
      if args is not None or launcher is not None:
          rv = super(Server, self)._start(inst=self, args=args,
                                          launcher=launcher)
      else:
          try:
              rv = self.pi.start_server()
              self._update_pid(self)
          except PbsInitServicesError as e:
              raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)

      if self.isUp():
          return rv

      # NOTE(review): rv is assumed to be a mapping with an 'err' entry
      # for at least one of the start paths - confirm. When it is not
      # subscriptable, still raise PbsServiceError rather than letting
      # a TypeError/KeyError escape.
      try:
          errmsg = rv['err']
      except (TypeError, KeyError, IndexError):
          errmsg = self.logprefix + 'failed to start'
      raise PbsServiceError(rv=False, rc=1, msg=errmsg)
  def stop(self, sig=None):
      """
      Stop the PBS server.

      Without a signal the server is stopped through the init services
      object (``self.pi``) and the cached pid is cleared; with a signal
      the parent class ``_stop`` is used. The server connection is
      force-disconnected afterwards.

      :param sig: Signal to stop PBS server
      :type sig: str
      :returns: True for the init services path, otherwise the value
                returned by the parent class ``_stop``
      :raises PbsServiceError: when the init services stop fails
      """
      if sig is None:
          try:
              self.pi.stop_server()
              self.pid = None
          except PbsInitServicesError as err:
              raise PbsServiceError(rc=err.rc, rv=err.rv, msg=err.msg,
                                    post=self._disconnect, conn=self._conn,
                                    force=True)
          stopped = True
      else:
          self.logger.info(self.logprefix + 'stopping Server on host ' +
                           self.hostname)
          stopped = super(Server, self)._stop(sig, inst=self)

      self._disconnect(self._conn, force=True)
      return stopped
  def restart(self):
      """
      Terminate and start a PBS server.

      A running server is stopped first; if that stop reports failure,
      False is returned and no start is attempted.

      :returns: False when the stop failed, otherwise the result of
                ``start()``
      """
      if self.isUp() and not self.stop():
          return False
      return self.start()
  def log_match(self, msg=None, id=None, n=50, tail=True, allmatch=False,
                regexp=False, max_attempts=None, interval=None,
                starttime=None, endtime=None, level=logging.INFO,
                existence=True):
      """
      Look for ``msg`` in ``n`` lines of the Server log.

      All arguments are handed to ``self._log_match`` together with this
      object.

      :param msg: log message to match; treated as a regular expression
                  when ``regexp`` is True
      :type msg: str
      :param id: The id of the object to trace. Only used for tracejob
      :type id: str
      :param n: 'ALL' or the number of lines to search through,
                defaults to 50
      :type n: str or int
      :param tail: If True (default), start from the end of the file
      :type tail: bool
      :param allmatch: If True, all matching lines out of the ``n``
                       parsed are returned as a list. Defaults to False
      :type allmatch: bool
      :param regexp: If True, msg is a Python regular expression.
                     Defaults to False
      :type regexp: bool
      :param max_attempts: the number of attempts to make to find a
                           matching entry
      :type max_attempts: int
      :param interval: the interval between attempts
      :type interval: int
      :param starttime: If set, ignore matches that occur before the
                        specified time
      :type starttime: int
      :param endtime: If set, ignore matches that occur after the
                      specified time
      :type endtime: int
      :param level: The logging level, defaults to INFO
      :type level: int
      :param existence: If True (default), check for existence of the
                        given msg, else check for its non-existence
      :type existence: bool
      :return: (x, y) where x is the matching line number and y the
               line itself. If allmatch is True, a list of tuples is
               returned.
      :rtype: tuple
      :raises PtlLogMatchError:
          When ``existence`` is True and ``msg`` is not found in ``n``
          lines, or when ``existence`` is False and ``msg`` is found
          in ``n`` lines.

      .. note:: The matching line number is relative to the record
                number, not the absolute line number in the file.
      """
      found = self._log_match(self, msg, id, n, tail, allmatch, regexp,
                              max_attempts, interval, starttime, endtime,
                              level=level, existence=existence)
      return found
  def revert_to_defaults(self, reverthooks=True, revertqueues=True,
                         revertresources=True, delhooks=True,
                         delqueues=True, delscheds=True, server_stat=None):
      """
      reset server attributes back to out of box defaults.

      :param reverthooks: If True disable all hooks. Defaults
                          to True
      :type reverthooks: bool
      :param revertqueues: If True disable all non-default
                           queues. Defaults to True
      :type revertqueues: bool
      :param revertresources: If True, the resources reported by the
                              server are deleted. Defaults to True.
      :type revertresources: bool
      :param delhooks: If True, hooks are deleted; ``reverthooks`` is
                       then only honored on the cray/craysim
                       platforms. Defaults to True.
      :type delhooks: bool
      :param delqueues: If True, all queues are deleted and ``workq``
                        is re-created; ``revertqueues`` is then
                        ignored. Defaults to True
      :type delqueues: bool
      :param delscheds: If True all non-default schedulers are deleted
                        The sched_priv and sched_logs directories will be
                        deleted.
      :type delscheds: bool
      :param server_stat: server attributes to work from; when None the
                          server is queried
      :type server_stat: Dictionary
      :returns: True
      :raises: PbsStatusError or PbsManagerError
      """
      # Server attributes that are never unset
      ignore_attrs = ['id', 'pbs_license', ATTR_NODE_ProvisionEnable]
      ignore_attrs += [ATTR_status, ATTR_total, ATTR_count]
      ignore_attrs += [ATTR_rescassn, ATTR_FLicenses, ATTR_SvrHost]
      ignore_attrs += [ATTR_license_count, ATTR_version, ATTR_managers]
      ignore_attrs += [ATTR_pbs_license_info, ATTR_power_provisioning]
      unsetlist = []
      setdict = {}
      self.logger.info(self.logprefix +
                       'reverting configuration to defaults')
      self.cleanup_jobs_and_reservations()
      # Installed and shipped copies of the PBS_translate_mpp hook
      self.mpp_hook = os.path.join(self.pbs_conf['PBS_HOME'],
                                   'server_priv', 'hooks',
                                   'PBS_translate_mpp.HK')
      self.dflt_mpp_hook = os.path.join(self.pbs_conf['PBS_EXEC'],
                                        'lib', 'python', 'altair',
                                        'pbs_hooks',
                                        'PBS_translate_mpp.HK')
      if server_stat is None:
          server_stat = self.status(SERVER, level=logging.DEBUG)[0]
      # Every attribute that is neither ignored nor part of the defaults
      # gets unset
      for k in server_stat.keys():
          if (k in ignore_attrs) or (k in self.dflt_attributes.keys()):
              continue
          elif (('.' in k) and (k.split('.')[0] in ignore_attrs)):
              continue
          else:
              unsetlist.append(k)

      if len(unsetlist) != 0:
          self.manager(MGR_CMD_UNSET, MGR_OBJ_SERVER, unsetlist)

      # Defaults that are missing or differ from the current value are
      # collected and applied in one manager call further down
      for k in self.dflt_attributes.keys():
          if(k not in self.attributes or
             self.attributes[k] != self.dflt_attributes[k]):
              setdict[k] = self.dflt_attributes[k]
      if self.platform == 'cray' or self.platform == 'craysim':
          setdict[ATTR_restrict_res_to_release_on_suspend] = 'ncpus'
      if delhooks:
          # Deleting hooks replaces reverting them, except on cray
          # platforms where the hook revert below still runs
          if self.platform == 'cray' or self.platform == 'craysim':
              reverthooks = True
          else:
              reverthooks = False
          hooks = self.status(HOOK, level=logging.DEBUG)
          hooks = [h['id'] for h in hooks]
          if len(hooks) > 0:
              self.manager(MGR_CMD_DELETE, HOOK, id=hooks, expect=True)
      if delqueues:
          revertqueues = False
          queues = self.status(QUEUE, level=logging.DEBUG)
          queues = [q['id'] for q in queues]
          if len(queues) > 0:
              # Best effort: unset the 'queue' attribute on nodes before
              # the queues are deleted; failures are ignored
              try:
                  nodes = self.status(VNODE, logerr=False)
                  for node in nodes:
                      if 'queue' in node.keys():
                          self.manager(MGR_CMD_UNSET, NODE, 'queue',
                                       node['id'])
              except:
                  pass
              self.manager(MGR_CMD_DELETE, QUEUE, id=queues, expect=True)
          # Re-create the default execution queue
          a = {ATTR_qtype: 'Execution',
               ATTR_enable: 'True',
               ATTR_start: 'True'}
          self.manager(MGR_CMD_CREATE, QUEUE, a, id='workq', expect=True)
          setdict.update({ATTR_dfltque: 'workq'})
      if delscheds:
          self.manager(MGR_CMD_LIST, SCHED)
          for name in self.schedulers.keys():
              if name != 'default':
                  # Terminate the scheduler, remove its log and priv
                  # directories, then delete the scheduler object
                  self.schedulers[name].terminate()
                  sched_log = self.schedulers[
                      name].attributes['sched_log']
                  sched_priv = self.schedulers[
                      name].attributes['sched_priv']
                  self.du.rm(path=sched_log, sudo=True,
                             recursive=True, force=True)
                  self.du.rm(path=sched_priv, sudo=True,
                             recursive=True, force=True)
                  self.manager(MGR_CMD_DELETE, SCHED, id=name)

      if reverthooks:
          if self.platform == 'cray' or self.platform == 'craysim':
              # Restore the shipped mpp hook when the installed copy
              # differs, then HUP the server
              if self.du.cmp(self.hostname, self.dflt_mpp_hook,
                             self.mpp_hook, sudo=True) != 0:
                  self.du.run_copy(self.hostname, self.dflt_mpp_hook,
                                   self.mpp_hook, mode=0644, sudo=True)
                  self.signal('-HUP')
          hooks = self.status(HOOK, level=logging.DEBUG)
          hooks = [h['id'] for h in hooks]
          a = {ATTR_enable: 'false'}
          if len(hooks) > 0:
              self.manager(MGR_CMD_SET, MGR_OBJ_HOOK, a, hooks,
                           expect=True)
      if revertqueues:
          self.status(QUEUE, level=logging.DEBUG)
          queues = []
          for (qname, qobj) in self.queues.items():
              # skip reservation queues. This syntax for Python 2.4
              # compatibility
              if (qname.startswith('R') or qname.startswith('S') or
                      qname == server_stat[ATTR_dfltque]):
                  continue
              qobj.revert_to_defaults()
              queues.append(qname)
              a = {ATTR_enable: 'false'}
              self.manager(MGR_CMD_SET, QUEUE, a, id=queues, expect=True)
          # The default queue stays enabled and started
          a = {ATTR_enable: 'True', ATTR_start: 'True'}
          self.manager(MGR_CMD_SET, MGR_OBJ_QUEUE, a,
                       id=server_stat[ATTR_dfltque], expect=True)
      if len(setdict) > 0:
          self.manager(MGR_CMD_SET, MGR_OBJ_SERVER, setdict)
      if revertresources:
          # A failing resource status is treated as "no resources"
          try:
              rescs = self.status(RSC)
              rescs = [r['id'] for r in rescs]
          except:
              rescs = []
          if len(rescs) > 0:
              self.manager(MGR_CMD_DELETE, RSC, id=rescs, expect=True)
      return True
  def save_configuration(self, outfile, mode='a'):
      """
      Save a server configuration, this includes:

        - ``server_priv/resourcedef``

        - ``qmgr -c "print server"``

        - ``qmgr -c "print sched"``

        - ``qmgr -c "print hook"``

      The collected data is pickled into ``outfile``.

      :param outfile: the output file to which the configuration is
                      saved
      :type outfile: str
      :param mode: The mode in which to open outfile to save
                   configuration. The first object being saved
                   should open this file with 'w' and subsequent
                   calls from other objects should save with
                   mode 'a' or 'a+'. Defaults to 'a'
      :type mode: str
      :returns: True on success, False on error
      """
      conf = {}
      sconf = {MGR_OBJ_SERVER: conf}

      rd = os.path.join(self.pbs_conf['PBS_HOME'], 'server_priv',
                        'resourcedef')
      self._save_config_file(conf, rd)

      qmgr = os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qmgr')

      # (host, qmgr directive, key under which the output is stored)
      # NOTE(review): "print server" runs on the client host while the
      # other two run on the server host, as before - confirm that this
      # difference is intended.
      queries = ((self.client, 'p s', 'qmgr_print_server'),
                 (self.hostname, 'p sched', 'qmgr_print_sched'),
                 (self.hostname, 'p h', 'qmgr_print_hook'))
      for (host, directive, key) in queries:
          ret = self.du.run_cmd(host, [qmgr, '-c', directive], sudo=True)
          if ret['rc'] != 0:
              return False
          conf[key] = ret['out']

      try:
          # the with block closes the file even when the dump fails
          with open(outfile, mode) as f:
              cPickle.dump(sconf, f)
      except Exception:
          self.logger.error('Error processing file ' + outfile)
          return False

      return True
  def load_configuration(self, infile):
      """
      Load the server configuration saved in ``infile``.

      The server is reverted to its defaults first, then the saved
      server configuration is loaded through ``_load_configuration``.

      :param infile: file holding a saved configuration
      """
      self.revert_to_defaults()
      obj_kind = MGR_OBJ_SERVER
      self._load_configuration(infile, obj_kind)
  def get_hostname(self):
      """
      Return the default server hostname.

      In CLI mode this is the hostname of this object, otherwise the
      value of ``pbs_default()``.
      """
      in_cli_mode = self.get_op_mode() == PTL_CLI
      if not in_cli_mode:
          return pbs_default()
      return self.hostname
  4501. def _db_connect(self, db_access=None):
  4502. if self._db_conn is None:
  4503. if 'user' not in db_access or\
  4504. 'password' not in db_access:
  4505. self.logger.error('missing credentials to access DB')
  4506. return None
  4507. if 'dbname' not in db_access:
  4508. db_access['dbname'] = 'pbs_datastore'
  4509. if 'port' not in db_access:
  4510. db_access['port'] = '15007'
  4511. if 'host' not in db_access:
  4512. db_access['host'] = self.hostname
  4513. user = db_access['user']
  4514. dbname = db_access['dbname']
  4515. port = db_access['port']
  4516. password = db_access['password']
  4517. host = db_access['host']
  4518. cred = "host=%s dbname=%s user=%s password=%s port=%s" % \
  4519. (host, dbname, user, password, port)
  4520. self._db_conn = psycopg2.connect(cred)
  4521. return self._db_conn
  4522. def _db_server_host(self, cur=None, db_access=None):
  4523. """
  4524. Get the server host name from the database. The server
  4525. host name is stored in the pbs.server table and not in
  4526. pbs.server_attr.
  4527. :param cur: Optional, a predefined cursor to use to
  4528. operate on the DB
  4529. :param db_acccess: set to either file containing
  4530. credentials to DB access or
  4531. dictionary containing
  4532. ``{'dbname':...,'user':...,'port':...}``
  4533. :type db_access: str or dictionary
  4534. """
  4535. local_init = False
  4536. if cur is None:
  4537. conn = self._db_connect(db_access)
  4538. local_init = True
  4539. if conn is None:
  4540. return None
  4541. cur = conn.cursor()
  4542. # obtain server name. The server hostname is stored in table
  4543. # pbs.server
  4544. cur.execute('SELECT sv_hostname from pbs.server')
  4545. if local_init:
  4546. conn.commit()
  4547. tmp_query = cur.fetchone()
  4548. if len(tmp_query) > 0:
  4549. svr_host = tmp_query[0]
  4550. else:
  4551. svr_host = "unknown"
  4552. return svr_host
  def status_db(self, obj_type=None, attrib=None, id=None, db_access=None,
                logerr=True):
      """
      Status PBS objects from the SQL database

      :param obj_type: The type of object to query, one of SERVER,
                       SCHED, JOB, QUEUE, RESV, NODE or VNODE. Any
                       other value (including the default None) is
                       reported as not handled.
      :param attrib: Attributes to query, can be a comma separated
                     string, a list or a dictionary (its keys are
                     used). Default: None. All attributes will be
                     queried
      :type attrib: str or list or dictionary
      :param id: An optional identifier, the name of the object
                 to status. Not applied to SERVER and SCHED queries.
      :type id: str
      :param db_access: information needed to access the database,
                        can be either a file containing ``key=value``
                        lines (user, port, dbname, password) or a
                        dictionary of key/value entries
      :type db_access: str or dictionary
      :param logerr: currently unused
      :type logerr: bool
      :returns: one dictionary per object (``'id'`` plus its
                attributes), or None on error
      """
      if not PSYCOPG:
          self.logger.error('psycopg module unavailable, install from ' +
                            'http://initd.org/psycopg/ and retry')
          return None

      if not isinstance(db_access, dict):
          try:
              with open(db_access, 'r') as f:
                  lines = f.readlines()
          except (IOError, OSError, TypeError):
              self.logger.error('Unable to access ' + str(db_access))
              return None
          db_access = {}
          for line in lines:
              line = line.strip()
              # skip blank or malformed lines; split on the first '='
              # only so values may themselves contain '='
              if '=' not in line:
                  continue
              (k, v) = line.split('=', 1)
              db_access[k.strip()] = v.strip()

      conn = self._db_connect(db_access)
      if conn is None:
          return None

      cur = conn.cursor()

      # name_col/table identify the object; SERVER and SCHED results are
      # reported under this server's hostname and are not filtered by id
      svr_host = None
      filter_by_id = True
      if obj_type == SERVER:
          name_col, table = 'sv_name', 'pbs.server_attr'
          svr_host = self.hostname  # self._db_server_host(cur)
          filter_by_id = False
      elif obj_type == SCHED:
          name_col, table = 'sched_name', 'pbs.scheduler_attr'
          # reuse server host name for sched host
          svr_host = self.hostname
          filter_by_id = False
      elif obj_type == JOB:
          name_col, table = 'ji_jobid', 'pbs.job_attr'
      elif obj_type == QUEUE:
          name_col, table = 'qu_name', 'pbs.queue_attr'
      elif obj_type == RESV:
          name_col, table = 'ri_resvid', 'pbs.resv_attr'
      elif obj_type in (NODE, VNODE):
          name_col, table = 'nd_name', 'pbs.node_attr'
      else:
          self.logger.error('status: object type not handled')
          return None

      # Normalize the requested attributes into a list of names
      attrs = []
      if attrib:
          if isinstance(attrib, dict):
              attrs = list(attrib.keys())
          elif hasattr(attrib, 'split'):
              attrs = attrib.split(',')
          else:
              attrs = list(attrib)

      # Column and table names are fixed strings from above; all values
      # are passed as query parameters and never spliced into the SQL
      exec_stmt = ("SELECT " + name_col +
                   ",attr_name,attr_resource,attr_value FROM " + table)
      clauses = []
      params = []
      if attrs:
          clauses.append("(" +
                         " OR ".join(["attr_name=%s"] * len(attrs)) + ")")
          params.extend(attrs)
      if id and filter_by_id:
          clauses.append(name_col + "=%s")
          params.append(id)
      if clauses:
          exec_stmt += " WHERE " + " AND ".join(clauses)

      self.logger.debug('server: executing db statement: ' + exec_stmt +
                        ' with parameters ' + str(params))
      if params:
          cur.execute(exec_stmt, params)
      else:
          cur.execute(exec_stmt)
      conn.commit()
      _results = cur.fetchall()

      obj_dict = {}
      for _res in _results:
          if obj_type in (SERVER, SCHED):
              obj_name = svr_host
          else:
              obj_name = _res[0]
          if obj_name not in obj_dict:
              obj_dict[obj_name] = {'id': obj_name}
          # attribute name, qualified by its resource when present
          attr = _res[1]
          if _res[2]:
              attr += '.' + _res[2]

          obj_dict[obj_name][attr] = _res[3]

      return obj_dict.values()
  4657. #
  4658. # Begin IFL Wrappers
  4659. #
  def status(self, obj_type=SERVER, attrib=None, id=None,
             extend=None, level=logging.INFO, db_access=None, runas=None,
             resolve_indirectness=False, logerr=True):
      """
      Stat any PBS object ``[queue, server, node, hook, job,
      resv, sched]``. If the Server is setup from diag input,
      see diag or diagmap member, the status calls are routed
      directly to the data on files from diag.

      The server can be queried either through the 'qstat'
      command line tool or through the wrapped PBS IFL api,
      see set_op_mode.

      Return a dictionary representation of a batch status object
      raises ``PbsStatusError`` on error.

      :param obj_type: The type of object to query, one of the *
                       objects. Default: SERVER
      :param attrib: Attributes to query, can be a string, a
                     list, a dictionary. Default is to query all
                     attributes.
      :type attrib: str or list or dictionary
      :param id: An optional id, the name of the object to status
      :type id: str
      :param extend: Optional extension to the IFL call
      :param level: The logging level, defaults to INFO
      :type level: str
      :param db_access: set to either file containing credentials
                        to DB access or dictionary containing
                        ``{'dbname':...,'user':...,'port':...}``
      :type db_access: str or dictionary
      :param runas: run stat as user
      :type runas: str
      :param resolve_indirectness: If True resolves indirect node
                                   resources values
      :type resolve_indirectness: bool
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :raises: PbsStatusError

      In addition to standard IFL stat call, this wrapper handles
      a few cases that aren't implicitly offered by pbs_stat*,
      those are for Hooks, Resources, and a formula evaluation.
      """

      prefix = 'status on ' + self.shortname
      if runas:
          prefix += ' as ' + str(runas)
      prefix += ': '
      self.logit(prefix, obj_type, attrib, id, level)

      bs = None
      bsl = []
      freebs = False
      # 2 - Special handling for gathering the job formula value.
      if attrib is not None and PTL_FORMULA in attrib:
          if (((isinstance(attrib, list) or isinstance(attrib, dict)) and
               (len(attrib) == 1)) or
                  (isinstance(attrib, str) and
                   len(attrib.split(',')) == 1)):
              bsl = self.status(
                  JOB, 'Resource_List.select', id=id, extend='t')
          if self.schedulers[self.dflt_sched_name] is None:
              self.schedulers[self.dflt_sched_name] = Scheduler(
                  self.hostname)
          # Stays None when the scheduler has no log_filter configured, in
          # which case there is nothing to inspect or to restore below.
          _prev_filter = None
          if 'log_filter' in self.schedulers[
                  self.dflt_sched_name].sched_config:
              _prev_filter = self.schedulers[
                  self.dflt_sched_name].sched_config[
                  'log_filter']
              # NOTE(review): the filter is overridden when bit 2048 is set
              # but only restored when it is not set; confirm the intent
              if int(_prev_filter) & 2048:
                  self.schedulers[self.dflt_sched_name].set_sched_config(
                      {'log_filter': 2048})
          self.manager(MGR_CMD_SET, SERVER, {'scheduling': 'True'})
          if id is None:
              _formulas = self.schedulers[
                  self.dflt_sched_name].job_formula()
          else:
              _formulas = {
                  id: self.schedulers[
                      self.dflt_sched_name].job_formula(
                      jobid=id)
              }
          if _prev_filter is not None and not int(_prev_filter) & 2048:
              self.schedulers[self.dflt_sched_name].set_sched_config(
                  {'log_filter': int(_prev_filter)})

          if len(bsl) == 0:
              bsl = [{'id': id}]
          for _b in bsl:
              if _b['id'] in _formulas:
                  _b[PTL_FORMULA] = _formulas[_b['id']]
          return bsl

      # 3- Serve data from database if requested... and available for the
      # given object type
      if db_access and obj_type in (SERVER, SCHED, NODE, QUEUE, RESV, JOB):
          bsl = self.status_db(obj_type, attrib, id, db_access=db_access,
                               logerr=logerr)

      # 4- Serve data from diag files
      elif obj_type in self.diagmap:
          if obj_type in (HOOK, PBS_HOOK):
              for f in self.diagmap[obj_type]:
                  _b = self.utils.file_to_dictlist(f, attrib)
                  if not _b:
                      # nothing was parsed out of this file, so there is
                      # no entry to name or to report
                      continue
                  if 'hook_name' in _b[0]:
                      _b[0]['id'] = _b[0]['hook_name']
                  else:
                      _b[0]['id'] = os.path.basename(f)
                  if id is None or id == _b[0]['id']:
                      bsl.extend(_b)
          else:
              bsl = self.utils.file_to_dictlist(self.diagmap[obj_type],
                                                attrib, id=id)
      # 6- Stat using PBS CLI commands
      elif self.get_op_mode() == PTL_CLI:
          tgt = self.client
          if obj_type in (JOB, QUEUE, SERVER):
              pcmd = [os.path.join(
                      self.client_conf['PBS_EXEC'],
                      'bin',
                      'qstat')]

              if extend:
                  pcmd += ['-' + extend]

              if obj_type == JOB:
                  pcmd += ['-f']
                  if id:
                      pcmd += [id]
                  else:
                      pcmd += ['@' + self.hostname]
              elif obj_type == QUEUE:
                  pcmd += ['-Qf']
                  if id:
                      if '@' not in id:
                          pcmd += [id + '@' + self.hostname]
                      else:
                          pcmd += [id]
                  else:
                      pcmd += ['@' + self.hostname]
              elif obj_type == SERVER:
                  pcmd += ['-Bf', self.hostname]

          elif obj_type in (NODE, VNODE, HOST):
              pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                                   'pbsnodes')]
              pcmd += ['-s', self.hostname]
              if obj_type in (NODE, VNODE):
                  pcmd += ['-v']
              if obj_type == HOST:
                  pcmd += ['-H']
              if id:
                  pcmd += [id]
              else:
                  pcmd += ['-a']
          elif obj_type == RESV:
              pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                                   'pbs_rstat')]
              pcmd += ['-f']
              if id:
                  pcmd += [id]
          elif obj_type in (SCHED, PBS_HOOK, HOOK, RSC):
              # these types are listed through manager(), which refreshes
              # the per-type object maps read back below
              try:
                  rc = self.manager(MGR_CMD_LIST, obj_type, attrib, id,
                                    runas=runas, level=level,
                                    logerr=logerr)
              except PbsManagerError as e:
                  rc = e.rc
                  # PBS bug, no hooks yields a return code of 1, we ignore
                  if obj_type != HOOK:
                      raise PbsStatusError(
                          rc=rc, rv=[], msg=self.geterrmsg())
              if rc == 0:
                  if obj_type == HOOK:
                      o = self.hooks
                  elif obj_type == PBS_HOOK:
                      o = self.pbshooks
                  elif obj_type == SCHED:
                      o = self.schedulers
                  elif obj_type == RSC:
                      o = self.resources
                  if id:
                      if id in o:
                          return [o[id].attributes]
                      else:
                          return None
                  return [h.attributes for h in o.values()]
              return []

          else:
              self.logger.error(self.logprefix + "unrecognized object type")
              raise PbsStatusError(rc=-1, rv=[],
                                   msg="unrecognized object type")

          # as_script is used to circumvent some shells that will not pass
          # along environment variables when invoking a command through sudo
          if not self.default_client_pbs_conf:
              pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
              as_script = True
          elif obj_type == RESV and not self._is_local:
              pcmd = ['PBS_SERVER=' + self.hostname] + pcmd
              as_script = True
          else:
              as_script = False

          ret = self.du.run_cmd(tgt, pcmd, runas=runas, as_script=as_script,
                                level=logging.INFOCLI, logerr=logerr)
          o = ret['out']
          if ret['err'] != ['']:
              self.last_error = ret['err']
          self.last_rc = ret['rc']
          if ret['rc'] != 0:
              raise PbsStatusError(rc=ret['rc'], rv=[],
                                   msg=self.geterrmsg())

          bsl = self.utils.convert_to_dictlist(o, attrib, mergelines=True)

      # 7- Stat with impersonation over PBS IFL swig-wrapped API
      elif runas is not None:
          _data = {'obj_type': obj_type, 'attrib': attrib, 'id': id}
          bsl = self.pbs_api_as('status', user=runas, data=_data,
                                extend=extend)
      else:
          # 8- Stat over PBS IFL API
          #
          # resources are special attributes, all resources are queried as
          # a single attribute.
          # e.g. querying the resources_available attribute returns all
          # resources such as ncpus, mem etc. when querying for
          # resources_available.ncpus and resources_available.mem only query
          # resources_available once and retrieve the resources desired from
          # there
          if isinstance(attrib, dict):
              attribcopy = {}
              restype = []
              for k, v in attrib.items():
                  if isinstance(v, tuple):
                      # SET requires a special handling because status may
                      # have been called through counter to count the number
                      # of objects have a given attribute set, in this case
                      # we set the attribute to an empty string rather than
                      # the number of elements requested. This is a
                      # side-effect of the way pbs_statjob works
                      if v[0] in (SET, MATCH_RE):
                          v = ''
                      else:
                          v = v[1]
                  if callable(v):
                      v = ''
                  if '.' in k:
                      _r = k.split('.')[0]
                      if _r not in restype:
                          attribcopy[k] = v
                          restype.append(_r)
                  else:
                      attribcopy[k] = v
          elif isinstance(attrib, list):
              attribcopy = []
              for k in attrib:
                  if '.' in k:
                      _found = False
                      for _e in attribcopy:
                          _r = k.split('.')[0]
                          if _r == _e.split('.')[0]:
                              _found = True
                              break
                      if not _found:
                          attribcopy.append(k)
                  else:
                      attribcopy.append(k)
          else:
              attribcopy = attrib

          a = self.utils.convert_to_attrl(attribcopy)
          c = self._connect(self.hostname)

          if obj_type == JOB:
              bs = pbs_statjob(c, id, a, extend)
          elif obj_type == QUEUE:
              bs = pbs_statque(c, id, a, extend)
          elif obj_type == SERVER:
              bs = pbs_statserver(c, a, extend)
          elif obj_type == HOST:
              bs = pbs_statnode(c, id, a, extend)
          elif obj_type == VNODE:
              bs = pbs_statvnode(c, id, a, extend)
          elif obj_type == RESV:
              bs = pbs_statresv(c, id, a, extend)
          elif obj_type == SCHED:
              bs = pbs_statsched(c, a, extend)
          elif obj_type == RSC:
              # up to PBSPro 12.3 pbs_statrsc was not in pbs_ifl.h
              bs = pbs_statrsc(c, id, a, extend)
          elif obj_type in (HOOK, PBS_HOOK):
              if os.getuid() != 0:
                  try:
                      rc = self.manager(MGR_CMD_LIST, obj_type, attrib,
                                        id, level=level)
                      if rc == 0:
                          if id:
                              if (obj_type == HOOK and
                                      id in self.hooks):
                                  return [self.hooks[id].attributes]
                              elif (obj_type == PBS_HOOK and
                                    id in self.pbshooks):
                                  return [self.pbshooks[id].attributes]
                              else:
                                  return None
                          if obj_type == HOOK:
                              return [h.attributes for h in
                                      self.hooks.values()]
                          elif obj_type == PBS_HOOK:
                              return [h.attributes for h in
                                      self.pbshooks.values()]
                  except Exception:
                      # best effort: a failed listing falls through with
                      # bs left as None
                      pass
              else:
                  bs = pbs_stathook(c, id, a, extend)
          else:
              self.logger.error(self.logprefix +
                                "unrecognized object type " + str(obj_type))

          freebs = True
          err = self.geterrmsg()
          self._disconnect(c)

          if err:
              raise PbsStatusError(rc=-1, rv=[], msg=err)

          if not isinstance(bs, list):
              bsl = self.utils.batch_status_to_dictlist(bs, attrib)
          else:
              bsl = self.utils.filter_batch_status(bs, attrib)

      # Update each object's dictionary with corresponding attributes and
      # values
      self.update_attributes(obj_type, bsl)

      # Hook stat is done through CLI, no need to free the batch_status
      if (not isinstance(bs, list) and freebs and
              obj_type not in (HOOK, PBS_HOOK) and os.getuid() != 0):
          pbs_statfree(bs)

      # 9- Resolve indirect resources
      if obj_type in (NODE, VNODE) and resolve_indirectness:
          # target nodes already located, keyed by node name
          nodes = {}
          for _b in bsl:
              for k, v in _b.items():
                  if v.startswith('@'):
                      # '@<node>' means the value lives on node <node>
                      target = v[1:]
                      if target in nodes:
                          _b[k] = nodes[target][k]
                      else:
                          for _n in bsl:
                              if _n['id'] == target:
                                  nodes[target] = _n
                                  _b[k] = _n[k]
                                  break
          del nodes
      return bsl
  def submit_interactive_job(self, job, cmd):
      """
      submit an ``interactive`` job. Returns a job identifier
      once the submission has reported one

      :param cmd: The command to run to submit the interactive
                  job
      :type cmd: str
      :param job: the job object. The job must have the attribute
                  'interactive_job' populated. That attribute is
                  a list of tuples of the form:
                  (<command>, <expected output>, <...>)
                  for example to send the command
                  hostname and expect 'myhost.mydomain' one would
                  set:job.interactive_job =
                  [('hostname', 'myhost.mydomain')]
                  If more than one lines are expected they are
                  appended to the tuple.
      :returns: The identifier picked up from the interactive
                submission
      """
      ij = InteractiveJob(job, cmd, self.hostname)
      # start the interactive job submission thread and wait to pickup the
      # actual job identifier
      ij.start()
      while ij.jobid is None:
          # yield the CPU between polls instead of spinning on the attribute
          # NOTE(review): this still waits forever if the submission never
          # sets jobid; confirm whether InteractiveJob can report failure
          time.sleep(0.1)
      return ij.jobid
  def submit(self, obj, script=None, extend=None, submit_dir=None):
      """
      Submit a job or reservation. Returns a job identifier
      or raises PbsSubmitError on error

      :param obj: The Job or Reservation instance to submit
      :param script: Path to a script to submit. Default: None,
                     in which case the script set on the job object,
                     if any, is used
      :type script: str or None
      :param extend: Optional extension to the IFL call.
                     see pbs_ifl.h
      :type extend: str or None
      :param submit_dir: directory from which job is submitted.
                         Defaults to the home directory of the
                         submitting user
      :type submit_dir: str or None
      :returns: The identifier of the submitted object. None when
                obj is neither a Job nor a Reservation or when its
                attributes cannot be converted to a command line
      :raises: PbsSubmitError
      """
      _interactive_job = False
      as_script = False
      rc = None
      if isinstance(obj, Job):
          if self.platform == 'cray' or self.platform == 'craysim':
              m = False
              vncompute = False
              if 'Resource_List.select' in obj.attributes:
                  select = obj.attributes['Resource_List.select']
                  start = select.startswith('vntype=cray_compute')
                  m = start or ':vntype=cray_compute' in select
              if 'Resource_List.vntype' in obj.attributes:
                  vn_type = obj.attributes['Resource_List.vntype']
                  if vn_type == 'cray_compute':
                      vncompute = True
              if obj.script is not None:
                  script = obj.script
              elif m or vncompute:
                  # jobs targeting cray compute nodes get wrapped in a
                  # generated script that launches the work through aprun
                  aprun_cmd = "aprun -b -B"
                  executable = obj.attributes[ATTR_executable]
                  start = executable.startswith('aprun ')
                  aprun_exist = start or '/aprun' in executable
                  if script:
                      aprun_cmd += " " + script
                  else:
                      if aprun_exist:
                          aprun_cmd = executable
                      else:
                          aprun_cmd += " " + executable
                      arg_list = obj.attributes[ATTR_Arglist]
                      aprun_cmd += " " + self.utils.convert_arglist(
                          arg_list)
                  fn = self.du.create_temp_file(hostname=None,
                                                prefix='PtlPbsJobScript',
                                                asuser=obj.username,
                                                body=aprun_cmd)
                  # 0o755 is the octal spelling accepted by Python 2.6+
                  # and Python 3
                  self.du.chmod(path=fn, mode=0o755)
                  script = fn
          elif script is None and obj.script is not None:
              script = obj.script
          if ATTR_inter in obj.attributes:
              _interactive_job = True
              if ATTR_executable in obj.attributes:
                  del obj.attributes[ATTR_executable]
              if ATTR_Arglist in obj.attributes:
                  del obj.attributes[ATTR_Arglist]
      elif not isinstance(obj, Reservation):
          m = self.logprefix + "unrecognized object type"
          self.logger.error(m)
          return None

      if submit_dir is None:
          # field 5 of the passwd entry is the user's home directory
          submit_dir = pwd.getpwnam(obj.username)[5]

      cwd = os.getcwd()
      os.chdir(submit_dir)
      c = None
      # Everything from here on runs from submit_dir; the finally clause
      # returns to the original directory on every exit path, including
      # exceptions raised by the helpers called below.
      try:
          # 1- Submission using the command line tools
          if self.get_op_mode() == PTL_CLI:
              exclude_attrs = []  # list of attributes to not convert to CLI
              if isinstance(obj, Job):
                  runcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                                         'qsub')]
              elif isinstance(obj, Reservation):
                  runcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                                         'pbs_rsub')]
                  if ATTR_resv_start in obj.custom_attrs:
                      start = obj.custom_attrs[ATTR_resv_start]
                      obj.custom_attrs[ATTR_resv_start] = \
                          self.utils.convert_seconds_to_datetime(start)
                  if ATTR_resv_end in obj.custom_attrs:
                      end = obj.custom_attrs[ATTR_resv_end]
                      obj.custom_attrs[ATTR_resv_end] = \
                          self.utils.convert_seconds_to_datetime(end)
                  if ATTR_resv_timezone in obj.custom_attrs:
                      exclude_attrs += [ATTR_resv_timezone,
                                        ATTR_resv_standing]
                      # handling of impersonation differs widely across
                      # OS's, when setting PBS_TZID we standardize on
                      # running the cmd as a script instead of customizing
                      # for each OS flavor
                      _tz = obj.custom_attrs[ATTR_resv_timezone]
                      runcmd = ['PBS_TZID=' + _tz] + runcmd
                      as_script = True
                      if ATTR_resv_rrule in obj.custom_attrs:
                          # quote the recurrence rule unless the caller
                          # already did
                          _rrule = obj.custom_attrs[ATTR_resv_rrule]
                          if _rrule[0] not in ("'", '"'):
                              _rrule = "'" + _rrule + "'"
                          obj.custom_attrs[ATTR_resv_rrule] = _rrule

              if not self._is_local:
                  if ATTR_queue not in obj.attributes:
                      runcmd += ['-q@' + self.hostname]
                  elif '@' not in obj.attributes[ATTR_queue]:
                      curq = obj.attributes[ATTR_queue]
                      runcmd += ['-q' + curq + '@' + self.hostname]
                  if obj.custom_attrs and (ATTR_queue in obj.custom_attrs):
                      del obj.custom_attrs[ATTR_queue]

              _conf = self.default_client_pbs_conf
              cmd = self.utils.convert_to_cli(obj.custom_attrs, IFL_SUBMIT,
                                              self.hostname, dflt_conf=_conf,
                                              exclude_attrs=exclude_attrs)

              if cmd is None:
                  return None

              runcmd += cmd

              if script:
                  runcmd += [script]
              else:
                  if ATTR_executable in obj.attributes:
                      runcmd += ['--', obj.attributes[ATTR_executable]]
                      if ((ATTR_Arglist in obj.attributes) and
                              (obj.attributes[ATTR_Arglist] is not None)):
                          args = obj.attributes[ATTR_Arglist]
                          arglist = self.utils.convert_arglist(args)
                          if arglist is None:
                              return None
                          runcmd += [arglist]
              if obj.username != self.current_user:
                  runas = obj.username
              else:
                  runas = None

              if _interactive_job:
                  return self.submit_interactive_job(obj, runcmd)

              if not self.default_client_pbs_conf:
                  runcmd = [
                      'PBS_CONF_FILE=' + self.client_pbs_conf_file] + runcmd
                  as_script = True

              ret = self.du.run_cmd(self.client, runcmd, runas=runas,
                                    level=logging.INFOCLI,
                                    as_script=as_script, logerr=False)
              if ret['rc'] != 0:
                  objid = None
              else:
                  objid = ret['out'][0]
              if ret['err'] != ['']:
                  self.last_error = ret['err']
              self.last_rc = rc = ret['rc']

          # 2- Submission with impersonation over API
          elif obj.username != self.current_user:
              # submit job as a user requires setting uid to that user. It's
              # done in a separate process
              obj.set_variable_list(obj.username, submit_dir)
              obj.set_attributes()
              if (obj.script is not None and not self._is_local):
                  # This copy assumes that the file system layout on the
                  # remote host is identical to the local host. When not
                  # the case, this code will need to be updated to copy
                  # to a known remote location and update the obj.script
                  self.du.run_copy(self.hostname, obj.script, obj.script)
                  os.remove(obj.script)
              objid = self.pbs_api_as('submit', obj, user=obj.username,
                                      extend=extend)
          # 3- Submission as current user over API
          else:
              c = self._connect(self.hostname)

              if isinstance(obj, Job):
                  if script:
                      if ATTR_o not in obj.attributes:
                          obj.attributes[ATTR_o] = (self.hostname + ':' +
                                                    obj.script + '.o')
                      if ATTR_e not in obj.attributes:
                          obj.attributes[ATTR_e] = (self.hostname + ':' +
                                                    obj.script + '.e')
                      sc = os.path.basename(script)
                      obj.unset_attributes([ATTR_executable, ATTR_Arglist])
                      if ATTR_N not in obj.custom_attrs:
                          obj.attributes[ATTR_N] = sc
                  if ATTR_queue in obj.attributes:
                      destination = obj.attributes[ATTR_queue]
                      # queue must be removed otherwise will cause the
                      # submit to fail silently
                      del obj.attributes[ATTR_queue]
                  else:
                      destination = None

                      # NOTE(review): default output/error paths are only
                      # filled in when no queue was given; confirm intent
                      if (ATTR_o not in obj.attributes or
                              ATTR_e not in obj.attributes):
                          fn = self.utils.random_str(
                              length=4, prefix='PtlPbsJob')
                          tmp = self.du.get_tempdir(self.hostname)
                          fn = os.path.join(tmp, fn)
                      if ATTR_o not in obj.attributes:
                          obj.attributes[ATTR_o] = (self.hostname + ':' +
                                                    fn + '.o')
                      if ATTR_e not in obj.attributes:
                          obj.attributes[ATTR_e] = (self.hostname + ':' +
                                                    fn + '.e')

                  obj.attropl = self.utils.dict_to_attropl(obj.attributes)
                  objid = pbs_submit(c, obj.attropl, script, destination,
                                     extend)
              elif isinstance(obj, Reservation):
                  if ATTR_resv_duration in obj.attributes:
                      # reserve_duration is not a valid attribute, the API
                      # call will get rejected if it is used
                      wlt = ATTR_l + '.walltime'
                      obj.attributes[wlt] = \
                          obj.attributes[ATTR_resv_duration]
                      del obj.attributes[ATTR_resv_duration]

                  obj.attropl = self.utils.dict_to_attropl(obj.attributes)
                  objid = pbs_submit_resv(c, obj.attropl, extend)

          prefix = 'submit to ' + self.shortname + ' as '
          if isinstance(obj, Job):
              self.logit(prefix + '%s: ' % obj.username, JOB,
                         obj.custom_attrs, objid)
              if obj.script_body:
                  self.logger.log(logging.INFOCLI, 'job script ' + script +
                                  '\n---\n' + obj.script_body + '\n---')
              if objid is not None:
                  self.jobs[objid] = obj
          elif isinstance(obj, Reservation):
              # Reservations without -I option return as 'R123 UNCONFIRMED'
              # so split to get the R123 only
              self.logit(prefix + '%s: ' % obj.username, RESV,
                         obj.attributes, objid)
              if objid is not None:
                  objid = objid.split()[0]
                  self.reservations[objid] = obj

          if objid is not None:
              obj.server[self.hostname] = objid
          else:
              raise PbsSubmitError(rc=rc, rv=None, msg=self.geterrmsg(),
                                   post=self._disconnect, conn=c)

          if c:
              self._disconnect(c)

          return objid
      finally:
          try:
              os.chdir(cwd)
          except OSError:
              # the original directory may be gone; nothing more to do
              pass
  def deljob(self, id=None, extend=None, runas=None, wait=False,
             logerr=True, attr_W=None):
      """
      delete a single job or list of jobs specified by id
      raises ``PbsDeljobError`` on error

      :param id: The identifier(s) of the jobs to delete, either a
                 list or a comma separated string
      :type id: str or list
      :param extend: Optional parameters to pass along to PBS
      :type extend: str or None
      :param runas: run as user
      :type runas: str or None
      :param wait: Set to True to wait for job(s) to no longer
                   be reported by PBS. False by default
      :type wait: bool
      :param logerr: Whether to log errors. Defaults to True.
      :type logerr: bool
      :param attr_W: -W args to qdel (Only for cli mode)
      :type attr_W: str
      :returns: The return code of the delete operation, 0 on success
      :raises: PbsDeljobError
      """
      prefix = 'delete job on ' + self.shortname
      if runas is not None:
          prefix += ' as ' + str(runas)
      prefix += ': '
      if id is not None:
          if not isinstance(id, list):
              id = id.split(',')
          prefix += ', '.join(id)
      self.logger.info(prefix)
      # identifiers to iterate over; empty when no id was given so that
      # the loops below do not fail on None
      ids = id if id is not None else []
      c = None
      if self.get_op_mode() == PTL_CLI:
          pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qdel')]
          if extend is not None:
              pcmd += self.utils.convert_to_cli(extend, op=IFL_DELETE,
                                                hostname=self.hostname)
          if attr_W is not None:
              pcmd += ['-W']
              if attr_W != PTL_NOARG:
                  pcmd += [attr_W]
          if id is not None:
              pcmd += id
          # run as a script whenever an environment variable is prepended
          # to the command
          if not self.default_client_pbs_conf:
              pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
              as_script = True
          elif not self._is_local:
              pcmd = ['PBS_SERVER=' + self.hostname] + pcmd
              as_script = True
          else:
              as_script = False
          ret = self.du.run_cmd(self.client, pcmd, runas=runas,
                                as_script=as_script, logerr=logerr,
                                level=logging.INFOCLI)
          rc = ret['rc']
          if ret['err'] != ['']:
              self.last_error = ret['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('deljob', id, user=runas, extend=extend)
      else:
          c = self._connect(self.hostname)
          rc = 0
          for ajob in ids:
              tmp_rc = pbs_deljob(c, ajob, extend)
              # keep going on failure; the last non-zero code is reported
              if tmp_rc != 0:
                  rc = tmp_rc
      if rc != 0:
          raise PbsDeljobError(rc=rc, rv=False, msg=self.geterrmsg(),
                               post=self._disconnect, conn=c)
      if self.jobs is not None:
          for j in ids:
              if j in self.jobs:
                  if self.jobs[j].interactive_handle is not None:
                      self.jobs[j].interactive_handle.close()
                  del self.jobs[j]
      if c:
          self._disconnect(c)
      if wait:
          for oid in ids:
              self.expect(JOB, 'queue', id=oid, op=UNSET, runas=runas,
                          level=logging.DEBUG)
      return rc
  def delresv(self, id=None, extend=None, runas=None, wait=False,
              logerr=True):
      """
      delete a single reservation or list of reservations specified
      by id raises ``PbsDelresvError`` on error

      :param id: The identifier(s) of the reservations to delete,
                 either a list or a comma separated string
      :type id: str or list
      :param extend: Optional parameters to pass along to PBS
                     (not used in cli mode)
      :type extend: str or None
      :param runas: run as user
      :type runas: str or None
      :param wait: Set to True to wait for reservation(s) to no
                   longer be reported by PBS. False by default
      :type wait: bool
      :param logerr: Whether to log errors. Defaults to True.
      :type logerr: bool
      :returns: The return code of the delete operation, 0 on success
      :raises: PbsDelresvError
      """
      prefix = 'delete resv on ' + self.shortname
      if runas is not None:
          prefix += ' as ' + str(runas)
      prefix += ': '
      if id is not None:
          if not isinstance(id, list):
              id = id.split(',')
          prefix += ', '.join(id)
      self.logger.info(prefix)
      # identifiers to iterate over; empty when no id was given so that
      # the loops below do not fail on None
      ids = id if id is not None else []
      c = None
      if self.get_op_mode() == PTL_CLI:
          pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                               'pbs_rdel')]
          if id is not None:
              pcmd += id
          # run as a script whenever an environment variable is prepended
          # to the command
          if not self.default_client_pbs_conf:
              pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
              as_script = True
          elif not self._is_local:
              pcmd = ['PBS_SERVER=' + self.hostname] + pcmd
              as_script = True
          else:
              as_script = False
          ret = self.du.run_cmd(self.client, pcmd, runas=runas,
                                as_script=as_script, logerr=logerr,
                                level=logging.INFOCLI)
          rc = ret['rc']
          if ret['err'] != ['']:
              self.last_error = ret['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('delresv', id, user=runas, extend=extend)
      else:
          c = self._connect(self.hostname)
          rc = 0
          for aresv in ids:
              tmp_rc = pbs_delresv(c, aresv, extend)
              # keep going on failure; the last non-zero code is reported
              if tmp_rc != 0:
                  rc = tmp_rc
      if rc != 0:
          raise PbsDelresvError(rc=rc, rv=False, msg=self.geterrmsg(),
                                post=self._disconnect, conn=c)
      if self.reservations is not None:
          for j in ids:
              if j in self.reservations:
                  del self.reservations[j]
      if c:
          self._disconnect(c)
      if wait:
          for oid in ids:
              self.expect(RESV, 'queue', id=oid, op=UNSET, runas=runas,
                          level=logging.DEBUG)
      return rc
  def delete(self, id=None, extend=None, runas=None, wait=False,
             logerr=True):
      """
      delete a single job/reservation or a list of them specified
      by id raises ``PbsDeleteError`` on error

      Identifiers starting with 'R' or 'S' are deleted as
      reservations, all others as jobs. Every identifier is
      attempted before an error is raised.

      :param id: The identifier(s) of the jobs/resvs to delete,
                 either a list or a comma separated string
      :type id: str or list
      :param extend: Optional parameters to pass along to PBS
      :type extend: str or none
      :param runas: run as user
      :type runas: str
      :param wait: Set to True to wait for job(s)/resv(s) to
                   no longer be reported by PBS. False by default
      :type wait: bool
      :param logerr: Whether to log errors. Defaults to True.
      :type logerr: bool
      :returns: 0 when everything was deleted or when there was
                nothing to delete
      :raises: PbsDeleteError
      """
      prefix = 'delete on ' + self.shortname
      if runas is not None:
          prefix += ' as ' + str(runas)
      prefix += ': '
      if id is not None:
          if not isinstance(id, list):
              id = id.split(',')
          prefix += ','.join(id)
      if extend is not None:
          prefix += ' with ' + str(extend)
      self.logger.info(prefix)

      # nothing to delete, whether id was omitted or given empty
      if not id:
          return 0

      obj_type = {}
      # the most recent failure, kept so that a later success does not
      # hide it; None while every deletion has succeeded
      failure = None
      for j in id:
          if j[0] in ('R', 'S'):
              obj_type[j] = RESV
              try:
                  rc = self.delresv(j, extend, runas, logerr=logerr)
              except PbsDelresvError as e:
                  rc = e.rc
                  failure = (e.rc, e.rv, e.msg)
          else:
              obj_type[j] = JOB
              try:
                  rc = self.deljob(j, extend, runas, logerr=logerr)
              except PbsDeljobError as e:
                  rc = e.rc
                  failure = (e.rc, e.rv, e.msg)
          if rc != 0 and (failure is None or failure[0] != rc):
              # non-zero return without an exception: no details available
              failure = (rc, False, None)

      if failure is not None and failure[0] != 0:
          raise PbsDeleteError(rc=failure[0], rv=failure[1],
                               msg=failure[2])

      if wait:
          for oid in id:
              self.expect(obj_type[oid], 'queue', id=oid, op=UNSET,
                          runas=runas, level=logging.DEBUG)

      return 0
  def select(self, attrib=None, extend=None, runas=None, logerr=True):
      """
      Return the identifiers of the jobs matching ``attrib``, or of all
      jobs when no attributes are given.

      In CLI mode ``qselect`` is run; otherwise, when ``runas`` is
      given, the request goes through ``pbs_api_as``; otherwise
      ``pbs_selectjob`` is called over a server connection.

      :param attrib: Attributes to match. Any value other than a
                     dictionary or None is rejected: an error is
                     logged and None is returned.
      :type attrib: dictionary or None
      :param extend: the extended attributes to pass to select
      :type extend: str or None
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: A list of job identifiers that match the
                attributes specified
      :raises: PbsSelectError
      """
      banner = "select on " + self.shortname
      if runas is not None:
          banner += " as " + str(runas)
      banner += ": "

      if attrib is not None and not isinstance(attrib, dict):
          self.logger.error(banner + "attributes must be a dictionary")
          return
      criteria = PTL_ALL if attrib is None else str(attrib)
      self.logger.info(banner + criteria)

      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'],
                               'bin', 'qselect')]
          selectors = self.utils.convert_to_cli(attrib, op=IFL_SELECT,
                                                hostname=self.hostname)
          if extend is not None:
              argv.append('-' + extend)
          # For a remote server, name it explicitly unless the caller
          # already selects on a queue.
          if not self._is_local and (attrib is None or
                                     ATTR_queue not in attrib):
              argv.extend(['-q', '@' + self.hostname])
          argv += selectors
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   as_script=as_script,
                                   level=logging.INFOCLI, logerr=logerr)
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = result['rc']
          if self.last_rc != 0:
              raise PbsSelectError(rc=self.last_rc, rv=False,
                                   msg=self.geterrmsg())
          jobs = result['out']
          # No matching job is reported as a single empty string;
          # callers expect valid ids only, so hand back an empty list.
          if len(jobs) == 1 and jobs[0] == '':
              jobs = []
          return jobs

      if runas is not None:
          return self.pbs_api_as('select', user=runas, data=attrib,
                                 extend=extend)

      attropl = self.utils.convert_to_attropl(attrib, op=EQ)
      conn = self._connect(self.hostname)
      jobs = pbs_selectjob(conn, attropl, extend)
      err = self.geterrmsg()
      if err:
          raise PbsSelectError(rc=-1, rv=False, msg=err,
                               post=self._disconnect, conn=conn)
      self._disconnect(conn)
      return jobs
  def selstat(self, select_list, rattrib, runas=None, extend=None):
      """
      Stat jobs and filter them on their attributes in a single call to
      ``pbs_selstat``.

      :param select_list: The filter criteria
      :type select_list: List
      :param rattrib: The attributes to query
      :type rattrib: List
      :param runas: run as user (accepted but not used by this method)
      :type runas: str or None
      :param extend: passed through to ``pbs_selstat``
      :returns: whatever ``pbs_selstat`` returns

      .. note:: No ``CLI`` counterpart for this call
      """
      wanted = self.utils.convert_to_attrl(rattrib)
      criteria = self.utils.convert_to_attropl(select_list)
      conn = self._connect(self.hostname)
      batch_status = pbs_selstat(conn, criteria, wanted, extend)
      self._disconnect(conn)
      return batch_status
  5568. def manager(self, cmd, obj_type, attrib=None, id=None, extend=None,
  5569. expect=False, max_attempts=None, level=logging.INFO,
  5570. sudo=None, runas=None, logerr=True):
  5571. """
  5572. issue a management command to the server, e.g to set an
  5573. attribute
  5574. Returns 0 for Success and non 0 number for Failure
  5575. :param cmd: The command to issue,
  5576. ``MGR_CMD_[SET,UNSET, LIST,...]`` see pbs_ifl.h
  5577. :type cmd: str
  5578. :param obj_type: The type of object to query, one of
  5579. the * objects
  5580. :param attrib: Attributes to operate on, can be a string, a
  5581. list,a dictionary
  5582. :type attrib: str or list or dictionary
  5583. :param id: The name or list of names of the object(s) to act
  5584. upon.
  5585. :type id: str or list
  5586. :param extend: Optional extension to the IFL call. see
  5587. pbs_ifl.h
  5588. :type extend: str or None
  5589. :param expect: If set to True, query the server expecting
  5590. the value to be accurately reflected.
  5591. Defaults to False
  5592. :type expect: bool
  5593. :param max_attempts: Sets a maximum number of attempts to
  5594. call expect with.
  5595. :type max_attempts: int
  5596. :param level: logging level
  5597. :param sudo: If True, run the manager command as super user.
  5598. Defaults to None. Some attribute settings
  5599. should be run with sudo set to True, those are
  5600. acl_roots, job_sort_formula, hook operations,
  5601. no_sched_hook_event, in those cases, setting
  5602. sudo to False is only needed for testing
  5603. purposes
  5604. :type sudo: bool
  5605. :param runas: run as user
  5606. :type runas: str
  5607. :param logerr: If False, CLI commands do not log error,
  5608. i.e. silent mode
  5609. :type logerr: bool
  5610. :raises: PbsManagerError
  5611. :returns: On success:
  5612. - if expect is False, return code of qmgr/pbs_manager
  5613. - if expect is True, 0 for success
  5614. :raises: On Error/Failure:
  5615. - PbsManagerError if qmgr/pbs_manager() failed
  5616. - PtlExpectError if expect() failed
  5617. """
  5618. # Currently, expect() doesn't validate the values being set for
  5619. # create operations.
  5620. # For unset operations, expect does not handle attributes that are
  5621. # reset to default after unset.
  5622. # So, only call expect automatically for set and delete operations.
  # NOTE: this overrides whatever the caller passed as `expect` for
  # MGR_CMD_SET and MGR_CMD_DELETE.
  5623. if cmd in (MGR_CMD_SET, MGR_CMD_DELETE):
  5624. expect = True
  # Normalise id into oid: a comma-separated string is split into a
  # list of names; any other value (list or None) is used as given.
  5625. if isinstance(id, str):
  5626. oid = id.split(',')
  5627. else:
  5628. oid = id
  5629. self.logit('manager on ' + self.shortname +
  5630. [' as ' + str(runas), ''][runas is None] + ': ' +
  5631. PBS_CMD_MAP[cmd] + ' ', obj_type, attrib, oid, level=level)
  5632. c = None # connection handle
  # The qmgr command line is used in CLI mode, whenever sudo is given
  # explicitly, for hook objects, and when one of the three attributes
  # tested below is present; otherwise the request goes through
  # pbs_api_as (when runas is given) or the IFL call.
  5633. if (self.get_op_mode() == PTL_CLI or
  5634. sudo is not None or
  5635. obj_type in (HOOK, PBS_HOOK) or
  5636. (attrib is not None and ('job_sort_formula' in attrib or
  5637. 'acl_roots' in attrib or
  5638. 'no_sched_hook_event' in attrib))):
  5639. execcmd = [PBS_CMD_MAP[cmd], PBS_OBJ_MAP[obj_type]]
  5640. if oid is not None:
  # NOTE: oid is the caller's own list when id was passed as a list,
  # so the assignment below is visible to the caller.
  5641. if cmd == MGR_CMD_DELETE and obj_type == NODE and oid[0] == "":
  5642. oid[0] = "@default"
  5643. execcmd += [",".join(oid)]
  5644. if attrib is not None and cmd != MGR_CMD_LIST:
  # An import takes its three arguments from fixed keys of attrib.
  5645. if cmd == MGR_CMD_IMPORT:
  5646. execcmd += [attrib['content-type'],
  5647. attrib['content-encoding'],
  5648. attrib['input-file']]
  5649. else:
  5650. if isinstance(attrib, (dict, OrderedDict)):
  5651. kvpairs = []
  5652. for k, v in attrib.items():
  # A tuple value is (operation, value); only INCR ('+=') and
  # DECR ('-=') are accepted as operations.
  5653. if isinstance(v, tuple):
  5654. if v[0] == INCR:
  5655. op = '+='
  5656. elif v[0] == DECR:
  5657. op = '-='
  5658. else:
  5659. msg = 'Invalid operation: %s' % (v[0])
  5660. raise PbsManagerError(rc=1, rv=False,
  5661. msg=msg)
  5662. v = v[1]
  5663. else:
  5664. op = '='
  5665. # handle string arrays as double quotes if
  5666. # not already set:
  5667. if isinstance(v, str) and ',' in v and v[0] != '"':
  5668. v = '"' + v + '"'
  5669. kvpairs += [str(k) + op + str(v)]
  5670. if kvpairs:
  5671. execcmd += [",".join(kvpairs)]
  5672. del kvpairs
  5673. elif isinstance(attrib, list):
  5674. execcmd += [",".join(attrib)]
  5675. elif isinstance(attrib, str):
  5676. execcmd += [attrib]
  5677. if not self.default_pbs_conf or not self.default_client_pbs_conf:
  5678. as_script = True
  5679. else:
  5680. as_script = False
  # The directive is wrapped in single quotes when the server is not
  # local or when the command is run as a script.
  5681. if not self._is_local or as_script:
  5682. execcmd = '\'' + " ".join(execcmd) + '\''
  5683. else:
  5684. execcmd = " ".join(execcmd)
  5685. # Hooks can only be queried as a privileged user on the host where
  5686. # the server is running, care must be taken to use the appropriate
  5687. # path to qmgr and appropriate escaping sequences
  5688. # VERSION INFO: no_sched_hook_event introduced in 11.3.120 only
  5689. if sudo is None:
  5690. if (obj_type in (HOOK, PBS_HOOK) or
  5691. (attrib is not None and
  5692. ('job_sort_formula' in attrib or
  5693. 'acl_roots' in attrib or
  5694. 'no_sched_hook_event' in attrib))):
  5695. sudo = True
  5696. else:
  5697. sudo = False
  5698. pcmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'bin', 'qmgr'),
  5699. '-c', execcmd]
  5700. if as_script:
  5701. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  5702. ret = self.du.run_cmd(self.hostname, pcmd, sudo=sudo, runas=runas,
  5703. level=logging.INFOCLI, as_script=as_script,
  5704. logerr=logerr)
  5705. rc = ret['rc']
  5706. # NOTE: workaround the fact that qmgr overloads the return code in
  5707. # cases where the list returned is empty an error flag is set even
  5708. # through there is no error. Handled here by checking if there is
  5709. # no err and out message, in which case return code is set to 0
  5710. if rc != 0 and (ret['out'] == [''] and ret['err'] == ['']):
  5711. rc = 0
  5712. if rc == 0:
  5713. if cmd == MGR_CMD_LIST:
  5714. bsl = self.utils.convert_to_dictlist(ret['out'], attrib,
  5715. mergelines=True)
  5716. self.update_attributes(obj_type, bsl)
  5717. else:
  5718. # Need to rework setting error, this is not thread safe
  5719. self.last_error = ret['err']
  5720. self.last_rc = ret['rc']
  5721. elif runas is not None:
  5722. _data = {'cmd': cmd, 'obj_type': obj_type, 'attrib': attrib,
  5723. 'id': oid}
  5724. rc = self.pbs_api_as('manager', user=runas, data=_data,
  5725. extend=extend)
  5726. else:
  5727. a = self.utils.convert_to_attropl(attrib, cmd)
  5728. c = self._connect(self.hostname)
  5729. rc = 0
  5730. if obj_type == SERVER and oid is None:
  5731. oid = [self.hostname]
  5732. if oid is None:
  5733. # server will run strlen on id, it can not be NULL
  5734. oid = ['']
  5735. if cmd == MGR_CMD_LIST:
  # NOTE(review): oid appears to have been replaced by [''] just
  # above whenever it was None, so this test looks like it can never
  # be true -- confirm.
  5736. if oid is None:
  5737. bsl = self.status(obj_type, attrib, oid, extend)
  5738. else:
  5739. bsl = None
  5740. for i in oid:
  5741. tmpbsl = self.status(obj_type, attrib, i, extend)
  5742. if tmpbsl is None:
  5743. rc = 1
  5744. else:
  5745. if bsl is None:
  5746. bsl = tmpbsl
  5747. else:
  5748. bsl += tmpbsl
  5749. else:
  5750. rc = 0
  # NOTE(review): same remark as for the list case; in addition `i`
  # would not have been assigned yet on this path -- confirm.
  5751. if oid is None:
  5752. rc = pbs_manager(c, cmd, obj_type, i, a, extend)
  5753. else:
  5754. for i in oid:
  5755. tmprc = pbs_manager(c, cmd, obj_type, i, a, extend)
  5756. if tmprc != 0:
  5757. rc = tmprc
  5758. break
  5759. if rc == 0:
  5760. rc = tmprc
  5761. bs_list = []
  # After a successful delete, drop the named objects from the
  # per-type dictionaries kept on this instance.
  5762. if cmd == MGR_CMD_DELETE and oid is not None and rc == 0:
  5763. for i in oid:
  5764. if obj_type == MGR_OBJ_HOOK and i in self.hooks:
  5765. del self.hooks[i]
  5766. if obj_type in (NODE, VNODE) and i in self.nodes:
  5767. del self.nodes[i]
  5768. if obj_type == MGR_OBJ_QUEUE and i in self.queues:
  5769. del self.queues[i]
  5770. if obj_type == MGR_OBJ_RSC and i in self.resources:
  5771. del self.resources[i]
  5772. if obj_type == SCHED and i in self.schedulers:
  5773. del self.schedulers[i]
  # After a successful set, a deep copy of attrib tagged with the
  # object name is handed to update_attributes. The tag key is 'name'
  # when id is a list and 'id' otherwise.
  5774. elif cmd == MGR_CMD_SET and rc == 0 and id is not None:
  5775. if isinstance(id, list):
  5776. for name in id:
  5777. tbsl = copy.deepcopy(attrib)
  5778. tbsl['name'] = name
  5779. bs_list.append(tbsl)
  5780. self.update_attributes(obj_type, bs_list)
  5781. else:
  5782. tbsl = copy.deepcopy(attrib)
  5783. tbsl['id'] = id
  5784. bs_list.append(tbsl)
  5785. self.update_attributes(obj_type, bs_list)
  # After a successful create, the new object(s) are queried with
  # status() and the result is handed to update_attributes.
  5786. elif cmd == MGR_CMD_CREATE and rc == 0:
  5787. if isinstance(id, list):
  5788. for name in id:
  5789. bsl = self.status(obj_type, id=name, extend=extend)
  5790. self.update_attributes(obj_type, bsl)
  5791. else:
  5792. bsl = self.status(obj_type, id=id, extend=extend)
  5793. self.update_attributes(obj_type, bsl)
  5794. if rc != 0:
  5795. raise PbsManagerError(rv=False, rc=rc, msg=self.geterrmsg(),
  5796. post=self._disconnect, conn=c)
  5797. if c is not None:
  5798. self._disconnect(c)
  5799. if expect:
  5800. offset = None
  5801. attrop = PTL_OR
  # NODE and HOST objects are verified as VNODE objects; VNODE and
  # QUEUE checks are given an offset of 0.5.
  5802. if obj_type in (NODE, HOST):
  5803. obj_type = VNODE
  5804. if obj_type in (VNODE, QUEUE):
  5805. offset = 0.5
  5806. if cmd in PBS_CMD_TO_OP:
  5807. op = PBS_CMD_TO_OP[cmd]
  5808. else:
  5809. op = EQ
  5810. # If scheduling is set to false then check for
  5811. # state to be idle
  5812. if attrib and isinstance(attrib,
  5813. dict) and 'scheduling' in attrib.keys():
  5814. if str(attrib['scheduling']) in PTL_FALSE:
  5815. if obj_type == MGR_OBJ_SERVER:
  5816. state_val = 'Idle'
  5817. state_attr = ATTR_status
  5818. else: # SCHED object
  5819. state_val = 'idle'
  5820. state_attr = 'state'
  # NOTE: the expected state is written into the caller's attrib
  # dict (modified in place), and all attributes must then match.
  5821. attrib[state_attr] = state_val
  5822. attrop = PTL_AND
  5823. if oid is None:
  5824. self.expect(obj_type, attrib, op=op,
  5825. max_attempts=max_attempts,
  5826. attrop=attrop, offset=offset)
  5827. else:
  5828. for i in oid:
  5829. self.expect(obj_type, attrib, i, op=op,
  5830. max_attempts=max_attempts,
  5831. attrop=attrop, offset=offset)
  5832. rc = 0 # If we've reached here then expect passed, so return 0
  5833. return rc
  def sigjob(self, jobid=None, signal=None, extend=None, runas=None,
             logerr=True):
      """
      Deliver a signal to one or more jobs.

      In CLI mode ``qsig`` is run; otherwise, when ``runas`` is given,
      the request goes through ``pbs_api_as``; otherwise ``pbs_sigjob``
      is called once per job over a server connection.

      :param jobid: identifier of the job or list of jobs to send
                    the signal to
      :type jobid: str or list
      :param signal: The signal to send to the job, see pbs_ifl.h
      :type signal: str or None
      :param extend: extend options (only forwarded to ``pbs_sigjob``)
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: 0 on success
      :raises: PbsSignalError
      """
      banner = ['signal on ' + self.shortname]
      if runas is not None:
          banner.append(' as ' + str(runas))
      banner.append(': ')
      if jobid is not None:
          if not isinstance(jobid, list):
              jobid = jobid.split(',')
          banner.append(', '.join(jobid))
      if signal is not None:
          banner.append(' with signal = ' + str(signal))
      self.logger.info(''.join(banner))

      conn = None
      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qsig')]
          if signal is not None:
              argv.append('-s')
              # PTL_NOARG asks for the bare option without a value
              if signal != PTL_NOARG:
                  argv.append(str(signal))
          if jobid is not None:
              argv.extend(jobid)
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   as_script=as_script,
                                   level=logging.INFOCLI, logerr=logerr)
          rc = result['rc']
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('sigjob', jobid, runas, data=signal)
      else:
          conn = self._connect(self.hostname)
          rc = 0
          # every job is signalled; the last non-zero status wins
          for ajob in jobid:
              status = pbs_sigjob(conn, ajob, signal, extend)
              if status != 0:
                  rc = status

      if rc != 0:
          raise PbsSignalError(rc=rc, rv=False, msg=self.geterrmsg(),
                               post=self._disconnect, conn=conn)
      if conn:
          self._disconnect(conn)
      return rc
  def msgjob(self, jobid=None, to_file=None, msg=None, extend=None,
             runas=None, logerr=True):
      """
      Send a message to a job. Raises ``PbsMessageError`` on
      error.

      In CLI mode ``qmsg`` is run; otherwise, when ``runas`` is given,
      the request goes through ``pbs_api_as``; otherwise ``pbs_msgjob``
      is called once per job over a server connection.

      :param jobid: identifier of the job or list of jobs to
                    send the message to
      :type jobid: str or List
      :param msg: The message to send to the job
      :type msg: str or None
      :param to_file: one of ``MSG_ERR`` or ``MSG_OUT`` or
                      ``MSG_ERR|MSG_OUT``
      :type to_file: str or None
      :param extend: extend options
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: 0 on success, or the negative connection handle if
                connecting to the server failed
      :raises: PbsMessageError
      """
      prefix = 'msgjob on ' + self.shortname
      if runas is not None:
          prefix += ' as ' + str(runas)
      prefix += ': '
      if jobid is not None:
          if not isinstance(jobid, list):
              jobid = jobid.split(',')
          prefix += ', '.join(jobid)
      if to_file is not None:
          prefix += ' with to_file = '
          if MSG_ERR == to_file:
              prefix += 'MSG_ERR'
          elif MSG_OUT == to_file:
              prefix += 'MSG_OUT'
          elif MSG_OUT | MSG_ERR == to_file:
              prefix += 'MSG_ERR|MSG_OUT'
          else:
              prefix += str(to_file)
      if msg is not None:
          prefix += ' msg = %s' % (str(msg))
      if extend is not None:
          prefix += ' extend = %s' % (str(extend))
      self.logger.info(prefix)

      c = None
      if self.get_op_mode() == PTL_CLI:
          pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qmsg')]
          if to_file is not None:
              if MSG_ERR == to_file:
                  pcmd += ['-E']
              elif MSG_OUT == to_file:
                  pcmd += ['-O']
              elif MSG_OUT | MSG_ERR == to_file:
                  pcmd += ['-E', '-O']
              else:
                  # any other value is passed through as an option
                  pcmd += ['-' + str(to_file)]
          if msg is not None:
              pcmd += [msg]
          if jobid is not None:
              pcmd += jobid
          if not self.default_client_pbs_conf:
              pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
              as_script = True
          else:
              as_script = False
          ret = self.du.run_cmd(self.client, pcmd, runas=runas,
                                as_script=as_script, level=logging.INFOCLI,
                                logerr=logerr)
          rc = ret['rc']
          if ret['err'] != ['']:
              self.last_error = ret['err']
          self.last_rc = rc
      elif runas is not None:
          data = {'msg': msg, 'to_file': to_file}
          rc = self.pbs_api_as('msgjob', jobid, runas, data=data,
                               extend=extend)
      else:
          c = self._connect(self.hostname)
          if c < 0:
              return c
          # rc must start at 0: it is only reassigned when a call
          # fails, so without this the all-success case reached the
          # check below with rc unbound.
          rc = 0
          for ajob in jobid:
              tmp_rc = pbs_msgjob(c, ajob, to_file, msg, extend)
              if tmp_rc != 0:
                  rc = tmp_rc

      if rc != 0:
          raise PbsMessageError(rc=rc, rv=False, msg=self.geterrmsg(),
                                post=self._disconnect, conn=c)
      if c:
          self._disconnect(c)
      return rc
  def alterjob(self, jobid=None, attrib=None, extend=None, runas=None,
               logerr=True):
      """
      Change attributes of one or more jobs.

      In CLI mode ``qalter`` is run; otherwise, when ``runas`` is given,
      the request goes through ``pbs_api_as``; otherwise
      ``pbs_alterjob`` is called once per job over a server connection.

      :param jobid: identifier of the job or list of jobs to
                    operate on
      :type jobid: str or list
      :param attrib: A dictionary of attributes to set
      :type attrib: dictionary
      :param extend: extend options (only forwarded to
                     ``pbs_alterjob``)
      :param runas: run as user
      :type runas: str or None
      :param logerr: If False, CLI commands do not log error,
                     i.e. silent mode
      :type logerr: bool
      :returns: 0 on success, or the negative connection handle if
                connecting to the server failed
      :raises: PbsAlterError
      """
      banner = ['alter on ' + self.shortname]
      if runas is not None:
          banner.append(' as ' + str(runas))
      banner.append(': ')
      if jobid is not None:
          if not isinstance(jobid, list):
              jobid = jobid.split(',')
          banner.append(', '.join(jobid))
      if attrib is not None:
          banner.append(' %s' % (str(attrib)))
      self.logger.info(''.join(banner))

      conn = None
      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                               'qalter')]
          if attrib is not None:
              dflt = self.default_client_pbs_conf
              argv += self.utils.convert_to_cli(attrib, op=IFL_ALTER,
                                                hostname=self.client,
                                                dflt_conf=dflt)
          if jobid is not None:
              argv.extend(jobid)
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   as_script=as_script,
                                   level=logging.INFOCLI, logerr=logerr)
          rc = result['rc']
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('alterjob', jobid, runas, data=attrib)
      else:
          conn = self._connect(self.hostname)
          if conn < 0:
              return conn
          attrl = self.utils.convert_to_attrl(attrib)
          rc = 0
          # every job is altered; the last non-zero status wins
          for ajob in jobid:
              status = pbs_alterjob(conn, ajob, attrl, extend)
              if status != 0:
                  rc = status

      if rc != 0:
          raise PbsAlterError(rc=rc, rv=False, msg=self.geterrmsg(),
                              post=self._disconnect, conn=conn)
      if conn:
          self._disconnect(conn)
      return rc
  def holdjob(self, jobid=None, holdtype=None, extend=None, runas=None,
              logerr=True):
      """
      Place a hold on one or more jobs.

      In CLI mode ``qhold`` is run; otherwise, when ``runas`` is given,
      the request goes through ``pbs_api_as``; otherwise
      ``pbs_holdjob`` is called once per job over a server connection.

      :param jobid: identifier of the job or list of jobs to hold
      :type jobid: str or list
      :param holdtype: The type of hold to put on the job
      :type holdtype: str or None
      :param extend: extend options (only forwarded to ``pbs_holdjob``)
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: 0 on success, or the negative connection handle if
                connecting to the server failed
      :raises: PbsHoldError
      """
      banner = ['holdjob on ' + self.shortname]
      if runas is not None:
          banner.append(' as ' + str(runas))
      banner.append(': ')
      if jobid is not None:
          if not isinstance(jobid, list):
              jobid = jobid.split(',')
          banner.append(', '.join(jobid))
      if holdtype is not None:
          banner.append(' with hold_list = %s' % (holdtype))
      self.logger.info(''.join(banner))

      conn = None
      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qhold')]
          if holdtype is not None:
              argv.append('-h')
              # PTL_NOARG asks for the bare option without a value
              if holdtype != PTL_NOARG:
                  argv.append(holdtype)
          if jobid is not None:
              argv.extend(jobid)
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   logerr=logerr, as_script=as_script,
                                   level=logging.INFOCLI)
          rc = result['rc']
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('holdjob', jobid, runas, data=holdtype,
                               logerr=logerr)
      else:
          conn = self._connect(self.hostname)
          if conn < 0:
              return conn
          rc = 0
          # every job is held; the last non-zero status wins
          for ajob in jobid:
              status = pbs_holdjob(conn, ajob, holdtype, extend)
              if status != 0:
                  rc = status

      if rc != 0:
          raise PbsHoldError(rc=rc, rv=False, msg=self.geterrmsg(),
                             post=self._disconnect, conn=conn)
      if conn:
          self._disconnect(conn)
      return rc
  def rlsjob(self, jobid, holdtype, extend=None, runas=None, logerr=True):
      """
      Release a hold on one or more jobs.

      In CLI mode ``qrls`` is run; otherwise, when ``runas`` is given,
      the request goes through ``pbs_api_as``; otherwise ``pbs_rlsjob``
      is called once per job over a server connection.

      :param jobid: job or list of jobs to release
      :type jobid: str or list
      :param holdtype: The type of hold to release on the job
      :type holdtype: str
      :param extend: extend options (only forwarded to ``pbs_rlsjob``)
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: 0 on success, or the negative connection handle if
                connecting to the server failed
      :raises: PbsHoldError
      """
      banner = ['release on ' + self.shortname]
      if runas is not None:
          banner.append(' as ' + str(runas))
      banner.append(': ')
      if jobid is not None:
          if not isinstance(jobid, list):
              jobid = jobid.split(',')
          banner.append(', '.join(jobid))
      if holdtype is not None:
          banner.append(' with hold_list = %s' % (holdtype))
      self.logger.info(''.join(banner))

      conn = None
      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qrls')]
          if holdtype is not None:
              argv.append('-h')
              # PTL_NOARG asks for the bare option without a value
              if holdtype != PTL_NOARG:
                  argv.append(holdtype)
          if jobid is not None:
              argv.extend(jobid)
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   as_script=as_script,
                                   level=logging.INFOCLI, logerr=logerr)
          rc = result['rc']
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('rlsjob', jobid, runas, data=holdtype)
      else:
          conn = self._connect(self.hostname)
          if conn < 0:
              return conn
          rc = 0
          # every job is released; the last non-zero status wins
          for ajob in jobid:
              status = pbs_rlsjob(conn, ajob, holdtype, extend)
              if status != 0:
                  rc = status

      if rc != 0:
          # NOTE(review): a failed release is reported with the hold
          # error type; a dedicated release error may have been
          # intended -- confirm before changing what callers catch.
          raise PbsHoldError(rc=rc, rv=False, msg=self.geterrmsg(),
                             post=self._disconnect, conn=conn)
      if conn:
          self._disconnect(conn)
      return rc
  def rerunjob(self, jobid=None, extend=None, runas=None, logerr=True):
      """
      Rerun one or more jobs.

      In CLI mode ``qrerun`` is run (``extend`` is passed with ``-W``);
      otherwise, when ``runas`` is given, the request goes through
      ``pbs_api_as``; otherwise ``pbs_rerunjob`` is called once per job
      over a server connection.

      :param jobid: job or list of jobs to rerun
      :type jobid: str or list
      :param extend: extend options
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: 0 on success, or the negative connection handle if
                connecting to the server failed
      :raises: PbsRerunError
      """
      banner = 'rerun on ' + self.shortname
      if runas is not None:
          banner = banner + ' as ' + str(runas)
      banner = banner + ': '
      if jobid is not None:
          if not isinstance(jobid, list):
              jobid = jobid.split(',')
          banner = banner + ', '.join(jobid)
      if extend is not None:
          banner = banner + extend
      self.logger.info(banner)

      conn = None
      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                               'qrerun')]
          if extend:
              argv.extend(['-W', extend])
          if jobid is not None:
              argv.extend(jobid)
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   as_script=as_script,
                                   level=logging.INFOCLI, logerr=logerr)
          rc = result['rc']
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('rerunjob', jobid, runas, extend=extend)
      else:
          conn = self._connect(self.hostname)
          if conn < 0:
              return conn
          rc = 0
          # every job is rerun; the last non-zero status wins
          for ajob in jobid:
              status = pbs_rerunjob(conn, ajob, extend)
              if status != 0:
                  rc = status

      if rc != 0:
          raise PbsRerunError(rc=rc, rv=False, msg=self.geterrmsg(),
                              post=self._disconnect, conn=conn)
      if conn:
          self._disconnect(conn)
      return rc
  def orderjob(self, jobid1=None, jobid2=None, extend=None, runas=None,
               logerr=True):
      """
      Reorder the position of ``jobid1`` and ``jobid2``.

      In CLI mode ``qorder`` is run; otherwise, when ``runas`` is given,
      the request goes through ``pbs_api_as``; otherwise
      ``pbs_orderjob`` is called over a server connection.

      :param jobid1: first jobid
      :type jobid1: str or None
      :param jobid2: second jobid
      :type jobid2: str or None
      :param extend: extend options
      :param runas: run as user
      :type runas: str or None
      :param logerr: If True (default) logs run_cmd errors
      :type logerr: bool
      :returns: 0 on success, or the negative connection handle if
                connecting to the server failed
      :raises: PbsOrderError
      """
      banner = ['orderjob on ' + self.shortname]
      if runas is not None:
          banner.append(' as ' + str(runas))
      banner.append(': ')
      banner.append(str(jobid1) + ', ' + str(jobid2))
      if extend is not None:
          banner.append(' ' + str(extend))
      self.logger.info(''.join(banner))

      conn = None
      if self.get_op_mode() == PTL_CLI:
          argv = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                               'qorder')]
          if jobid1 is not None:
              argv.append(jobid1)
          if jobid2 is not None:
              argv.append(jobid2)
          as_script = not self.default_client_pbs_conf
          if as_script:
              argv = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + argv
          result = self.du.run_cmd(self.client, argv, runas=runas,
                                   as_script=as_script,
                                   level=logging.INFOCLI, logerr=logerr)
          rc = result['rc']
          if result['err'] != ['']:
              self.last_error = result['err']
          self.last_rc = rc
      elif runas is not None:
          rc = self.pbs_api_as('orderjob', jobid1, runas, data=jobid2,
                               extend=extend)
      else:
          conn = self._connect(self.hostname)
          if conn < 0:
              return conn
          rc = pbs_orderjob(conn, jobid1, jobid2, extend)

      if rc != 0:
          raise PbsOrderError(rc=rc, rv=False, msg=self.geterrmsg(),
                              post=self._disconnect, conn=conn)
      if conn:
          self._disconnect(conn)
      return rc
  6302. def runjob(self, jobid=None, location=None, async=False, extend=None,
  6303. runas=None, logerr=False):
  6304. """
  6305. Run a job on given nodes. Raises ``PbsRunError`` on error.
  6306. :param jobid: job or list of jobs to run
  6307. :type jobid: str or list
  6308. :param location: An execvnode on which to run the job
  6309. :type location: str or None
  6310. :param async: If true the call will return immediately
  6311. assuming success.
  6312. :type async: bool
  6313. :param extend: extend options
  6314. :param runas: run as user
  6315. :type runas: str or None
  6316. :param logerr: If True (default) logs run_cmd errors
  6317. :type logerr: bool
  6318. :raises: PbsRunError
  6319. """
  6320. if async:
  6321. prefix = 'Async run on ' + self.shortname
  6322. else:
  6323. prefix = 'run on ' + self.shortname
  6324. if runas is not None:
  6325. prefix += ' as ' + str(runas)
  6326. prefix += ': '
  6327. if jobid is not None:
  6328. if not isinstance(jobid, list):
  6329. jobid = jobid.split(',')
  6330. prefix += ', '.join(jobid)
  6331. if location is not None:
  6332. prefix += ' with location = %s' % (location)
  6333. self.logger.info(prefix)
  6334. if self.has_diag:
  6335. return 0
  6336. c = None
  6337. if self.get_op_mode() == PTL_CLI:
  6338. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qrun')]
  6339. if async:
  6340. pcmd += ['-a']
  6341. if location is not None:
  6342. pcmd += ['-H']
  6343. if location != PTL_NOARG:
  6344. pcmd += [location]
  6345. if jobid:
  6346. pcmd += jobid
  6347. if not self.default_client_pbs_conf:
  6348. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6349. as_script = True
  6350. else:
  6351. as_script = False
  6352. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6353. as_script=as_script, level=logging.INFOCLI,
  6354. logerr=logerr)
  6355. rc = ret['rc']
  6356. if ret['err'] != ['']:
  6357. self.last_error = ret['err']
  6358. self.last_rc = rc
  6359. elif runas is not None:
  6360. rc = self.pbs_api_as(
  6361. 'runjob', jobid, runas, data=location, extend=extend)
  6362. else:
  6363. c = self._connect(self.hostname)
  6364. if c < 0:
  6365. return c
  6366. rc = 0
  6367. for ajob in jobid:
  6368. if async:
  6369. tmp_rc = pbs_asyrunjob(c, ajob, location, extend)
  6370. else:
  6371. tmp_rc = pbs_runjob(c, ajob, location, extend)
  6372. if tmp_rc != 0:
  6373. rc = tmp_rc
  6374. if rc != 0:
  6375. raise PbsRunError(rc=rc, rv=False, msg=self.geterrmsg(),
  6376. post=self._disconnect, conn=c)
  6377. if c:
  6378. self._disconnect(c)
  6379. return rc
  6380. def movejob(self, jobid=None, destination=None, extend=None, runas=None,
  6381. logerr=True):
  6382. """
  6383. Move a job or list of job ids to a given destination queue.
  6384. Raises ``PbsMoveError`` on error.
  6385. :param jobid: A job or list of job ids to move
  6386. :type jobid: str or list
  6387. :param destination: The destination queue@server
  6388. :type destination: str or None
  6389. :param extend: extend options
  6390. :param runas: run as user
  6391. :type runas: str or None
  6392. :param logerr: If True (default) logs run_cmd errors
  6393. :type logerr: bool
  6394. :raises: PbsMoveError
  6395. """
  6396. prefix = 'movejob on ' + self.shortname
  6397. if runas is not None:
  6398. prefix += ' as ' + str(runas)
  6399. prefix += ': '
  6400. if jobid is not None:
  6401. if not isinstance(jobid, list):
  6402. jobid = jobid.split(',')
  6403. prefix += ', '.join(jobid)
  6404. if destination is not None:
  6405. prefix += ' destination = %s' % (destination)
  6406. self.logger.info(prefix)
  6407. c = None
  6408. rc = 0
  6409. if self.get_op_mode() == PTL_CLI:
  6410. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qmove')]
  6411. if destination is not None:
  6412. pcmd += [destination]
  6413. if jobid is not None:
  6414. pcmd += jobid
  6415. if not self.default_client_pbs_conf:
  6416. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6417. as_script = True
  6418. else:
  6419. as_script = False
  6420. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6421. logerr=logerr, as_script=as_script,
  6422. level=logging.INFOCLI)
  6423. rc = ret['rc']
  6424. if ret['err'] != ['']:
  6425. self.last_error = ret['err']
  6426. self.last_rc = rc
  6427. elif runas is not None:
  6428. rc = self.pbs_api_as('movejob', jobid, runas, data=destination,
  6429. extend=extend)
  6430. else:
  6431. c = self._connect(self.hostname)
  6432. if c < 0:
  6433. return c
  6434. for ajob in jobid:
  6435. tmp_rc = pbs_movejob(c, ajob, destination, extend)
  6436. if tmp_rc != 0:
  6437. rc = tmp_rc
  6438. if rc != 0:
  6439. raise PbsMoveError(rc=rc, rv=False, msg=self.geterrmsg(),
  6440. post=self._disconnect, conn=c)
  6441. if c:
  6442. self._disconnect(c)
  6443. return rc
  6444. def qterm(self, manner=None, extend=None, server_name=None, runas=None,
  6445. logerr=True):
  6446. """
  6447. Terminate the ``pbs_server`` daemon
  6448. :param manner: one of ``(SHUT_IMMEDIATE | SHUT_DELAY |
  6449. SHUT_QUICK)`` and can be\
  6450. combined with SHUT_WHO_SCHED, SHUT_WHO_MOM,
  6451. SHUT_WHO_SECDRY, \
  6452. SHUT_WHO_IDLESECDRY, SHUT_WHO_SECDONLY. \
  6453. :param extend: extend options
  6454. :param server_name: name of the pbs server
  6455. :type server_name: str or None
  6456. :param runas: run as user
  6457. :type runas: str or None
  6458. :param logerr: If True (default) logs run_cmd errors
  6459. :type logerr: bool
  6460. :raises: PbsQtermError
  6461. """
  6462. prefix = 'terminate ' + self.shortname
  6463. if runas is not None:
  6464. prefix += ' as ' + str(runas)
  6465. prefix += ': with manner '
  6466. attrs = manner
  6467. if attrs is None:
  6468. prefix += "None "
  6469. elif isinstance(attrs, str):
  6470. prefix += attrs
  6471. else:
  6472. if ((attrs & SHUT_QUICK) == SHUT_QUICK):
  6473. prefix += "quick "
  6474. if ((attrs & SHUT_IMMEDIATE) == SHUT_IMMEDIATE):
  6475. prefix += "immediate "
  6476. if ((attrs & SHUT_DELAY) == SHUT_DELAY):
  6477. prefix += "delay "
  6478. if ((attrs & SHUT_WHO_SCHED) == SHUT_WHO_SCHED):
  6479. prefix += "schedular "
  6480. if ((attrs & SHUT_WHO_MOM) == SHUT_WHO_MOM):
  6481. prefix += "mom "
  6482. if ((attrs & SHUT_WHO_SECDRY) == SHUT_WHO_SECDRY):
  6483. prefix += "secondary server "
  6484. if ((attrs & SHUT_WHO_IDLESECDRY) == SHUT_WHO_IDLESECDRY):
  6485. prefix += "idle secondary "
  6486. if ((attrs & SHUT_WHO_SECDONLY) == SHUT_WHO_SECDONLY):
  6487. prefix += "shoutdown secondary only "
  6488. self.logger.info(prefix)
  6489. if self.has_diag:
  6490. return 0
  6491. c = None
  6492. rc = 0
  6493. if self.get_op_mode() == PTL_CLI:
  6494. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin', 'qterm')]
  6495. _conf = self.default_client_pbs_conf
  6496. pcmd += self.utils.convert_to_cli(manner, op=IFL_TERMINATE,
  6497. hostname=self.hostname,
  6498. dflt_conf=_conf)
  6499. if server_name is not None:
  6500. pcmd += [server_name]
  6501. if not self.default_client_pbs_conf:
  6502. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6503. as_script = True
  6504. else:
  6505. as_script = False
  6506. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6507. level=logging.INFOCLI, as_script=as_script)
  6508. rc = ret['rc']
  6509. if ret['err'] != ['']:
  6510. self.last_error = ret['err']
  6511. self.last_rc = rc
  6512. elif runas is not None:
  6513. attrs = {'manner': manner, 'server_name': server_name}
  6514. rc = self.pbs_api_as('terminate', None, runas, data=attrs,
  6515. extend=extend)
  6516. else:
  6517. if server_name is None:
  6518. server_name = self.hostname
  6519. c = self._connect(self.hostname)
  6520. rc = pbs_terminate(c, manner, extend)
  6521. if rc != 0:
  6522. raise PbsQtermError(rc=rc, rv=False, msg=self.geterrmsg(),
  6523. post=self._disconnect, conn=c, force=True)
  6524. if c:
  6525. self._disconnect(c, force=True)
  6526. return rc
  6527. teminate = qterm
  6528. def geterrmsg(self):
  6529. """
  6530. Get the error message
  6531. """
  6532. mode = self.get_op_mode()
  6533. if mode == PTL_CLI:
  6534. return self.last_error
  6535. elif self._conn is not None and self._conn >= 0:
  6536. m = pbs_geterrmsg(self._conn)
  6537. if m is not None:
  6538. m = m.split('\n')
  6539. return m
  6540. #
  6541. # End IFL Wrappers
  6542. #
  6543. def qdisable(self, queue=None, runas=None, logerr=True):
  6544. """
  6545. Disable queue. ``CLI`` mode only
  6546. :param queue: The name of the queue or list of queue to
  6547. disable
  6548. :type queue: str or list
  6549. :param runas: Optional name of user to run command as
  6550. :type runas: str or None
  6551. :param logerr: Set to False ot disable logging command
  6552. errors.Defaults to True.
  6553. :type logerr: bool
  6554. :raises: PbsQdisableError
  6555. """
  6556. prefix = 'qdisable on ' + self.shortname
  6557. if runas is not None:
  6558. prefix += ' as ' + str(runas)
  6559. prefix += ': '
  6560. if queue is not None:
  6561. if not isinstance(queue, list):
  6562. queue = queue.split(',')
  6563. prefix += ', '.join(queue)
  6564. self.logger.info(prefix)
  6565. if self.get_op_mode() == PTL_CLI:
  6566. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
  6567. 'qdisable')]
  6568. if queue is not None:
  6569. pcmd += queue
  6570. if not self.default_client_pbs_conf:
  6571. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6572. as_script = True
  6573. else:
  6574. as_script = False
  6575. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6576. as_script=as_script, level=logging.INFOCLI,
  6577. logerr=logerr)
  6578. if ret['err'] != ['']:
  6579. self.last_error = ret['err']
  6580. self.last_rc = ret['rc']
  6581. if self.last_rc != 0:
  6582. raise PbsQdisableError(rc=self.last_rc, rv=False,
  6583. msg=self.last_error)
  6584. else:
  6585. _msg = 'qdisable: currently not supported in API mode'
  6586. raise PbsQdisableError(rv=False, rc=1, msg=_msg)
  6587. def qenable(self, queue=None, runas=None, logerr=True):
  6588. """
  6589. Enable queue. ``CLI`` mode only
  6590. :param queue: The name of the queue or list of queue to
  6591. enable
  6592. :type queue: str or list
  6593. :param runas: Optional name of user to run command as
  6594. :type runas: str or None
  6595. :param logerr: Set to False ot disable logging command
  6596. errors.Defaults to True.
  6597. :type logerr: bool
  6598. :raises: PbsQenableError
  6599. """
  6600. prefix = 'qenable on ' + self.shortname
  6601. if runas is not None:
  6602. prefix += ' as ' + str(runas)
  6603. prefix += ': '
  6604. if queue is not None:
  6605. if not isinstance(queue, list):
  6606. queue = queue.split(',')
  6607. prefix += ', '.join(queue)
  6608. self.logger.info(prefix)
  6609. if self.get_op_mode() == PTL_CLI:
  6610. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
  6611. 'qenable')]
  6612. if queue is not None:
  6613. pcmd += queue
  6614. if not self.default_client_pbs_conf:
  6615. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6616. as_script = True
  6617. else:
  6618. as_script = False
  6619. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6620. as_script=as_script, level=logging.INFOCLI,
  6621. logerr=logerr)
  6622. if ret['err'] != ['']:
  6623. self.last_error = ret['err']
  6624. self.last_rc = ret['rc']
  6625. if self.last_rc != 0:
  6626. raise PbsQenableError(rc=self.last_rc, rv=False,
  6627. msg=self.last_error)
  6628. else:
  6629. _msg = 'qenable: currently not supported in API mode'
  6630. raise PbsQenableError(rv=False, rc=1, msg=_msg)
  6631. def qstart(self, queue=None, runas=None, logerr=True):
  6632. """
  6633. Start queue. ``CLI`` mode only
  6634. :param queue: The name of the queue or list of queue
  6635. to start
  6636. :type queue: str or list
  6637. :param runas: Optional name of user to run command as
  6638. :type runas: str or None
  6639. :param logerr: Set to False ot disable logging command
  6640. errors.Defaults to True.
  6641. :type logerr: bool
  6642. :raises: PbsQstartError
  6643. """
  6644. prefix = 'qstart on ' + self.shortname
  6645. if runas is not None:
  6646. prefix += ' as ' + str(runas)
  6647. prefix += ': '
  6648. if queue is not None:
  6649. if not isinstance(queue, list):
  6650. queue = queue.split(',')
  6651. prefix += ', '.join(queue)
  6652. self.logger.info(prefix)
  6653. if self.get_op_mode() == PTL_CLI:
  6654. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
  6655. 'qstart')]
  6656. if queue is not None:
  6657. pcmd += queue
  6658. if not self.default_client_pbs_conf:
  6659. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6660. as_script = True
  6661. else:
  6662. as_script = False
  6663. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6664. as_script=as_script, level=logging.INFOCLI,
  6665. logerr=logerr)
  6666. if ret['err'] != ['']:
  6667. self.last_error = ret['err']
  6668. self.last_rc = ret['rc']
  6669. if self.last_rc != 0:
  6670. raise PbsQstartError(rc=self.last_rc, rv=False,
  6671. msg=self.last_error)
  6672. else:
  6673. _msg = 'qstart: currently not supported in API mode'
  6674. raise PbsQstartError(rv=False, rc=1, msg=_msg)
  6675. def qstop(self, queue=None, runas=None, logerr=True):
  6676. """
  6677. Stop queue. ``CLI`` mode only
  6678. :param queue: The name of the queue or list of queue to stop
  6679. :type queue: str or list
  6680. :param runas: Optional name of user to run command as
  6681. :type runas: str or None
  6682. :param logerr: Set to False ot disable logging command errors.
  6683. Defaults to True.
  6684. :type logerr: bool
  6685. :raises: PbsQstopError
  6686. """
  6687. prefix = 'qstop on ' + self.shortname
  6688. if runas is not None:
  6689. prefix += ' as ' + str(runas)
  6690. prefix += ': '
  6691. if queue is not None:
  6692. if not isinstance(queue, list):
  6693. queue = queue.split(',')
  6694. prefix += ', '.join(queue)
  6695. self.logger.info(prefix)
  6696. if self.get_op_mode() == PTL_CLI:
  6697. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
  6698. 'qstop')]
  6699. if queue is not None:
  6700. pcmd += queue
  6701. if not self.default_client_pbs_conf:
  6702. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  6703. as_script = True
  6704. else:
  6705. as_script = False
  6706. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  6707. as_script=as_script, level=logging.INFOCLI,
  6708. logerr=logerr)
  6709. if ret['err'] != ['']:
  6710. self.last_error = ret['err']
  6711. self.last_rc = ret['rc']
  6712. if self.last_rc != 0:
  6713. raise PbsQstopError(rc=self.last_rc, rv=False,
  6714. msg=self.last_error)
  6715. else:
  6716. _msg = 'qstop: currently not supported in API mode'
  6717. raise PbsQstopError(rv=False, rc=1, msg=_msg)
  6718. def parse_resources(self):
  6719. """
  6720. Parse server resources as defined in the resourcedef file
  6721. Populates instance variable self.resources
  6722. :returns: The resources as a dictionary
  6723. """
  6724. if not self.has_diag:
  6725. self.manager(MGR_CMD_LIST, RSC)
  6726. return self.resources
  6727. def remove_resource(self, name):
  6728. """
  6729. Remove an entry from resourcedef
  6730. :param name: The name of the resource to remove
  6731. :type name: str
  6732. :param restart: Whether to restart the server or not.
  6733. Applicable to update_mode 'file'
  6734. operations only.
  6735. :param update_mode: one of 'file' or 'auto' (the default).
  6736. If 'file', updates the resourcedef file
  6737. only and will not use the qmgr
  6738. operations on resources introduced in
  6739. 12.3. If 'auto', will automatically
  6740. handle the update on resourcedef or
  6741. using qmgr based on the version of the
  6742. Server.
  6743. """
  6744. self.parse_resources()
  6745. if not self.has_diag:
  6746. if name in self.resources:
  6747. self.manager(MGR_CMD_DELETE, RSC, id=name)
  6748. def add_resource(self, name, type=None, flag=None):
  6749. """
  6750. Define a server resource
  6751. :param name: The name of the resource to add to the
  6752. resourcedef file
  6753. :type name: str
  6754. :param type: The type of the resource, one of string,
  6755. long, boolean, float
  6756. :param flag: The target of the resource, one of n, h, q,
  6757. or none
  6758. :type flag: str or None
  6759. :param restart: Whether to restart the server after adding
  6760. a resource.Applicable to update_mode 'file'
  6761. operations only.
  6762. :param update_mode: one of 'file' or 'auto' (the default).
  6763. If 'file', updates the resourcedef file
  6764. only and will not use the qmgr
  6765. operations on resources introduced in
  6766. 12.3. If 'auto', will automatically
  6767. handle the update on resourcedef or
  6768. using qmgr based on the version of the
  6769. Server.
  6770. :returns: True on success False on error
  6771. """
  6772. rv = self.parse_resources()
  6773. if rv is None:
  6774. return False
  6775. resource_exists = False
  6776. if name in self.resources:
  6777. msg = [self.logprefix + "resource " + name]
  6778. if type:
  6779. msg += ["type: " + type]
  6780. if flag:
  6781. msg += ["flag: " + flag]
  6782. msg += [" already defined"]
  6783. self.logger.info(" ".join(msg))
  6784. (t, f) = (self.resources[name].type, self.resources[name].flag)
  6785. if type == t and flag == f:
  6786. return True
  6787. self.logger.info("resource: redefining resource " + name +
  6788. " type: " + str(type) + " and flag: " + str(flag))
  6789. del self.resources[name]
  6790. resource_exists = True
  6791. r = Resource(name, type, flag)
  6792. self.resources[name] = r
  6793. a = {}
  6794. if type:
  6795. a['type'] = type
  6796. if flag:
  6797. a['flag'] = flag
  6798. if resource_exists:
  6799. self.manager(MGR_CMD_SET, RSC, a, id=name)
  6800. else:
  6801. self.manager(MGR_CMD_CREATE, RSC, a, id=name)
  6802. return True
  6803. def write_resourcedef(self, resources=None, filename=None, restart=True):
  6804. """
  6805. Write into resource def file
  6806. :param resources: PBS resources
  6807. :type resources: dictionary
  6808. :param filename: resourcedef file name
  6809. :type filename: str or None
  6810. """
  6811. if resources is None:
  6812. resources = self.resources
  6813. if isinstance(resources, Resource):
  6814. resources = {resources.name: resources}
  6815. fn = self.du.create_temp_file()
  6816. with open(fn, 'w+') as f:
  6817. for r in resources.values():
  6818. f.write(r.attributes['id'])
  6819. if r.attributes['type'] is not None:
  6820. f.write(' type=' + r.attributes['type'])
  6821. if r.attributes['flag'] is not None:
  6822. f.write(' flag=' + r.attributes['flag'])
  6823. f.write('\n')
  6824. if filename is None:
  6825. dest = os.path.join(self.pbs_conf['PBS_HOME'], 'server_priv',
  6826. 'resourcedef')
  6827. else:
  6828. dest = filename
  6829. self.du.run_copy(self.hostname, fn, dest, sudo=True,
  6830. preserve_permission=False)
  6831. os.remove(fn)
  6832. if restart:
  6833. return self.restart()
  6834. return True
  6835. def parse_resourcedef(self, file=None):
  6836. """
  6837. Parse an arbitrary resource definition file passed as
  6838. input and return a dictionary of resources
  6839. :param file: resource definition file
  6840. :type file: str or None
  6841. :returns: Dictionary of resource
  6842. :raises: PbsResourceError
  6843. """
  6844. if file is None:
  6845. file = os.path.join(self.pbs_conf['PBS_HOME'], 'server_priv',
  6846. 'resourcedef')
  6847. ret = self.du.cat(self.hostname, file, logerr=False, sudo=True)
  6848. if ret['rc'] != 0 or len(ret['out']) == 0:
  6849. # Most probable error is that file does not exist, we'll let it
  6850. # be created
  6851. return {}
  6852. resources = {}
  6853. lines = ret['out']
  6854. try:
  6855. for l in lines:
  6856. l = l.strip()
  6857. if l == '' or l.startswith('#'):
  6858. continue
  6859. name = None
  6860. rtype = None
  6861. flag = None
  6862. res = l.split()
  6863. e0 = res[0]
  6864. if len(res) > 1:
  6865. e1 = res[1].split('=')
  6866. else:
  6867. e1 = None
  6868. if len(res) > 2:
  6869. e2 = res[2].split('=')
  6870. else:
  6871. e2 = None
  6872. if e1 is not None and e1[0] == 'type':
  6873. rtype = e1[1]
  6874. elif e2 is not None and e2[0] == 'type':
  6875. rtype = e2[1]
  6876. if e1 is not None and e1[0] == 'flag':
  6877. flag = e1[0]
  6878. elif e2 is not None and e2[0] == 'flag':
  6879. flag = e2[1]
  6880. name = e0
  6881. r = Resource(name, rtype, flag)
  6882. resources[name] = r
  6883. except:
  6884. raise PbsResourceError(rc=1, rv=False,
  6885. msg="error in parse_resources")
  6886. return resources
  6887. def pbs_api_as(self, cmd=None, obj=None, user=None, **kwargs):
  6888. """
  6889. Generic handler to run an ``API`` call impersonating
  6890. a given user.This method is only used for impersonation
  6891. over the ``API`` because ``CLI`` impersonation takes place
  6892. through the generic ``DshUtils`` run_cmd mechanism.
  6893. :param cmd: PBS command
  6894. :type cmd: str or None
  6895. :param user: PBS user or current user
  6896. :type user: str or None
  6897. :raises: eval
  6898. """
  6899. fn = None
  6900. objid = None
  6901. _data = None
  6902. if user is None:
  6903. user = self.du.get_current_user()
  6904. else:
  6905. # user may be a PbsUser object, cast it to string for the remainder
  6906. # of the function
  6907. user = str(user)
  6908. if cmd == 'submit':
  6909. if obj is None:
  6910. return None
  6911. _data = copy.copy(obj)
  6912. # the following attributes cause problems 'pickling',
  6913. # since they are not needed we unset them
  6914. _data.attrl = None
  6915. _data.attropl = None
  6916. _data.logger = None
  6917. _data.utils = None
  6918. elif cmd in ('alterjob', 'holdjob', 'sigjob', 'msgjob', 'rlsjob',
  6919. 'rerunjob', 'orderjob', 'runjob', 'movejob',
  6920. 'select', 'delete', 'status', 'manager', 'terminate',
  6921. 'deljob', 'delresv', 'alterresv'):
  6922. objid = obj
  6923. if 'data' in kwargs:
  6924. _data = kwargs['data']
  6925. if _data is not None:
  6926. fn = self.du.create_temp_file()
  6927. with open(fn, 'w+b') as tmpfile:
  6928. cPickle.dump(_data, tmpfile)
  6929. os.chmod(fn, 0755)
  6930. if self._is_local:
  6931. os.chdir(tempfile.gettempdir())
  6932. else:
  6933. self.du.run_copy(self.hostname, fn, fn)
  6934. if not self._is_local:
  6935. p_env = '"import os; print os.environ[\'PTL_EXEC\']"'
  6936. ret = self.du.run_cmd(self.hostname, ['python', '-c', p_env],
  6937. logerr=False)
  6938. if ret['out']:
  6939. runcmd = [os.path.join(ret['out'][0], 'pbs_as')]
  6940. else:
  6941. runcmd = ['pbs_as']
  6942. elif 'PTL_EXEC' in os.environ:
  6943. runcmd = [os.path.join(os.environ['PTL_EXEC'], 'pbs_as')]
  6944. else:
  6945. runcmd = ['pbs_as']
  6946. runcmd += ['-c', cmd, '-u', user]
  6947. if objid is not None:
  6948. runcmd += ['-o']
  6949. if isinstance(objid, list):
  6950. runcmd += [','.join(objid)]
  6951. else:
  6952. runcmd += [objid]
  6953. if fn is not None:
  6954. runcmd += ['-f', fn]
  6955. if 'hostname' in kwargs:
  6956. hostname = kwargs['hostname']
  6957. else:
  6958. hostname = self.hostname
  6959. runcmd += ['-s', hostname]
  6960. if 'extend' in kwargs and kwargs['extend'] is not None:
  6961. runcmd += ['-e', kwargs['extend']]
  6962. ret = self.du.run_cmd(self.hostname, runcmd, logerr=False, runas=user)
  6963. out = ret['out']
  6964. if ret['err']:
  6965. if cmd in CMD_ERROR_MAP:
  6966. m = CMD_ERROR_MAP[cmd]
  6967. if m in ret['err'][0]:
  6968. if fn is not None:
  6969. os.remove(fn)
  6970. if not self._is_local:
  6971. self.du.rm(self.hostname, fn)
  6972. raise eval(str(ret['err'][0]))
  6973. self.logger.debug('err: ' + str(ret['err']))
  6974. if fn is not None:
  6975. os.remove(fn)
  6976. if not self._is_local:
  6977. self.du.rm(self.hostname, fn)
  6978. if cmd == 'submit':
  6979. if out:
  6980. return out[0].strip()
  6981. else:
  6982. return None
  6983. elif cmd in ('alterjob', 'holdjob', 'sigjob', 'msgjob', 'rlsjob',
  6984. 'rerunjob', 'orderjob', 'runjob', 'movejob', 'delete',
  6985. 'terminate', 'alterresv'):
  6986. if ret['out']:
  6987. return int(ret['out'][0])
  6988. else:
  6989. return 1
  6990. elif cmd in ('manager', 'select', 'status'):
  6991. return eval(out[0])
  6992. def alterresv(self, resvid, attrib, extend=None, runas=None,
  6993. logerr=True):
  6994. """
  6995. Alter attributes associated to a reservation. Raises
  6996. ``PbsResvAlterError`` on error.
  6997. :param resvid: identifier of the reservation.
  6998. :type resvid: str.
  6999. :param attrib: A dictionary of attributes to set.
  7000. :type attrib: dictionary.
  7001. :param extend: extend options.
  7002. :param runas: run as user.
  7003. :type runas: str or None.
  7004. :param logerr: If False, CLI commands do not log error,
  7005. i.e. silent mode.
  7006. :type logerr: bool.
  7007. :raises: PbsResvAlterError.
  7008. """
  7009. prefix = 'reservation alter on ' + self.shortname
  7010. if runas is not None:
  7011. prefix += ' as ' + str(runas)
  7012. prefix += ': ' + resvid
  7013. if attrib is not None:
  7014. prefix += ' %s' % (str(attrib))
  7015. self.logger.info(prefix)
  7016. c = None
  7017. resvid = resvid.split()
  7018. if self.get_op_mode() == PTL_CLI:
  7019. pcmd = [os.path.join(self.client_conf['PBS_EXEC'], 'bin',
  7020. 'pbs_ralter')]
  7021. if attrib is not None:
  7022. _conf = self.default_client_pbs_conf
  7023. pcmd += self.utils.convert_to_cli(attrib, op=IFL_RALTER,
  7024. hostname=self.client,
  7025. dflt_conf=_conf)
  7026. pcmd += resvid
  7027. if not self.default_client_pbs_conf:
  7028. pcmd = ['PBS_CONF_FILE=' + self.client_pbs_conf_file] + pcmd
  7029. as_script = True
  7030. else:
  7031. as_script = False
  7032. ret = self.du.run_cmd(self.client, pcmd, runas=runas,
  7033. as_script=as_script, level=logging.INFOCLI,
  7034. logerr=logerr)
  7035. rc = ret['rc']
  7036. if ret['err'] != ['']:
  7037. self.last_error = ret['err']
  7038. if ret['out'] != ['']:
  7039. self.last_out = ret['out']
  7040. self.last_rc = rc
  7041. elif runas is not None:
  7042. rc = self.pbs_api_as('alterresv', resvid, runas, data=attrib)
  7043. else:
  7044. c = self._connect(self.hostname)
  7045. if c < 0:
  7046. return c
  7047. a = self.utils.convert_to_attrl(attrib)
  7048. rc = pbs_modify_resv(c, resvid, a, extend)
  7049. if rc != 0:
  7050. raise PbsResvAlterError(rc=rc, rv=False, msg=self.geterrmsg(),
  7051. post=self._disconnect, conn=c)
  7052. else:
  7053. return rc
  7054. if c:
  7055. self._disconnect(c)
  7056. def expect(self, obj_type, attrib=None, id=None, op=EQ, attrop=PTL_OR,
  7057. attempt=0, max_attempts=None, interval=None, count=None,
  7058. extend=None, offset=0, runas=None, level=logging.INFO,
  7059. msg=None):
  7060. """
  7061. expect an attribute to match a given value as per an
  7062. operation.
  7063. :param obj_type: The type of object to query, JOB, SERVER,
  7064. SCHEDULER, QUEUE NODE
  7065. :type obj_type: str
  7066. :param attrib: Attributes to query, can be a string, a list,
  7067. or a dict
  7068. :type attrib: str or list or dictionary
  7069. :param id: The id of the object to act upon
  7070. :param op: An operation to perform on the queried data,
  7071. e.g., EQ, SET, LT,..
  7072. :param attrop: Operation on multiple attributes, either
  7073. PTL_AND, PTL_OR when an PTL_AND is used, only
  7074. batch objects having all matches are
  7075. returned, otherwise an OR is applied
  7076. :param attempt: The number of times this function has been
  7077. called
  7078. :type attempt: int
  7079. :param max_attempts: The maximum number of attempts to
  7080. perform.C{param_max_attempts}: 5
  7081. :type max_attempts: int or None
  7082. :param interval: The interval time btween attempts.
  7083. C{param_interval}: 1s
  7084. :param count: If True, attrib will be accumulated using
  7085. function counter
  7086. :type count: bool
  7087. :param extend: passed to the stat call
  7088. :param offset: the time to wait before the initial check.
  7089. Defaults to 0.
  7090. :type offset: int
  7091. :param runas: query as a given user. Defaults to current
  7092. user
  7093. :type runas: str or None
  7094. :param msg: Message from last call of this function, this
  7095. message will be used while raising
  7096. PtlExpectError.
  7097. :type msg: str or None
  7098. :returns: True if attributes are as expected
  7099. :raises: PtlExpectError if attributes are not as expected
  7100. """
  7101. if attempt == 0 and offset > 0:
  7102. self.logger.log(level, self.logprefix + 'expect offset set to ' +
  7103. str(offset))
  7104. time.sleep(offset)
  7105. if attrib is None:
  7106. attrib = {}
  7107. if ATTR_version in attrib and max_attempts is None:
  7108. max_attempts = 3
  7109. if max_attempts is None:
  7110. max_attempts = int(self.ptl_conf['expect_max_attempts'])
  7111. if interval is None:
  7112. interval = self.ptl_conf['expect_interval']
  7113. if attempt >= max_attempts:
  7114. _msg = "expected on " + self.logprefix + msg
  7115. raise PtlExpectError(rc=1, rv=False, msg=_msg)
  7116. if obj_type == SERVER and id is None:
  7117. id = self.hostname
  7118. if isinstance(attrib, str):
  7119. attrib = {attrib: ''}
  7120. elif isinstance(attrib, list):
  7121. d = {}
  7122. for l in attrib:
  7123. d[l] = ''
  7124. attrib = d
  7125. # Add check for substate=42 for jobstate=R, if not added explicitly.
  7126. if obj_type == JOB:
  7127. add_attribs = {'substate': False}
  7128. substate = False
  7129. for k, v in attrib.items():
  7130. if k == 'job_state' and ((isinstance(v, tuple) and
  7131. 'R' in v[-1]) or v == 'R'):
  7132. add_attribs['substate'] = 42
  7133. elif k == 'job_state=R':
  7134. add_attribs['substate=42'] = v
  7135. elif 'substate' in k:
  7136. substate = True
  7137. if add_attribs['substate'] and not substate:
  7138. attrib['substate'] = add_attribs['substate']
  7139. attrop = PTL_AND
  7140. del add_attribs, substate
  7141. prefix = 'expect on ' + self.logprefix
  7142. msg = []
  7143. attrs_to_ignore = []
  7144. for k, v in attrib.items():
  7145. args = None
  7146. if isinstance(v, tuple):
  7147. operator = v[0]
  7148. if len(v) > 2:
  7149. args = v[2:]
  7150. val = v[1]
  7151. else:
  7152. operator = op
  7153. val = v
  7154. if operator not in PTL_OP_TO_STR:
  7155. self.logger.log(level, "Operator not supported by expect(), "
  7156. "cannot verify change in " + str(k))
  7157. attrs_to_ignore.append(k)
  7158. continue
  7159. msg += [k, PTL_OP_TO_STR[operator].strip()]
  7160. if callable(val):
  7161. msg += ['callable(' + val.__name__ + ')']
  7162. if args is not None:
  7163. msg.extend(map(lambda x: str(x), args))
  7164. else:
  7165. msg += [str(val)]
  7166. msg += [PTL_ATTROP_TO_STR[attrop]]
  7167. # Delete the attributes that we cannot verify
  7168. for k in attrs_to_ignore:
  7169. del(attrib[k])
  7170. if attrs_to_ignore and len(attrib) < 1 and op == SET:
  7171. return True
  7172. # remove the last converted PTL_ATTROP_TO_STR
  7173. if len(msg) > 1:
  7174. msg = msg[:-1]
  7175. if len(attrib) == 0:
  7176. msg += [PTL_OP_TO_STR[op]]
  7177. msg += [PBS_OBJ_MAP[obj_type]]
  7178. if id is not None:
  7179. msg += [str(id)]
  7180. if attempt > 0:
  7181. msg += ['attempt:', str(attempt + 1)]
  7182. # Default count to True if the attribute contains an '=' in its name
  7183. # for example 'job_state=R' implies that a count of job_state is needed
  7184. if count is None and self.utils.operator_in_attribute(attrib):
  7185. count = True
  7186. if count:
  7187. newattr = self.utils.convert_attributes_by_op(attrib)
  7188. if len(newattr) == 0:
  7189. newattr = attrib
  7190. statlist = [self.counter(obj_type, newattr, id, extend, op=op,
  7191. attrop=attrop, level=logging.DEBUG,
  7192. runas=runas)]
  7193. else:
  7194. try:
  7195. statlist = self.status(obj_type, attrib, id=id,
  7196. level=logging.DEBUG, extend=extend,
  7197. runas=runas, logerr=False)
  7198. except PbsStatusError:
  7199. statlist = []
  7200. if (statlist is None or len(statlist) == 0 or
  7201. statlist[0] is None or len(statlist[0]) == 0):
  7202. if op == UNSET or list(set(attrib.values())) == [0]:
  7203. self.logger.log(level, prefix + " ".join(msg) + ' ... OK')
  7204. return True
  7205. else:
  7206. time.sleep(interval)
  7207. msg = " no data for " + " ".join(msg)
  7208. self.logger.log(level, prefix + msg)
  7209. return self.expect(obj_type, attrib, id, op, attrop,
  7210. attempt + 1, max_attempts, interval, count,
  7211. extend, level=level, msg=msg)
  7212. if attrib is None:
  7213. time.sleep(interval)
  7214. return self.expect(obj_type, attrib, id, op, attrop, attempt + 1,
  7215. max_attempts, interval, count, extend,
  7216. runas=runas, level=level, msg=" ".join(msg))
  7217. for k, v in attrib.items():
  7218. varargs = None
  7219. if isinstance(v, tuple):
  7220. op = v[0]
  7221. if len(v) > 2:
  7222. varargs = v[2:]
  7223. v = v[1]
  7224. for stat in statlist:
  7225. if k not in stat:
  7226. if op == UNSET:
  7227. continue
  7228. # Sometimes users provide the wrong case for attributes
  7229. # Convert to lowercase and compare
  7230. attrs_lower = {
  7231. ks.lower(): [ks, vs] for ks, vs in stat.iteritems()}
  7232. k_lower = k.lower()
  7233. if k_lower not in attrs_lower:
  7234. self.logger.error("Attribute %s not found" % k)
  7235. return False
  7236. stat_v = attrs_lower[k_lower][1]
  7237. stat_k = attrs_lower[k_lower][0]
  7238. else:
  7239. stat_v = stat[k]
  7240. stat_k = k
  7241. if stat_k == ATTR_version:
  7242. m = self.version_tag.match(stat_v)
  7243. if m:
  7244. stat_v = m.group('version')
  7245. else:
  7246. time.sleep(interval)
  7247. return self.expect(obj_type, attrib, id, op, attrop,
  7248. attempt + 1, max_attempts, interval,
  7249. count, extend, runas=runas,
  7250. level=level, msg=" ".join(msg))
  7251. # functions/methods are invoked and their return value
  7252. # used on expect
  7253. if callable(v):
  7254. if varargs is not None:
  7255. rv = v(stat_v, *varargs)
  7256. else:
  7257. rv = v(stat_v)
  7258. if isinstance(rv, bool):
  7259. if op == NOT:
  7260. if not rv:
  7261. continue
  7262. if rv:
  7263. continue
  7264. else:
  7265. v = rv
  7266. stat_v = self.utils.decode_value(stat_v)
  7267. v = self.utils.decode_value(str(v))
  7268. if stat_k == ATTR_version:
  7269. stat_v = LooseVersion(str(stat_v))
  7270. v = LooseVersion(str(v))
  7271. if op == EQ and stat_v == v:
  7272. continue
  7273. elif op == SET and count and stat_v == v:
  7274. continue
  7275. elif op == SET and count in (False, None):
  7276. continue
  7277. elif op == NE and stat_v != v:
  7278. continue
  7279. elif op == LT:
  7280. if stat_v < v:
  7281. continue
  7282. elif op == GT:
  7283. if stat_v > v:
  7284. continue
  7285. elif op == LE:
  7286. if stat_v <= v:
  7287. continue
  7288. elif op == GE:
  7289. if stat_v >= v:
  7290. continue
  7291. elif op == MATCH_RE:
  7292. if re.search(str(v), str(stat_v)):
  7293. continue
  7294. elif op == MATCH:
  7295. if str(stat_v).find(str(v)) != -1:
  7296. continue
  7297. msg += [' got: ' + stat_k + ' = ' + str(stat_v)]
  7298. self.logger.info(prefix + " ".join(msg))
  7299. time.sleep(interval)
  7300. # run custom actions defined for this object type
  7301. if self.actions:
  7302. for act_obj in self.actions.get_actions_by_type(obj_type):
  7303. if act_obj.enabled:
  7304. act_obj.action(self, obj_type, attrib, id, op,
  7305. attrop)
  7306. return self.expect(obj_type, attrib, id, op, attrop,
  7307. attempt + 1, max_attempts, interval, count,
  7308. extend, level=level, msg=" ".join(msg))
  7309. self.logger.log(level, prefix + " ".join(msg) + ' ... OK')
  7310. return True
  7311. def is_history_enabled(self):
  7312. """
  7313. Short-hand method to return the value of job_history_enable
  7314. """
  7315. a = ATTR_JobHistoryEnable
  7316. attrs = self.status(SERVER, level=logging.DEBUG)[0]
  7317. if ((a in attrs.keys()) and attrs[a] == 'True'):
  7318. return True
  7319. return False
  7320. def cleanup_jobs(self, extend=None, runas=None):
  7321. """
  7322. Helper function to delete all jobs.
  7323. By default this method will determine whether
  7324. job_history_enable is on and will cleanup all history
  7325. jobs. Specifying an extend parameter could override
  7326. this behavior.
  7327. :param runas: Clean the job as
  7328. :type runas: str or None
  7329. """
  7330. delete_xt = 'force'
  7331. select_xt = None
  7332. if self.is_history_enabled():
  7333. delete_xt += 'deletehist'
  7334. select_xt = 'x'
  7335. job_ids = self.select(extend=select_xt)
  7336. if len(job_ids) > 0:
  7337. try:
  7338. self.deljob(id=job_ids, extend=delete_xt, runas=runas,
  7339. wait=True)
  7340. except:
  7341. pass
  7342. rv = self.expect(JOB, {'job_state': 0}, count=True, op=SET)
  7343. if not rv:
  7344. return self.cleanup_jobs(extend=extend, runas=runas)
  7345. return rv
  7346. def cleanup_reservations(self, extend=None, runas=None):
  7347. """
  7348. Helper function to delete all reservations
  7349. """
  7350. reservations = self.status(RESV, level=logging.DEBUG)
  7351. while reservations is not None and len(reservations) != 0:
  7352. resvs = [r['id'] for r in reservations]
  7353. if len(resvs) > 0:
  7354. try:
  7355. self.delresv(resvs, logerr=False, runas=runas)
  7356. except:
  7357. pass
  7358. reservations = self.status(RESV, level=logging.DEBUG)
  7359. def cleanup_jobs_and_reservations(self, extend='forcedeletehist'):
  7360. """
  7361. Helper function to delete all jobs and reservations
  7362. :param extend: Optional extend parameter that is passed
  7363. to delete. It defaults to 'deletehist' which
  7364. is used in qdel and pbs_deljob() to force
  7365. delete all jobs, including history jobs
  7366. :param extend: str
  7367. """
  7368. rv = self.cleanup_jobs(extend)
  7369. self.cleanup_reservations()
  7370. return rv
  7371. def update_attributes(self, obj_type, bs):
  7372. """
  7373. Populate objects from batch status data
  7374. """
  7375. if bs is None:
  7376. return
  7377. for binfo in bs:
  7378. if 'id' not in binfo:
  7379. continue
  7380. id = binfo['id']
  7381. obj = None
  7382. if obj_type == JOB:
  7383. if ATTR_owner in binfo:
  7384. user = binfo[ATTR_owner].split('@')[0]
  7385. else:
  7386. user = None
  7387. if id in self.jobs:
  7388. self.jobs[id].attributes.update(binfo)
  7389. if self.jobs[id].username != user:
  7390. self.jobs[id].username = user
  7391. else:
  7392. self.jobs[id] = Job(user, binfo)
  7393. obj = self.jobs[id]
  7394. elif obj_type in (VNODE, NODE):
  7395. if id in self.nodes:
  7396. self.nodes[id].attributes.update(binfo)
  7397. else:
  7398. self.nodes[id] = MoM(id, binfo, diagmap={NODE: None},
  7399. server=self)
  7400. obj = self.nodes[id]
  7401. elif obj_type == SERVER:
  7402. self.attributes.update(binfo)
  7403. obj = self
  7404. elif obj_type == QUEUE:
  7405. if id in self.queues:
  7406. self.queues[id].attributes.update(binfo)
  7407. else:
  7408. self.queues[id] = Queue(id, binfo, server=self)
  7409. obj = self.queues[id]
  7410. elif obj_type == RESV:
  7411. if id in self.reservations:
  7412. self.reservations[id].attributes.update(binfo)
  7413. else:
  7414. self.reservations[id] = Reservation(id, binfo)
  7415. obj = self.reservations[id]
  7416. elif obj_type == HOOK:
  7417. if id in self.hooks:
  7418. self.hooks[id].attributes.update(binfo)
  7419. else:
  7420. self.hooks[id] = Hook(id, binfo, server=self)
  7421. obj = self.hooks[id]
  7422. elif obj_type == PBS_HOOK:
  7423. if id in self.pbshooks:
  7424. self.pbshooks[id].attributes.update(binfo)
  7425. else:
  7426. self.pbshooks[id] = Hook(id, binfo, server=self)
  7427. obj = self.pbshooks[id]
  7428. elif obj_type == SCHED:
  7429. if id in self.schedulers:
  7430. self.schedulers[id].attributes.update(binfo)
  7431. if 'sched_priv' in binfo:
  7432. self.schedulers[id].setup_sched_priv(
  7433. binfo['sched_priv'])
  7434. else:
  7435. if 'sched_host' not in binfo:
  7436. hostname = self.hostname
  7437. else:
  7438. hostname = binfo['sched_host']
  7439. if SCHED in self.diagmap:
  7440. diag = self.diag
  7441. diagmap = self.diagmap
  7442. else:
  7443. diag = None
  7444. diagmap = {}
  7445. spriv = None
  7446. if 'sched_priv' in binfo:
  7447. spriv = binfo['sched_priv']
  7448. self.schedulers[id] = Scheduler(hostname=hostname,
  7449. server=self,
  7450. diag=diag,
  7451. diagmap=diagmap,
  7452. id=id,
  7453. sched_priv=spriv)
  7454. self.schedulers[id].attributes.update(binfo)
  7455. obj = self.schedulers[id]
  7456. elif obj_type == RSC:
  7457. if id in self.resources:
  7458. self.resources[id].attributes.update(binfo)
  7459. else:
  7460. rtype = None
  7461. rflag = None
  7462. if 'type' in binfo:
  7463. rtype = binfo['type']
  7464. if 'flag' in binfo:
  7465. rflag = binfo['flag']
  7466. self.resources[id] = Resource(id, rtype, rflag)
  7467. if obj is not None:
  7468. self.utils.update_attributes_list(obj)
  7469. obj.__dict__.update(binfo)
  7470. def counter(self, obj_type=None, attrib=None, id=None, extend=None,
  7471. op=None, attrop=None, bslist=None, level=logging.INFO,
  7472. idonly=True, grandtotal=False, db_access=None, runas=None,
  7473. resolve_indirectness=False):
  7474. """
  7475. Accumulate properties set on an object. For example, to
  7476. count number of free nodes:
  7477. ``server.counter(VNODE,{'state':'free'})``
  7478. :param obj_type: The type of object to query, one of the
  7479. * objects
  7480. :param attrib: Attributes to query, can be a string, a
  7481. list, a dictionary
  7482. :type attrib: str or list or dictionary
  7483. :param id: The id of the object to act upon
  7484. :param extend: The extended parameter to pass to the stat
  7485. call
  7486. :param op: The operation used to match attrib to what is
  7487. queried. SET or None
  7488. :type op: str or None
  7489. :param attrop: Operation on multiple attributes, either
  7490. PTL_AND, PTL_OR
  7491. :param bslist: Optional, use a batch status dict list
  7492. instead of an obj_type
  7493. :param idonly: if true, return the name/id of the matching
  7494. objects
  7495. :type idonly: bool
  7496. :param db_access: credentials to access db, either a path
  7497. to file or dictionary
  7498. :type db_access: str or dictionary
  7499. :param runas: run as user
  7500. :type runas: str or None
  7501. """
  7502. self.logit('counter: ', obj_type, attrib, id, level=level)
  7503. return self._filter(obj_type, attrib, id, extend, op, attrop, bslist,
  7504. PTL_COUNTER, idonly, grandtotal, db_access,
  7505. runas=runas,
  7506. resolve_indirectness=resolve_indirectness)
  7507. def filter(self, obj_type=None, attrib=None, id=None, extend=None, op=None,
  7508. attrop=None, bslist=None, idonly=True, grandtotal=False,
  7509. db_access=None, runas=None, resolve_indirectness=False):
  7510. """
  7511. Filter objects by properties. For example, to filter all
  7512. free nodes:``server.filter(VNODE,{'state':'free'})``
  7513. For each attribute queried, if idonly is True, a list of
  7514. matching object names is returned; if idonly is False, then
  7515. the value of each attribute queried is returned.
  7516. This is unlike Python's built-in 'filter' that returns a
  7517. subset of objects matching from a pool of objects. The
  7518. Python filtering mechanism remains very useful in some
  7519. situations and should be used programmatically to achieve
  7520. desired filtering goals that can not be met easily with
  7521. PTL's filter method.
  7522. :param obj_type: The type of object to query, one of the
  7523. * objects
  7524. :param attrib: Attributes to query, can be a string, a
  7525. list, a dictionary
  7526. :type attrib: str or list or dictionary
  7527. :param id: The id of the object to act upon
  7528. :param extend: The extended parameter to pass to the stat
  7529. call
  7530. :param op: The operation used to match attrib to what is
  7531. queried. SET or None
  7532. :type op: str or None
  7533. :param bslist: Optional, use a batch status dict list
  7534. instead of an obj_type
  7535. :type bslist: List or None
  7536. :param idonly: if true, return the name/id of the matching
  7537. objects
  7538. :type idonly: bool
  7539. :param db_access: credentials to access db, either path to
  7540. file or dictionary
  7541. :type db_access: str or dictionary
  7542. :param runas: run as user
  7543. :type runas: str or None
  7544. """
  7545. self.logit('filter: ', obj_type, attrib, id)
  7546. return self._filter(obj_type, attrib, id, extend, op, attrop, bslist,
  7547. PTL_FILTER, idonly, db_access, runas=runas,
  7548. resolve_indirectness=resolve_indirectness)
  7549. def _filter(self, obj_type=None, attrib=None, id=None, extend=None,
  7550. op=None, attrop=None, bslist=None, mode=PTL_COUNTER,
  7551. idonly=True, grandtotal=False, db_access=None, runas=None,
  7552. resolve_indirectness=False):
  7553. if bslist is None:
  7554. try:
  7555. _a = resolve_indirectness
  7556. tmp_bsl = self.status(obj_type, attrib, id,
  7557. level=logging.DEBUG, extend=extend,
  7558. db_access=db_access, runas=runas,
  7559. resolve_indirectness=_a)
  7560. del _a
  7561. except PbsStatusError:
  7562. return None
  7563. bslist = self.utils.filter_batch_status(tmp_bsl, attrib)
  7564. del tmp_bsl
  7565. if bslist is None:
  7566. return None
  7567. if isinstance(attrib, str):
  7568. attrib = attrib.split(',')
  7569. total = {}
  7570. for bs in bslist:
  7571. if isinstance(attrib, list):
  7572. # when filtering on multiple values, ensure that they are
  7573. # all present on the object, otherwise skip
  7574. if attrop == PTL_AND:
  7575. match = True
  7576. for k in attrib:
  7577. if k not in bs:
  7578. match = False
  7579. if not match:
  7580. continue
  7581. for a in attrib:
  7582. if a in bs:
  7583. if op == SET:
  7584. k = a
  7585. else:
  7586. # Since this is a list of attributes, no operator
  7587. # was provided so we settle on "equal"
  7588. k = a + '=' + str(bs[a])
  7589. if mode == PTL_COUNTER:
  7590. amt = 1
  7591. if grandtotal:
  7592. amt = self.utils.decode_value(bs[a])
  7593. if not isinstance(amt, (int, float)):
  7594. amt = 1
  7595. if a in total:
  7596. total[a] += amt
  7597. else:
  7598. total[a] = amt
  7599. else:
  7600. if k in total:
  7601. total[k] += amt
  7602. else:
  7603. total[k] = amt
  7604. elif mode == PTL_FILTER:
  7605. if k in total:
  7606. if idonly:
  7607. total[k].append(bs['id'])
  7608. else:
  7609. total[k].append(bs)
  7610. else:
  7611. if idonly:
  7612. total[k] = [bs['id']]
  7613. else:
  7614. total[k] = [bs]
  7615. else:
  7616. self.logger.error("Unhandled mode " + str(mode))
  7617. return None
  7618. elif isinstance(attrib, dict):
  7619. tmptotal = {} # The running count that will be used for total
  7620. # when filtering on multiple values, ensure that they are
  7621. # all present on the object, otherwise skip
  7622. match = True
  7623. for k, v in attrib.items():
  7624. if k not in bs:
  7625. match = False
  7626. if attrop == PTL_AND:
  7627. break
  7628. else:
  7629. continue
  7630. amt = self.utils.decode_value(bs[k])
  7631. if isinstance(v, tuple):
  7632. op = v[0]
  7633. val = self.utils.decode_value(v[1])
  7634. elif op == SET:
  7635. val = None
  7636. pass
  7637. else:
  7638. op = EQ
  7639. val = self.utils.decode_value(v)
  7640. if ((op == LT and amt < val) or
  7641. (op == LE and amt <= val) or
  7642. (op == EQ and amt == val) or
  7643. (op == GE and amt >= val) or
  7644. (op == GT and amt > val) or
  7645. (op == NE and amt != val) or
  7646. (op == MATCH and str(amt).find(str(val)) != -1) or
  7647. (op == MATCH_RE and
  7648. re.search(str(val), str(amt))) or
  7649. (op == SET)):
  7650. # There is a match, proceed to track the attribute
  7651. self._filter_helper(bs, k, val, amt, op, mode,
  7652. tmptotal, idonly, grandtotal)
  7653. elif attrop == PTL_AND:
  7654. match = False
  7655. if mode == PTL_COUNTER:
  7656. # requesting specific key/value pairs should result
  7657. # in 0 available elements
  7658. tmptotal[str(k) + PTL_OP_TO_STR[op] + str(val)] = 0
  7659. break
  7660. elif mode == PTL_COUNTER:
  7661. tmptotal[str(k) + PTL_OP_TO_STR[op] + str(val)] = 0
  7662. if attrop != PTL_AND or (attrop == PTL_AND and match):
  7663. for k, v in tmptotal.items():
  7664. if k not in total:
  7665. total[k] = v
  7666. else:
  7667. total[k] += v
  7668. return total
  7669. def _filter_helper(self, bs, k, v, amt, op, mode, total, idonly,
  7670. grandtotal):
  7671. # default operation to '='
  7672. if op is None or op not in PTL_OP_TO_STR:
  7673. op = '='
  7674. op_str = PTL_OP_TO_STR[op]
  7675. if op == SET:
  7676. # override PTL_OP_TO_STR fro SET operations
  7677. op_str = ''
  7678. v = ''
  7679. ky = k + op_str + str(v)
  7680. if mode == PTL_COUNTER:
  7681. incr = 1
  7682. if grandtotal:
  7683. if not isinstance(amt, (int, float)):
  7684. incr = 1
  7685. else:
  7686. incr = amt
  7687. if ky in total:
  7688. total[ky] += incr
  7689. else:
  7690. total[ky] = incr
  7691. elif mode == PTL_FILTER:
  7692. if ky in total:
  7693. if idonly:
  7694. total[ky].append(bs['id'])
  7695. else:
  7696. total[ky].append(bs)
  7697. else:
  7698. if idonly:
  7699. total[ky] = [bs['id']]
  7700. else:
  7701. total[ky] = [bs]
  7702. def logit(self, msg, obj_type, attrib, id, level=logging.INFO):
  7703. """
  7704. Generic logging routine for ``IFL`` commands
  7705. :param msg: The message to log
  7706. :type msg: str
  7707. :param obj_type: object type, i.e *
  7708. :param attrib: attributes to log
  7709. :param id: name of object to log
  7710. :type id: str or list
  7711. :param level: log level, defaults to ``INFO``
  7712. """
  7713. s = []
  7714. if self.logger is not None:
  7715. if obj_type is None:
  7716. obj_type = MGR_OBJ_NONE
  7717. s = [msg + PBS_OBJ_MAP[obj_type]]
  7718. if id:
  7719. if isinstance(id, list):
  7720. s += [' ' + ",".join(id)]
  7721. else:
  7722. s += [' ' + str(id)]
  7723. if attrib:
  7724. s += [' ' + str(attrib)]
  7725. self.logger.log(level, "".join(s))
  7726. def equivalence_classes(self, obj_type=None, attrib={}, bslist=None,
  7727. op=RESOURCES_AVAILABLE, show_zero_resources=True,
  7728. db_access=None, resolve_indirectness=False):
  7729. """
  7730. :param obj_type: PBS Object to query, one of *
  7731. :param attrib: attributes to build equivalence classes
  7732. out of.
  7733. :type attrib: dictionary
  7734. :param bslist: Optional, list of dictionary representation
  7735. of a batch status
  7736. :type bslist: List
  7737. :param op: set to RESOURCES_AVAILABLE uses the dynamic
  7738. amount of resources available, i.e., available -
  7739. assigned, otherwise uses static amount of
  7740. resources available
  7741. :param db_acccess: set to either file containing credentials
  7742. to DB access or dictionary containing
  7743. ``{'dbname':...,'user':...,'port':...}``
  7744. :type db_access: str or dictionary
  7745. """
  7746. if attrib is None:
  7747. attrib = {}
  7748. if len(attrib) == 0 and obj_type is not None:
  7749. if obj_type in (VNODE, NODE):
  7750. attrib = ['resources_available.ncpus',
  7751. 'resources_available.mem', 'state']
  7752. elif obj_type == JOB:
  7753. attrib = ['Resource_List.select',
  7754. 'queue', 'array_indices_submitted']
  7755. elif obj_type == RESV:
  7756. attrib = ['Resource_List.select']
  7757. else:
  7758. return {}
  7759. if bslist is None and obj_type is not None:
  7760. # To get the resources_assigned we must stat the entire object so
  7761. # bypass the specific attributes that would filter out assigned
  7762. if op == RESOURCES_AVAILABLE:
  7763. bslist = self.status(obj_type, None, level=logging.DEBUG,
  7764. db_access=db_access,
  7765. resolve_indirectness=resolve_indirectness)
  7766. else:
  7767. bslist = self.status(obj_type, attrib, level=logging.DEBUG,
  7768. db_access=db_access,
  7769. resolve_indirectness=resolve_indirectness)
  7770. if bslist is None or len(bslist) == 0:
  7771. return {}
  7772. # automatically convert an objectlist into a batch status dict list
  7773. # for ease of use.
  7774. if not isinstance(bslist[0], dict):
  7775. bslist = self.utils.objlist_to_dictlist(bslist)
  7776. if isinstance(attrib, str):
  7777. attrib = attrib.split(',')
  7778. self.logger.debug("building equivalence class")
  7779. equiv = {}
  7780. for bs in bslist:
  7781. cls = ()
  7782. skip_cls = False
  7783. # attrs will be part of the EquivClass object
  7784. attrs = {}
  7785. # Filter the batch attributes by the attribs requested
  7786. for a in attrib:
  7787. if a in bs:
  7788. amt = self.utils.decode_value(bs[a])
  7789. if a.startswith('resources_available.'):
  7790. val = a.replace('resources_available.', '')
  7791. if (op == RESOURCES_AVAILABLE and
  7792. 'resources_assigned.' + val in bs):
  7793. amt = (int(amt) - int(self.utils.decode_value(
  7794. bs['resources_assigned.' + val])))
  7795. # this case where amt goes negative is not a bug, it
  7796. # may happen when computing whats_available due to the
  7797. # fact that the computation is subtractive, it does
  7798. # add back resources when jobs/reservations end but
  7799. # is only concerned with what is available now for
  7800. # a given duration, that is why in the case where
  7801. # amount goes negative we set it to 0
  7802. if amt < 0:
  7803. amt = 0
  7804. # TODO: not a failproof way to catch a memory type
  7805. # but PbsTypeSize should return the right value if
  7806. # it fails to parse it as a valid memory value
  7807. if a.endswith('mem'):
  7808. try:
  7809. amt = PbsTypeSize().encode(amt)
  7810. except:
  7811. # we guessed the type incorrectly
  7812. pass
  7813. else:
  7814. val = a
  7815. if amt == 0 and not show_zero_resources:
  7816. skip_cls = True
  7817. break
  7818. # Build the key of the equivalence class
  7819. cls += (val + '=' + str(amt),)
  7820. attrs[val] = amt
  7821. # Now that we are done with this object, add it to an equiv class
  7822. if len(cls) > 0 and not skip_cls:
  7823. if cls in equiv:
  7824. equiv[cls].add_entity(bs['id'])
  7825. else:
  7826. equiv[cls] = EquivClass(cls, attrs, [bs['id']])
  7827. return equiv.values()
  7828. def show_equivalence_classes(self, eq=None, obj_type=None, attrib={},
  7829. bslist=None, op=RESOURCES_AVAILABLE,
  7830. show_zero_resources=True, db_access=None,
  7831. resolve_indirectness=False):
  7832. """
  7833. helper function to show the equivalence classes
  7834. :param eq: equivalence classes as compute by
  7835. equivalence_classes see equivalence_classes
  7836. for remaining parameters description
  7837. :param db_acccess: set to either file containing credentials
  7838. to DB access or dictionary containing
  7839. ``{'dbname':...,'user':...,'port':...}``
  7840. :type db_access: str or dictionary
  7841. """
  7842. if eq is None:
  7843. equiv = self.equivalence_classes(obj_type, attrib, bslist, op,
  7844. show_zero_resources, db_access,
  7845. resolve_indirectness)
  7846. else:
  7847. equiv = eq
  7848. equiv = sorted(equiv, key=lambda e: len(e.entities))
  7849. for e in equiv:
  7850. # e.show()
  7851. print str(e)
  7852. def whats_available(self, attrib=None, jobs=None, resvs=None, nodes=None):
  7853. """
  7854. Returns what's available as a list of node equivalence
  7855. classes listed by availability over time.
  7856. :param attrib: attributes to consider
  7857. :type attrib: List
  7858. :param jobs: jobs to consider, if None, jobs are queried
  7859. locally
  7860. :param resvs: reservations to consider, if None, they are
  7861. queried locally
  7862. :param nodes: nodes to consider, if None, they are queried
  7863. locally
  7864. """
  7865. if attrib is None:
  7866. attrib = ['resources_available.ncpus',
  7867. 'resources_available.mem', 'state']
  7868. if resvs is None:
  7869. self.status(RESV)
  7870. resvs = self.reservations
  7871. if jobs is None:
  7872. self.status(JOB)
  7873. jobs = self.jobs
  7874. if nodes is None:
  7875. self.status(NODE)
  7876. nodes = self.nodes
  7877. nodes_id = nodes.keys()
  7878. avail_nodes_by_time = {}
  7879. def alloc_resource(self, node, resources):
  7880. # helper function. Must work on a scratch copy of nodes otherwise
  7881. # resources_available will get corrupted
  7882. for rsc, value in resources.items():
  7883. if isinstance(value, int) or value.isdigit():
  7884. avail = node.attributes['resources_available.' + rsc]
  7885. nvalue = int(avail) - int(value)
  7886. node.attributes['resources_available.' + rsc] = nvalue
  7887. # Account for reservations
  7888. for resv in resvs.values():
  7889. resvnodes = resv.execvnode('resv_nodes')
  7890. if resvnodes:
  7891. starttime = self.utils.convert_stime_to_seconds(
  7892. resv.attributes['reserve_start'])
  7893. for node in resvnodes:
  7894. for n, resc in node.items():
  7895. tm = int(starttime) - int(self.ctime)
  7896. if tm < 0 or n not in nodes_id:
  7897. continue
  7898. if tm not in avail_nodes_by_time:
  7899. avail_nodes_by_time[tm] = []
  7900. if nodes[n].attributes['sharing'] in ('default_excl',
  7901. 'force_excl'):
  7902. avail_nodes_by_time[tm].append(nodes[n])
  7903. try:
  7904. nodes_id.remove(n)
  7905. except:
  7906. pass
  7907. else:
  7908. ncopy = copy.copy(nodes[n])
  7909. ncopy.attributes = copy.deepcopy(
  7910. nodes[n].attributes)
  7911. avail_nodes_by_time[tm].append(ncopy)
  7912. self.alloc_resource(nodes[n], resc)
  7913. # go on to look at the calendar of scheduled jobs to run and set
  7914. # the node availability according to when the job is estimated to
  7915. # start on the node
  7916. for job in self.jobs.values():
  7917. if (job.attributes['job_state'] != 'R' and
  7918. 'estimated.exec_vnode' in job.attributes):
  7919. estimatednodes = job.execvnode('estimated.exec_vnode')
  7920. if estimatednodes:
  7921. st = job.attributes['estimated.start_time']
  7922. # Tweak for nas format of estimated time that has
  7923. # num seconds from epoch followed by datetime
  7924. if st.split()[0].isdigit():
  7925. starttime = st.split()[0]
  7926. else:
  7927. starttime = self.utils.convert_stime_to_seconds(st)
  7928. for node in estimatednodes:
  7929. for n, resc in node.items():
  7930. tm = int(starttime) - int(self.ctime)
  7931. if (tm < 0 or n not in nodes_id or
  7932. nodes[n].state != 'free'):
  7933. continue
  7934. if tm not in avail_nodes_by_time:
  7935. avail_nodes_by_time[tm] = []
  7936. if (nodes[n].attributes['sharing'] in
  7937. ('default_excl', 'force_excl')):
  7938. avail_nodes_by_time[tm].append(nodes[n])
  7939. try:
  7940. nodes_id.remove(n)
  7941. except:
  7942. pass
  7943. else:
  7944. ncopy = copy.copy(nodes[n])
  7945. ncopy.attributes = copy.deepcopy(
  7946. nodes[n].attributes)
  7947. avail_nodes_by_time[tm].append(ncopy)
  7948. self.alloc_resource(nodes[n], resc)
  7949. # remaining nodes are free "forever"
  7950. for node in nodes_id:
  7951. if self.nodes[node].state == 'free':
  7952. if 'infinity' not in avail_nodes_by_time:
  7953. avail_nodes_by_time['infinity'] = [nodes[node]]
  7954. else:
  7955. avail_nodes_by_time['infinity'].append(nodes[node])
  7956. # if there is a dedicated time, move the availaility time up to that
  7957. # time as necessary
  7958. if self.schedulers[self.dflt_sched_name] is None:
  7959. self.schedulers[self.dflt_sched_name] = Scheduler(server=self)
  7960. self.schedulers[self.dflt_sched_name].parse_dedicated_time()
  7961. if self.schedulers[self.dflt_sched_name].dedicated_time:
  7962. dedtime = self.schedulers[
  7963. self.dflt_sched_name].dedicated_time[0]['from'] - int(
  7964. self.ctime)
  7965. if dedtime <= int(time.time()):
  7966. dedtime = None
  7967. else:
  7968. dedtime = None
  7969. # finally, build the equivalence classes off of the nodes availability
  7970. # over time
  7971. self.logger.debug("Building equivalence classes")
  7972. whazzup = {}
  7973. if 'state' in attrib:
  7974. attrib.remove('state')
  7975. for tm, nds in avail_nodes_by_time.items():
  7976. equiv = self.equivalence_classes(VNODE, attrib, bslist=nds,
  7977. show_zero_resources=False)
  7978. if dedtime and (tm > dedtime or tm == 'infinity'):
  7979. tm = dedtime
  7980. if tm != 'infinity':
  7981. tm = str(datetime.timedelta(seconds=int(tm)))
  7982. whazzup[tm] = equiv
  7983. return whazzup
  7984. def show_whats_available(self, wa=None, attrib=None, jobs=None,
  7985. resvs=None, nodes=None):
  7986. """
  7987. helper function to show availability as computed by
  7988. whats_available
  7989. :param wa: a dictionary of available attributes. see
  7990. whats_available for a\
  7991. description of the remaining parameters
  7992. :type wa: Dictionary
  7993. """
  7994. if wa is None:
  7995. wa = self.whats_available(attrib, jobs, resvs, nodes)
  7996. if len(wa) > 0:
  7997. print "%24s\t%s" % ("Duration of availability", "Resources")
  7998. print "-------------------------\t----------"
  7999. swa = sorted(wa.items(), key=lambda x: x[0])
  8000. for (k, eq_classes) in swa:
  8001. for eq_cl in eq_classes:
  8002. print "%24s\t%s" % (str(k), str(eq_cl))
  8003. def utilization(self, resources=None, nodes=None, jobs=None, entity={}):
  8004. """
  8005. Return utilization of consumable resources on a set of
  8006. nodes
  8007. :param nodes: A list of dictionary of nodes on which to
  8008. compute utilization.Defaults to nodes
  8009. resulting from a stat call to the current
  8010. server.
  8011. :type nodes: List
  8012. :param resources: comma-separated list of resources to
  8013. compute utilization on. The name of the
  8014. resource is for example, ncpus or mem
  8015. :type resources: List
  8016. :param entity: An optional dictionary of entities to
  8017. compute utilization of,
  8018. ``e.g. {'user':u1, 'group':g1, 'project'=p1}``
  8019. :type entity: Dictionary
  8020. The utilization is returned as a dictionary of percentage
  8021. utilization for each resource.
  8022. Non-consumable resources are silently ignored.
  8023. """
  8024. if nodes is None:
  8025. nodes = self.status(NODE)
  8026. if jobs is None:
  8027. jobs = self.status(JOB)
  8028. if resources is None:
  8029. rescs = ['ncpus', 'mem']
  8030. else:
  8031. rescs = resources
  8032. utilization = {}
  8033. resavail = {}
  8034. resassigned = {}
  8035. usednodes = 0
  8036. totnodes = 0
  8037. nodes_set = set()
  8038. for res in rescs:
  8039. resavail[res] = 0
  8040. resassigned[res] = 0
  8041. # If an entity is specified utilization must be collected from the
  8042. # Jobs usage, otherwise we can get the information directly from
  8043. # the nodes.
  8044. if len(entity) > 0 and jobs is not None:
  8045. for job in jobs:
  8046. if 'job_state' in job and job['job_state'] != 'R':
  8047. continue
  8048. entity_match = True
  8049. for k, v in entity.items():
  8050. if k not in job or job[k] != v:
  8051. entity_match = False
  8052. break
  8053. if entity_match:
  8054. for res in rescs:
  8055. r = 'Resource_List.' + res
  8056. if r in job:
  8057. tmpr = int(self.utils.decode_value(job[r]))
  8058. resassigned[res] += tmpr
  8059. if 'exec_host' in job:
  8060. hosts = ResourceResv.get_hosts(job['exec_host'])
  8061. nodes_set |= set(hosts)
  8062. for node in nodes:
  8063. # skip nodes in non-schedulable state
  8064. nstate = node['state']
  8065. if ('down' in nstate or 'unavailable' in nstate or
  8066. 'unknown' in nstate or 'Stale' in nstate):
  8067. continue
  8068. totnodes += 1
  8069. # If an entity utilization was requested, all used nodes were
  8070. # already filtered into the nodes_set specific to that entity, we
  8071. # simply add them up. If no entity was requested, it suffices to
  8072. # have the node have a jobs attribute to count it towards total
  8073. # used nodes
  8074. if len(entity) > 0:
  8075. if node['id'] in nodes_set:
  8076. usednodes += 1
  8077. elif 'jobs' in node:
  8078. usednodes += 1
  8079. for res in rescs:
  8080. avail = 'resources_available.' + res
  8081. if avail in node:
  8082. val = self.utils.decode_value(node[avail])
  8083. if isinstance(val, int):
  8084. resavail[res] += val
  8085. # When entity matching all resources assigned are
  8086. # accounted for by the job usage
  8087. if len(entity) == 0:
  8088. assigned = 'resources_assigned.' + res
  8089. if assigned in node:
  8090. val = self.utils.decode_value(node[assigned])
  8091. if isinstance(val, int):
  8092. resassigned[res] += val
  8093. for res in rescs:
  8094. if res in resavail:
  8095. if res in resassigned:
  8096. if resavail[res] > 0:
  8097. utilization[res] = [resassigned[res], resavail[res]]
  8098. # Only report nodes utilization if no specific resources were requested
  8099. if resources is None:
  8100. utilization['nodes'] = [usednodes, totnodes]
  8101. return utilization
  def create_vnodes(self, name=None, attrib=None, num=1, mom=None,
                    additive=False, sharednode=True, restart=True,
                    delall=True, natvnode=None, usenatvnode=False,
                    attrfunc=None, fname=None, vnodes_per_host=1,
                    createnode=True, expect=True):
      """
      Helper function to create vnodes.

      :param name: prefix name of the vnode(s) to create
      :type name: str or None
      :param attrib: attributes to assign to each node
      :type attrib: dict or None
      :param num: the number of vnodes to create. Defaults to 1
      :type num: int
      :param mom: the MoM object on which the vnode definition is
                  to be inserted
      :param additive: If True, vnodes are added to the existing
                       vnode defs. Defaults to False.
      :type additive: bool
      :param sharednode: If True, all vnodes will share the same
                         host. Defaults to True.
      :type sharednode: bool
      :param restart: If True the MoM will be restarted.
      :type restart: bool
      :param delall: If True delete all server nodes prior to
                     inserting vnodes
      :type delall: bool
      :param natvnode: name of the natural vnode, i.e. the node
                       name in qmgr -c "create node <name>".
                       Defaults to the short name of ``mom``.
      :type natvnode: str or None
      :param usenatvnode: count the natural vnode as an
                          allocatable node.
      :type usenatvnode: bool
      :param attrfunc: an attribute=value function generator,
                       see create_vnode_def
      :param fname: optional name of the vnode def file
      :type fname: str or None
      :param vnodes_per_host: number of vnodes per host
      :type vnodes_per_host: int
      :param createnode: whether to create the node via manage or
                         not. Defaults to True
      :type createnode: bool
      :param expect: whether to expect attributes to be set or
                     not. Defaults to True
      :type expect: bool
      :returns: True on success and False otherwise
      """
      if mom is None or name is None or attrib is None:
          self.logger.error("name, attributes, and mom object are required")
          return False

      if natvnode is None:
          natvnode = mom.shortname

      if delall:
          try:
              rv = self.manager(MGR_CMD_DELETE, NODE, None, "")
              if rv != 0:
                  return False
          except PbsManagerError:
              # a manager error while deleting is deliberately ignored
              pass

      vdef = mom.create_vnode_def(name, attrib, num, sharednode,
                                  usenatvnode=usenatvnode,
                                  attrfunc=attrfunc,
                                  vnodes_per_host=vnodes_per_host)
      mom.insert_vnode_def(vdef, fname=fname, additive=additive,
                           restart=restart)

      # Names of the vnodes whose attributes are verified below. When the
      # natural vnode counts as one of the ``num`` nodes, one fewer
      # "<name>[i]" vnode is expected.
      new_vnodelist = []
      if usenatvnode:
          new_vnodelist.append(natvnode)
          num_check = num - 1
      else:
          num_check = num
      for i in range(num_check):
          new_vnodelist.append("%s[%s]" % (name, i))

      if createnode:
          try:
              statm = self.status(NODE, id=natvnode)
          except Exception:
              # Any failure of the status query is treated as "node does
              # not exist"; unlike a bare except this lets
              # KeyboardInterrupt/SystemExit propagate.
              statm = []
          if len(statm) >= 1:
              _m = 'Mom %s already exists, not creating' % (natvnode)
              self.logger.info(_m)
          else:
              if mom.pbs_conf and 'PBS_MOM_SERVICE_PORT' in mom.pbs_conf:
                  m_attr = {'port': mom.pbs_conf['PBS_MOM_SERVICE_PORT']}
              else:
                  m_attr = None
              self.manager(MGR_CMD_CREATE, NODE, m_attr, natvnode)

      # only expect if vnodes were added rather than the nat vnode modified
      if expect and num > 0:
          attrs = {'state': 'free'}
          attrs.update(attrib)
          for vn in new_vnodelist:
              self.expect(VNODE, attrs, id=vn)
      return True
  def create_moms(self, name=None, attrib=None, num=1, delall=True,
                  createnode=True, conf_prefix='pbs.conf_m',
                  home_prefix='pbs_m', momhosts=None, init_port=15011,
                  step_port=2):
      """
      Create MoM configurations and optionally add them to the
      server. Unique ``pbs.conf`` files are defined and created
      on each host on which MoMs are to be created.

      :param name: Optional prefix name of the nodes to create.
                   Defaults to the short name of each MoM host.
      :type name: str or None
      :param attrib: Optional node attributes to assign to the
                     MoM. The dictionary passed in is not modified.
      :type attrib: dict or None
      :param num: Number of MoMs to create
      :type num: int
      :param delall: Whether to delete all nodes on the server.
                     Defaults to True.
      :type delall: bool
      :param createnode: Whether to create the nodes and add them
                         to the server. Defaults to True.
      :type createnode: bool
      :param conf_prefix: The prefix of the PBS conf file. Defaults
                          to pbs.conf_m
      :type conf_prefix: str
      :param home_prefix: The prefix of the PBS_HOME directory.
                          Defaults to pbs_m
      :type home_prefix: str
      :param momhosts: A list of hosts on which to deploy num
                       MoMs. Defaults to the server's own host.
      :type momhosts: List
      :param init_port: The initial port number to start assigning
                        ``PBS_MOM_SERVICE_PORT`` to.
                        Default ``15011``.
      :type init_port: int
      :param step_port: The increments at which ports are
                        allocated. Defaults to 2.
      :type step_port: int
      :returns: False if the server is not up, if the existing nodes
                could not be deleted, or if creating any node failed;
                True otherwise

      .. note:: Since PBS requires that
                PBS_MANAGER_SERVICE_PORT = PBS_MOM_SERVICE_PORT+1
                The step number must be greater or equal to 2.
      """
      if not self.isUp():
          self.logger.error("An up and running PBS server on " +
                            self.hostname + " is required")
          return False

      if delall:
          try:
              rc = self.manager(MGR_CMD_DELETE, NODE, None, "")
          except PbsManagerError as e:
              rc = e.rc
          if rc:
              # a failed delete is only fatal if nodes are still defined
              if len(self.status(NODE)) > 0:
                  self.logger.error("create_moms: Error deleting all nodes")
                  return False

      pi = PBSInitServices()
      if momhosts is None:
          momhosts = [self.hostname]

      # Work on a private copy so the caller's dictionary does not pick up
      # the per-node 'Mom' and 'port' entries set below.
      node_attrib = dict(attrib) if attrib is not None else {}

      error = False
      for hostname in momhosts:
          _pconf = self.du.parse_pbs_config(hostname)
          if 'PBS_HOME' in _pconf:
              _hp = _pconf['PBS_HOME']
              if _hp.endswith('/'):
                  _hp = _hp[:-1]
              _hp = os.path.dirname(_hp)
          else:
              _hp = '/var/spool'
          _np_conf = _pconf
          _np_conf['PBS_START_SERVER'] = '0'
          _np_conf['PBS_START_SCHED'] = '0'
          _np_conf['PBS_START_MOM'] = '1'

          # The default prefix is derived per host; an explicit ``name``
          # applies to every host.
          if name is None:
              prefix = hostname.split('.')[0]
          else:
              prefix = name

          for i in range(0, num * step_port, step_port):
              _np = os.path.join(_hp, home_prefix + str(i))
              _n_pbsconf = os.path.join('/etc', conf_prefix + str(i))
              _np_conf['PBS_HOME'] = _np
              port = init_port + i
              _np_conf['PBS_MOM_SERVICE_PORT'] = str(port)
              _np_conf['PBS_MANAGER_SERVICE_PORT'] = str(port + 1)
              self.du.set_pbs_config(hostname, fout=_n_pbsconf,
                                     confs=_np_conf)
              pi.initd(hostname, conf_file=_n_pbsconf, op='start')
              m = MoM(hostname, pbsconf_file=_n_pbsconf)
              if m.isUp():
                  m.stop()
              if hostname != self.hostname:
                  m.add_config({'$clienthost': self.hostname})
              try:
                  m.start()
              except PbsServiceError:
                  # The service failed to start
                  self.logger.error("Service failed to start using port " +
                                    str(port) + "...skipping")
                  self.du.rm(hostname, _n_pbsconf)
                  continue
              if createnode:
                  node_attrib['Mom'] = hostname
                  node_attrib['port'] = port
                  _n = prefix + '-' + str(i)
                  rc = self.manager(MGR_CMD_CREATE, NODE, node_attrib,
                                    id=_n)
                  if rc != 0:
                      self.logger.error("error creating node " + _n)
                      error = True
      if error:
          return False

      return True
  def create_hook(self, name, attrs):
      """
      Create a hook called ``name`` and set ``attrs`` on it.

      :param name: The name of the hook to create
      :type name: str
      :param attrs: The attributes to set on the newly created hook,
                    passed as-is to the manager set command
      :returns: True once the hook is created and its attributes are
                set, False if a hook of that name already exists
      :raises: whatever the manager calls raise
      """
      existing = self.status(HOOK)
      # no status output at all means no hook can clash with the new name
      if existing is None or len(existing) == 0:
          known_ids = []
      else:
          known_ids = [hook['id'] for hook in existing]

      if name in known_ids:
          self.logger.error('hook named ' + name + ' exists')
          return False

      self.manager(MGR_CMD_CREATE, HOOK, None, name)
      self.manager(MGR_CMD_SET, HOOK, attrs, id=name, expect=True)
      return True
  def import_hook(self, name, body):
      """
      Helper function to import hook body into hook by name.
      The hook must have been created prior to calling this
      function.

      The body is written to a local temporary file; when the server
      is not local that file is first copied to the server host's
      temporary directory. Both the local file and the remote copy
      are removed afterwards, even if the import fails.

      :param name: The name of the hook to import body to
      :type name: str
      :param body: The body of the hook as a string.
      :type body: str
      :returns: True on success.
      :raises: whatever the manager import call raises
      """
      fn = self.du.create_temp_file(body=body)
      rfile = fn
      copied = False
      try:
          if not self._is_local:
              tmpdir = self.du.get_tempdir(self.hostname)
              rfile = os.path.join(tmpdir, os.path.basename(fn))
              self.du.run_copy(self.hostname, fn, rfile)
              copied = True

          a = {'content-type': 'application/x-python',
               'content-encoding': 'default',
               'input-file': rfile}
          self.manager(MGR_CMD_IMPORT, HOOK, a, name)
      finally:
          # Remove the local temp file by its local name: rfile is a path
          # on the server host and need not exist on this machine.
          os.remove(fn)
          if copied:
              self.du.rm(self.hostname, rfile)

      self.logger.info('server ' + self.shortname +
                       ': imported hook body\n---\n' + body + '---')
      return True
  def create_import_hook(self, name, attrs=None, body=None, overwrite=True):
      """
      Helper function to create a hook, import content into it,
      set the event and enable it.

      :param name: The name of the hook to create
      :type name: str
      :param attrs: The attributes to create the hook with. Must be
                    given and must contain at least ``event``.
      :type attrs: dict
      :param body: The hook body as a string
      :type body: str
      :param overwrite: If True and a hook of the same name already
                        exists, its creation is bypassed and ``attrs``
                        are set on the existing hook. If False, the
                        hook is always created, which fails when it
                        already exists. Defaults to True
      :type overwrite: bool
      :returns: True on success and False otherwise
      """
      # Check for log messages 20 seconds earlier, to account for
      # server and mom system time differences
      t = int(time.time()) - 20

      # attrs defaults to None, which must not reach the membership test
      if attrs is None or 'event' not in attrs:
          self.logger.error('attrs must specify at least an event and key')
          return False

      hook_exists = False
      hooks = self.status(HOOK)
      for h in hooks:
          if h['id'] == name:
              hook_exists = True

      if not hook_exists or not overwrite:
          rv = self.create_hook(name, attrs)
          if not rv:
              return False
      else:
          rc = self.manager(MGR_CMD_SET, HOOK, attrs, id=name)
          if rc != 0:
              return False

      # In 12.0 A MoM hook must be enabled and the event set prior to
      # importing, otherwise the MoM does not get the hook content
      ret = self.import_hook(name, body)

      # In case of mom hooks, make sure that the hook related files
      # are successfully copied to the MoM
      try:
          if 'exec' in attrs['event']:
              hook_py = name + '.PY'
              hook_hk = name + '.HK'
              pyfile = os.path.join(self.pbs_conf['PBS_HOME'],
                                    "server_priv", "hooks", hook_py)
              hfile = os.path.join(self.pbs_conf['PBS_HOME'],
                                   "server_priv", "hooks", hook_hk)
              logmsg = hook_py + ";copy hook-related file request received"

              cmd = os.path.join(self.client_conf['PBS_EXEC'], 'bin',
                                 'pbsnodes') + ' -a'
              cmd_out = self.du.run_cmd(self.hostname, cmd, sudo=True)
              if cmd_out['rc'] == 0:
                  for i in cmd_out['out']:
                      if re.match(r'\s+Mom = ', i):
                          mom_names = i.split(' = ')[1].split(',')
                          for m in mom_names:
                              # only MoMs known to this server object
                              # can have their logs checked
                              if m in self.moms:
                                  self.log_match(
                                      "successfully sent hook file %s to %s"
                                      % (hfile, m), interval=1)
                                  self.log_match(
                                      "successfully sent hook file %s to %s"
                                      % (pyfile, m), interval=1)
                                  self.moms[m].log_match(logmsg,
                                                         starttime=t)
              else:
                  # pbsnodes failed, the MoM list cannot be determined
                  return False
      except PtlLogMatchError:
          return False

      return ret
  def evaluate_formula(self, jobid=None, formula=None, full=True,
                       include_running_jobs=False, exclude_subjobs=True):
      """
      Evaluate the job sort formula

      :param jobid: If set, evaluate the formula for the given
                    jobid, if not set, formula is evaluated for
                    all jobs in state Q
      :type jobid: str or None
      :param formula: If set use the given formula. If not set,
                      the server's formula, if any, is used
      :type formula: str or None
      :param full: If True, returns a dictionary of job
                   identifiers as keys and the evaluated formula
                   as values. Returns None if no formula is used.
                   Each job id formula is returned as a tuple
                   (s,e) where s is the formula expression
                   associated to the job and e is the evaluated
                   numeric value of that expression, for example,
                   if job_sort_formula is ncpus + mem
                   a job requesting 2 cpus and 100kb of memory
                   would return ('2 + 100', 102). If False, if
                   a jobid is specified, return the integer
                   value of the evaluated formula.
      :type full: bool
      :param include_running_jobs: If True, reports formula
                                   value of running jobs.
                                   Defaults to False.
      :type include_running_jobs: bool
      :param exclude_subjobs: If True, only report formula of
                              parent job array
      :type exclude_subjobs: bool
      :returns: None when no formula is available; the evaluated
                value for ``jobid`` when ``full`` is False and that
                job was evaluated; otherwise a dictionary mapping
                job ids to ``(expression, value)`` tuples
      """
      _f_builtins = ['queue_priority', 'job_priority', 'eligible_time',
                     'fair_share_perc']
      if formula is None:
          d = self.status(SERVER, 'job_sort_formula')
          if len(d) > 0 and 'job_sort_formula' in d[0]:
              formula = d[0]['job_sort_formula']
          else:
              return None

      template_formula = self.utils._make_template_formula(formula)
      # to split up the formula into keywords, treat every operator and
      # any whitespace as a separator.
      # TODO: The list of operators may need to be expanded
      fres = [tok for tok in re.split(r'[()%+*/\s-]+', formula) if tok]

      if jobid:
          jobs = self.status(JOB, id=jobid, extend='t')
      else:
          jobs = self.status(JOB, extend='t')

      ret = {}
      for job in jobs:
          if not include_running_jobs and job['job_state'] != 'Q':
              continue
          f_value = {}
          # initialize the formula values to 0
          for res in fres:
              f_value[res] = 0

          if 'queue_priority' in fres:
              queue = self.status(JOB, 'queue', id=job['id'])[0]['queue']
              qstat = self.status(QUEUE, 'Priority', id=queue)
              if qstat and 'Priority' in qstat[0]:
                  qprio = int(qstat[0]['Priority'])
                  f_value['queue_priority'] = qprio
              else:
                  continue

          if 'job_priority' in fres:
              if 'Priority' in job:
                  jprio = int(job['Priority'])
                  f_value['job_priority'] = jprio
              else:
                  continue

          if 'eligible_time' in fres:
              if 'eligible_time' in job:
                  f_value['eligible_time'] = self.utils.convert_duration(
                      job['eligible_time'])

          if 'fair_share_perc' in fres:
              # lazily instantiate the default scheduler object
              if self.schedulers[self.dflt_sched_name] is None:
                  self.schedulers[self.dflt_sched_name] = Scheduler(
                      server=self)
              sched = self.schedulers[self.dflt_sched_name]

              if 'fairshare_entity' in sched.sched_config:
                  entity = sched.sched_config['fairshare_entity']
              else:
                  self.logger.error(self.logprefix +
                                    ' no fairshare entity in sched config')
                  continue
              if entity not in job:
                  self.logger.error(self.logprefix +
                                    ' job does not have property ' + entity)
                  continue

              try:
                  fs_info = sched.query_fairshare(name=job[entity])
                  if fs_info is not None and 'TREEROOT' in fs_info.perc:
                      f_value['fair_share_perc'] = \
                          (fs_info.perc['TREEROOT'] / 100)
              except PbsFairshareError:
                  f_value['fair_share_perc'] = 0

          # fill in the requested resources that appear in the formula
          for job_res, val in job.items():
              val = self.utils.decode_value(val)
              if job_res.startswith('Resource_List.'):
                  job_res = job_res.replace('Resource_List.', '')
              if job_res in fres and job_res not in _f_builtins:
                  f_value[job_res] = val

          tf = string.Template(template_formula)
          tfstr = tf.safe_substitute(f_value)
          if (jobid is not None or not exclude_subjobs or
                  (exclude_subjobs and
                   not self.utils.is_subjob(job['id']))):
              # NOTE: eval() executes the substituted formula text. The
              # formula comes from the caller or from the server's
              # job_sort_formula; do not feed untrusted formulas here.
              ret[job['id']] = (tfstr, eval(tfstr))

      if not full and jobid is not None and jobid in ret:
          # index by the requested id, not by whichever job the loop
          # happened to visit last
          return ret[jobid][1]
      return ret
  def _parse_limits(self, container=None, dictlist=None, id=None,
                    db_access=None):
      """
      Build ``Limit`` objects out of the ``max_run*`` attributes of a
      container's batch status.

      :param container: The PBS object to query, one of ``QUEUE``
                        or ``SERVER``. Metascheduling node group
                        limits are not yet queryable
      :type container: str or None
      :param dictlist: A list of dictionaries off of a batch
                       status. When None the container is queried
                       through ``self.status``
      :type dictlist: List
      :param id: Optional id of the object to query
      :param db_access: set to either file containing credentials
                        to DB access or dictionary containing
                        ``{'dbname':...,'user':...,'port':...}``
      :type db_access: str or dictionary
      :returns: A dictionary keyed by ``container`` holding the list
                of parsed limits; empty when there is nothing to parse
      """
      if container is None:
          self.logger.error('parse_limits expect container to be set')
          return {}

      if dictlist is not None:
          stat_list = dictlist
      else:
          stat_list = self.status(container, db_access=db_access)

      if not stat_list:
          return {}

      limits = {}
      for obj in stat_list:
          # filter the id here instead of during the stat call so that
          # we can call a full stat once rather than one stat per object
          if id is not None and obj['id'] != id:
              continue
          for attr_name, attr_val in obj.items():
              if not attr_name.startswith('max_run'):
                  continue
              for rval in attr_val.split(','):
                  rval = rval.strip("'")
                  parsed = self.utils.parse_fgc_limit(
                      attr_name + '=' + rval)
                  if parsed is None:
                      self.logger.error("Couldn't parse limit: " +
                                        attr_name + str(rval))
                      continue

                  (lim_type, resource, etype, ename, value) = parsed
                  # one Entity object is shared by all limits that name
                  # the same (type, name) pair
                  key = (etype, ename)
                  if key in self.entities:
                      entity = self.entities[key]
                  else:
                      entity = Entity(etype, ename)
                      self.entities[key] = entity

                  lim = Limit(lim_type, resource, entity, value,
                              container, obj['id'])
                  limits.setdefault(container, []).append(lim)
                  entity.set_limit(lim)
      return limits
  def parse_server_limits(self, server=None, db_access=None):
      """
      Parse the limits defined on the server object.

      :param server: list of dictionary of server data
      :type server: List
      :param db_access: set to either file containing credentials
                        to DB access or dictionary containing
                        ``{'dbname':...,'user':...,'port':...}``
      :type db_access: str or dictionary
      :returns: the result of ``_parse_limits`` for the ``SERVER``
                container
      """
      return self._parse_limits(container=SERVER, dictlist=server,
                                db_access=db_access)
  def parse_queue_limits(self, queues=None, id=None, db_access=None):
      """
      Parse the limits defined on queue objects.

      :param queues: list of dictionary of queue data
      :type queues: List
      :param id: The id of the queue to parse limit for. If None,
                 all queue limits are parsed
      :param db_access: set to either file containing credentials
                        to DB access or dictionary containing
                        ``{'dbname':...,'user':...,'port':...}``
      :type db_access: str or dictionary
      :returns: the result of ``_parse_limits`` for the ``QUEUE``
                container
      """
      return self._parse_limits(container=QUEUE, dictlist=queues, id=id,
                                db_access=db_access)
  def parse_all_limits(self, server=None, queues=None, db_access=None):
      """
      Parse all server and queue limits and store the merged result
      in ``self.limits``, replacing any previously stored value.

      :param server: list of dictionary of server data
      :type server: List
      :param queues: list of dictionary of queue data
      :type queues: List
      :param db_access: set to either file containing credentials
                        to DB access or dictionary containing
                        ``{'dbname':...,'user':...,'port':...}``
      :type db_access: str or dictionary
      :returns: the merged dictionary of server and queue limits
      """
      if hasattr(self, 'limits'):
          del self.limits

      slim = self.parse_server_limits(server, db_access=db_access)
      qlim = self.parse_queue_limits(queues, id=None, db_access=db_access)

      # Copy + update instead of concatenating the items() of both
      # dictionaries: no throwaway lists, and it does not depend on
      # items() returning a list. Queue entries win on a key clash, as
      # before.
      merged = dict(slim)
      merged.update(qlim)
      self.limits = merged
      return self.limits
  def limits_info(self, etype=None, ename=None, server=None, queues=None,
                  jobs=None, db_access=None, over=False):
      """
      Collect limit information for each entity on which a
      ``server/queue`` limit is applied.

      :param etype: entity type, one of u, g, p, o
      :type etype: str or None
      :param ename: entity name
      :type ename: str or None
      :param server: optional list of dictionary representation
                     of server object
      :type server: List
      :param queues: optional list of dictionary representation
                     of queues object
      :type queues: List
      :param jobs: optional list of dictionary representation of
                   jobs object. When None the jobs are queried
                   through ``self.status``
      :type jobs: List
      :param db_access: set to either file containing credentials
                        to DB access or dictionary containing
                        ``{'dbname':...,'user':...,'port':...}``
      :type db_access: str or dictionary
      :param over: If True, show only entities that are over their
                   limit. Default is False.
      :type over: bool
      :returns: A list of dictionary similar to that returned by
                a converted batch_status object, i.e., can be
                displayed using the Utils.show method
      """
      def create_linfo(lim, entity_type, ident, used):
          """
          Create limit information

          :param lim: Limit to apply
          :param entity_type: Type of entity
          :param ident: Name of the entity the usage belongs to
          :param used: Amount of the limit currently used
          :returns: dictionary describing the limit and its usage
          """
          tmp = {}
          tmp['id'] = entity_type + ':' + ident
          c = [PBS_OBJ_MAP[lim.container]]
          if lim.container_id:
              c += [':', lim.container_id]
          tmp['container'] = "".join(c)
          s = [str(lim.limit_type)]
          if lim.resource:
              s += ['.', lim.resource]
          tmp['limit_type'] = "".join(s)
          tmp['usage/limit'] = "".join([str(used), '/', str(lim.value)])
          tmp['remainder'] = int(lim.value) - int(used)
          return tmp

      def calc_usage(jobs, attr, name=None, resource=None):
          """
          Calculate the usage for the entity

          :param jobs: Jobs to account for
          :param attr: Job attribute
          :param name: Entity name
          :type name: str or None
          :param resource: PBS resource
          :type resource: str or None
          :returns: The usage
          """
          usage = {}
          # initialize usage of the named entity
          if name is not None and name not in ('PBS_GENERIC', 'PBS_ALL'):
              usage[name] = 0
          for j in jobs:
              entity = j[attr]
              # a resource limit counts the requested amount, a plain
              # limit counts jobs
              if resource:
                  amount = int(self.utils.decode_value(
                      j['Resource_List.' + resource]))
              else:
                  amount = 1
              usage[entity] = usage.get(entity, 0) + amount
          return usage

      self.parse_all_limits(server, queues, db_access)
      entities_p = self.entities.values()

      linfo = []
      cache = {}

      if jobs is None:
          jobs = self.status(JOB)

      for entity in sorted(entities_p, key=lambda e: e.name):
          for lim in entity.limits:
              _t = entity.type
              # skip non-matching entity types. We can't skip the entity
              # name due to proper handling of the PBS_GENERIC limits
              # we also can't skip overall limits
              if (_t != 'o') and (etype is not None and etype != _t):
                  continue

              _n = entity.name

              a = {}
              if lim.container == QUEUE and lim.container_id is not None:
                  a['queue'] = (EQ, lim.container_id)
              if lim.resource:
                  resource = 'Resource_List.' + lim.resource
                  a[resource] = (GT, 0)
              a['job_state'] = (EQ, 'R')
              a['substate'] = (EQ, 42)
              if etype == 'u' and ename is not None:
                  a['euser'] = (EQ, ename)
              else:
                  a['euser'] = (SET, '')
              if etype == 'g' and ename is not None:
                  a['egroup'] = (EQ, ename)
              else:
                  a['egroup'] = (SET, '')
              if etype == 'p' and ename is not None:
                  a['project'] = (EQ, ename)
              else:
                  a['project'] = (SET, '')

              # optimization: cache filtered results, keyed by the filter
              # contents. Attribute names are unique, so the sorted item
              # tuple identifies the filter dictionary.
              cache_key = tuple(sorted(a.items()))
              d = cache.get(cache_key)
              if d is None:
                  d = self.filter(JOB, a, bslist=jobs, attrop=PTL_AND,
                                  idonly=False, db_access=db_access)
                  cache[cache_key] = d

              # start from an empty usage so an unrecognized entity type
              # reports nothing instead of reusing a previous limit's
              # usage
              _u = {}
              if not d or 'job_state=R' not in d:
                  # in the absence of jobs, display limits defined with
                  # usage of 0
                  if ename is not None:
                      _u = {ename: 0}
                  else:
                      _u = {_n: 0}
              else:
                  if _t in ('u', 'o'):
                      _u = calc_usage(
                          d['job_state=R'], 'euser', _n, lim.resource)
                      # an overall limit applies across all running jobs
                      if _t == 'o':
                          all_used = sum(_u.values())
                          _u = dict.fromkeys(_u, all_used)
                  elif _t == 'g':
                      _u = calc_usage(
                          d['job_state=R'], 'egroup', _n, lim.resource)
                  elif _t == 'p':
                      _u = calc_usage(
                          d['job_state=R'], 'project', _n, lim.resource)

              for k, used in _u.items():
                  if not over or (int(used) > int(lim.value)):
                      if ename is not None and k != ename:
                          continue
                      if _n in ('PBS_GENERIC', 'PBS_ALL'):
                          if k not in ('PBS_GENERIC', 'PBS_ALL'):
                              k += '/' + _n
                      elif _n != k:
                          continue
                      tmp_linfo = create_linfo(lim, _t, k, used)
                      linfo.append(tmp_linfo)

      return linfo
  def __insert_jobs_in_db(self, jobs, hostname=None):
      """
      An experimental interface that converts jobs from file
      into entries in the PBS database that can be recovered
      upon server restart if all other ``objects``, ``queues``,
      ``resources``, etc... are already defined.

      The interface to PBS used in this method is incomplete
      and will most likely cause serious issues. Use only for
      development purposes

      :param jobs: list of dictionary representation of jobs; every
                   value is expected to be a string
      :type jobs: List
      :param hostname: host name used to build the job identifiers.
                       Defaults to the local host name
      :type hostname: str or None
      :returns: the list of SQL statements, one per job attribute plus
                one per job; an empty list when ``jobs`` is empty
      """
      def quote(text):
          # SQL string literal; embedded single quotes are doubled
          return "'" + text.replace("'", "''") + "'"

      if not jobs:
          return []

      if hostname is None:
          hostname = socket.gethostname()

      # a very crude, and not quite maintainable way to get the flag value
      # of an attribute. This is one of the reasons why this conversion
      # of jobs is highly experimental
      flag_map = {'ctime': 9, 'qtime': 9, 'hop_count': 9, 'queue_rank': 9,
                  'queue_type': 9, 'etime': 9, 'job_kill_delay': 9,
                  'run_version': 9, 'job_state': 9, 'exec_host': 9,
                  'exec_host2': 9, 'exec_vnode': 9, 'mtime': 9, 'stime': 9,
                  'substate': 9, 'hashname': 9, 'comment': 9,
                  'run_count': 9, 'schedselect': 13}

      state_map = {'Q': 1, 'H': 2, 'W': 3, 'R': 4, 'E': 5, 'X': 6, 'B': 7}

      job_attr_stmt = ("INSERT INTO pbs.job_attr (ji_jobid, attr_name, "
                       "attr_resource, attr_value, attr_flags)")

      job_stmt = ("INSERT INTO pbs.job (ji_jobid, ji_sv_name, ji_state, "
                  "ji_substate,ji_svrflags, ji_numattr,"
                  " ji_ordering, ji_priority, ji_stime, ji_endtbdry, "
                  "ji_queue, ji_destin, ji_un_type, ji_momaddr, "
                  "ji_momport, ji_exitstat, ji_quetime, ji_rteretry, "
                  "ji_fromsock, ji_fromaddr, ji_4jid, ji_4ash, "
                  "ji_credtype, ji_qrank, ji_savetm, ji_creattm)")

      all_stmts = []

      for job in jobs:
          jobid = job['id'].split('.')[0] + '.' + hostname

          # one pbs.job_attr row per attribute
          for key, val in job.items():
              if key in ('id', 'Mail_Points', 'Mail_Users'):
                  continue
              # all-digit values are inserted as numbers, anything else
              # as a quoted string
              value = val if val.isdigit() else quote(val)
              if key in flag_map:
                  flag = flag_map[key]
              elif key.startswith('Resource_List'):
                  flag = 15
              else:
                  flag = 11

              stmt = job_attr_stmt
              stmt += " VALUES('" + jobid + "', "
              if '.' in key:
                  # <attribute>.<resource>; split once so a dotted
                  # resource name cannot break the unpacking
                  attr_name, attr_res = key.split('.', 1)
                  stmt += "'" + attr_name + "', '" + attr_res + "'" + ", "
              else:
                  stmt += "'" + key + "', ''" + ", "
              stmt += value + "," + str(flag)
              stmt += ");"
              self.logger.debug(stmt)
              all_stmts.append(stmt)

          js = job['job_state']
          svrflags = 1
          state = 1
          if js in state_map:
              state = state_map[js]
              if state == 4:
                  # Other states svrflags aren't handled and will
                  # cause issues, another reason this is highly
                  # experimental
                  svrflags = 12289

          tm = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

          # the pbs.job row itself
          stmt = job_stmt
          stmt += " VALUES('" + jobid + "', 1, "
          stmt += str(state) + ", " + job['substate']
          stmt += ", " + str(svrflags)
          stmt += ", 0, 0, 0"
          if 'stime' in job:
              self.logger.debug('stime: ' + job['stime'])
              st = time.strptime(job['stime'], "%a %b %d %H:%M:%S %Y")
              stmt += ", " + str(time.mktime(st))
          else:
              stmt += ", 0"
          stmt += ", 0"
          stmt += ", " + quote(job['queue'])
          if 'exec_host2' in job:
              # a string column: must be quoted like the empty default
              stmt += ", " + quote(job['exec_host2'])
          else:
              stmt += ", ''"
          stmt += ", 0, 0, 0, 0, 0, 0, 0, 0, '', '', 0, 0"
          stmt += ", '" + tm + "', '" + tm + "');"
          self.logger.debug(stmt)

          all_stmts.append(stmt)

      return all_stmts
  8897. def clusterize(self, conf_file=None, hosts=None, import_jobs=False,
  8898. db_creds_file=None):
  8899. """
  8900. Mimic a ``pbs_diag`` snapshot onto a set of hosts running
  8901. a PBS ``server``,``scheduler``, and ``MoM``.
  8902. This method clones the following information from the diag:
  8903. ``Server attributes``
  8904. ``Server resourcedef``
  8905. ``Hooks``
  8906. ``Scheduler configuration``
  8907. ``Scheduler resource_group``
  8908. ``Scheduler holiday file``
  8909. ``Per Queue attributes``
  8910. Nodes are copied as a vnode definition file inserted into
  8911. each host's MoM instance.
  8912. Currently no support for cloning the server 'sched' object,
  8913. nor to copy nodes to multi-mom instances.
  8914. Jobs are copied over only if import_jobs is True, see below
  8915. for details
  8916. :param asdiag: Path to the pbs_diag snapshot to use
  8917. :type asdiag: str
  8918. :param conf_file: Configuration file for the MoM instance
  8919. :param hosts: List of hosts on which to clone the diag
  8920. snapshot
  8921. :type hosts: List
  8922. :param include_jobs: [Experimental] if True jobs from the
  8923. pbs_diag are imported into the host's
  8924. database. There are several caveats to
  8925. this option:
  8926. The scripts are not imported
  8927. The users and groups are not created on
  8928. the local system.There are no actual
  8929. processes created on the MoM for each
  8930. job so operations on the job such as
  8931. signals or delete will fail (delete -W
  8932. force will still work)
  8933. :type include_jobs: bool
  8934. :param db_creds_file: Path to file containing credentials
  8935. to access the DB
  8936. :type db_creds_file: str or None
  8937. """
  8938. if not self.has_diag:
  8939. return
  8940. if hosts is None:
  8941. return
  8942. for h in hosts:
  8943. svr = Server(h)
  8944. sched = Scheduler(server=svr, diag=self.diag, diagmap=self.diagmap)
  8945. try:
  8946. svr.manager(MGR_CMD_DELETE, NODE, None, id="")
  8947. except:
  8948. pass
  8949. svr.revert_to_defaults(delqueues=True, delhooks=True)
  8950. local = svr.pbs_conf['PBS_HOME']
  8951. diag_rdef = os.path.join(self.diag, 'server_priv', 'resourcedef')
  8952. diag_sc = os.path.join(self.diag, 'sched_priv', 'sched_config')
  8953. diag_rg = os.path.join(self.diag, 'sched_priv', 'resource_group')
  8954. diag_hldy = os.path.join(self.diag, 'sched_priv', 'holidays')
  8955. nodes = os.path.join(self.diag, 'pbsnodes_va.out')
  8956. diag_hooks = os.path.join(self.diag, 'qmgr_ph.out')
  8957. diag_ps = os.path.join(self.diag, 'qmgr_ps.out')
  8958. local_rdef = os.path.join(local, 'server_priv', 'resourcedef')
  8959. local_sc = os.path.join(local, 'sched_priv', 'sched_config')
  8960. local_rg = os.path.join(local, 'sched_priv', 'resource_group')
  8961. local_hldy = os.path.join(local, 'sched_priv', 'holidays')
  8962. _fcopy = [(diag_rdef, local_rdef), (diag_sc, local_sc),
  8963. (diag_rg, local_rg), (diag_hldy, local_hldy)]
  8964. # Restart since resourcedef may have changed
  8965. svr.restart()
  8966. if os.path.isfile(diag_ps):
  8967. tmp_ps = open(diag_ps)
  8968. cmd = [os.path.join(svr.pbs_conf['PBS_EXEC'], 'bin', 'qmgr')]
  8969. self.du.run_cmd(h, cmd, stdin=tmp_ps, sudo=True, logerr=False)
  8970. tmp_ps.close()
  8971. # Unset any site-sensitive attributes
  8972. for a in ['pbs_license_info', 'manager', 'operators',
  8973. 'mail_from', 'acl_roots', 'acl_hosts']:
  8974. try:
  8975. svr.manager(MGR_CMD_UNSET, SERVER, a, sudo=True)
  8976. except:
  8977. pass
  8978. for (d, l) in _fcopy:
  8979. if os.path.isfile(d):
  8980. self.logger.info('copying ' + d + ' to ' + l)
  8981. self.du.run_copy(h, src=d, dest=l, sudo=True)
  8982. diag_sched = self.status(SCHED)
  8983. for ds in diag_sched:
  8984. for k, v in ds.items():
  8985. if k != 'id':
  8986. try:
  8987. svr.manager(MGR_CMD_SET, SCHED, {k: v},
  8988. logerr=False)
  8989. except PbsManagerError:
  8990. self.logger.warning(
  8991. 'Skipping sched attribute ' + k)
  8992. sched.signal('-HUP')
  8993. if os.path.isfile(nodes):
  8994. f = open(nodes)
  8995. lines = f.readlines()
  8996. f.close()
  8997. dl = self.utils.convert_to_dictlist(lines)
  8998. vdef = self.utils.dictlist_to_vnodedef(dl)
  8999. if vdef:
  9000. try:
  9001. svr.manager(MGR_CMD_DELETE, NODE, None, "")
  9002. except:
  9003. pass
  9004. MoM(h, pbsconf_file=conf_file).insert_vnode_def(vdef)
  9005. svr.restart()
  9006. svr.manager(MGR_CMD_CREATE, NODE, id=svr.shortname)
  9007. # check if any node is associated to a queue.
  9008. # This is needed because the queues 'hasnodes' attribute
  9009. # does not get set through vnode def update and must be set
  9010. # via qmgr. It only needs to be set once, not for each node
  9011. qtoset = {}
  9012. for n in dl:
  9013. if 'queue' in n and n['queue'] not in qtoset:
  9014. qtoset[n['queue']] = n['id']
  9015. # before setting queue on nodes make sure that the vnode
  9016. # def is all set
  9017. svr.expect(NODE, {'state=free': (GE, len(dl))}, interval=3)
  9018. for k, v in qtoset.items():
  9019. svr.manager(MGR_CMD_SET, NODE, {'queue': k}, id=v)
  9020. # populate hooks
  9021. if os.path.isfile(diag_hooks):
  9022. tmp_hook = open(diag_hooks)
  9023. cmd = [os.path.join(svr.pbs_conf['PBS_EXEC'], 'bin', 'qmgr')]
  9024. self.du.run_cmd(h, cmd, stdin=tmp_hook, sudo=True)
  9025. tmp_hook.close()
  9026. # import jobs
  9027. if import_jobs is not None:
  9028. jobs = self.status(JOB)
  9029. sql_stmt = self.__insert_jobs_in_db(jobs, h)
  9030. print "\n".join(sql_stmt)
  9031. if db_creds_file is not None:
  9032. pass
  9033. class EquivClass(PBSObject):
  9034. """
  9035. Equivalence class holds information on a collection of entities
  9036. grouped according to a set of attributes
  9037. :param attributes: Dictionary of attributes
  9038. :type attributes: Dictionary
  9039. :param entities: List of entities
  9040. :type entities: List
  9041. """
  9042. def __init__(self, name, attributes={}, entities=[]):
  9043. self.name = name
  9044. self.attributes = attributes
  9045. self.entities = entities
  9046. self.logger = logging.getLogger(__name__)
  9047. def add_entity(self, entity):
  9048. """
  9049. Add entities
  9050. :param entity: Entity to add
  9051. :type entity: str
  9052. """
  9053. if entity not in self.entities:
  9054. self.entities.append(entity)
  9055. def __str__(self):
  9056. s = [str(len(self.entities)), ":", ":".join(self.name)]
  9057. return "".join(s)
  9058. def show(self, showobj=False):
  9059. """
  9060. Show the entities
  9061. :param showobj: If true then show the entities
  9062. :type showobj: bool
  9063. """
  9064. s = " && ".join(self.name) + ': '
  9065. if showobj:
  9066. s += str(self.entities)
  9067. else:
  9068. s += str(len(self.entities))
  9069. print s
  9070. return s
  9071. class Resource(PBSObject):
  9072. """
  9073. PBS resource referenced by name, type and flag
  9074. :param name: Resource name
  9075. :type name: str or None
  9076. :param type: Type of resource
  9077. """
  9078. def __init__(self, name=None, type=None, flag=None):
  9079. PBSObject.__init__(self, name)
  9080. self.set_name(name)
  9081. self.set_type(type)
  9082. self.set_flag(flag)
  9083. def set_name(self, name):
  9084. """
  9085. Set the resource name
  9086. """
  9087. self.name = name
  9088. self.attributes['id'] = name
  9089. def set_type(self, type):
  9090. """
  9091. Set the resource type
  9092. """
  9093. self.type = type
  9094. self.attributes['type'] = type
  9095. def set_flag(self, flag):
  9096. """
  9097. Set the flag
  9098. """
  9099. self.flag = flag
  9100. self.attributes['flag'] = flag
  9101. def __str__(self):
  9102. s = [self.attributes['id']]
  9103. if 'type' in self.attributes:
  9104. s.append('type=' + self.attributes['type'])
  9105. if 'flag' in self.attributes:
  9106. s.append('flag=' + self.attributes['flag'])
  9107. return " ".join(s)
  9108. class Holidays():
  9109. """
  9110. Descriptive calss for Holiday file.
  9111. """
  9112. def __init__(self):
  9113. self.year = {'id': "YEAR", 'value': None, 'valid': False}
  9114. self.weekday = {'id': "weekday", 'p': None, 'np': None, 'valid': None,
  9115. 'position': None}
  9116. self.monday = {'id': "monday", 'p': None, 'np': None, 'valid': None,
  9117. 'position': None}
  9118. self.tuesday = {'id': "tuesday", 'p': None, 'np': None, 'valid': None,
  9119. 'position': None}
  9120. self.wednesday = {'id': "wednesday", 'p': None, 'np': None,
  9121. 'valid': None, 'position': None}
  9122. self.thursday = {'id': "thursday", 'p': None, 'np': None,
  9123. 'valid': None, 'position': None}
  9124. self.friday = {'id': "friday", 'p': None, 'np': None, 'valid': None,
  9125. 'position': None}
  9126. self.saturday = {'id': "saturday", 'p': None, 'np': None,
  9127. 'valid': None, 'position': None}
  9128. self.sunday = {'id': "sunday", 'p': None, 'np': None, 'valid': None,
  9129. 'position': None}
  9130. self.days_set = [] # list of set days
  9131. self._days_map = {'weekday': self.weekday, 'monday': self.monday,
  9132. 'tuesday': self.tuesday, 'wednesday': self.wednesday,
  9133. 'thursday': self.thursday, 'friday': self.friday,
  9134. 'saturday': self.saturday, 'sunday': self.sunday}
  9135. self.holidays = [] # list of calendar holidays
  9136. def __str__(self):
  9137. """
  9138. Return the content to write to holidays file as a string
  9139. """
  9140. content = []
  9141. if self.year['valid']:
  9142. content.append(self.year['id'] + "\t" +
  9143. self.year['value'])
  9144. for i in range(0, len(self.days_set)):
  9145. content.append(self.days_set[i]['id'] + "\t" +
  9146. self.days_set[i]['p'] + "\t" +
  9147. self.days_set[i]['np'])
  9148. # Add calendar holidays
  9149. for day in self.holidays:
  9150. content.append(day)
  9151. return "\n".join(content)
  9152. class Scheduler(PBSService):
  9153. """
  9154. Container of Scheduler related properties
  9155. :param hostname: The hostname on which the scheduler instance
  9156. is operating
  9157. :type hostname: str or None
  9158. :param server: A PBS server instance to which this scheduler
  9159. is associated
  9160. :param pbsconf_file: path to a PBS configuration file
  9161. :type pbsconf_file: str or None
  9162. :param diagmap: A dictionary of PBS objects (node,server,etc)
  9163. to mapped files from PBS diag directory
  9164. :type diagmap: Dictionary
  9165. :param diag: path to PBS diag directory (This will override
  9166. diagmap)
  9167. :type diag: str or None
  9168. :param db_access: set to either file containing credentials
  9169. to DB access or dictionary containing
  9170. ``{'dbname':...,'user':...,'port':...}``
  9171. :type db_access: str or dictionary
  9172. """
  9173. # A vanilla scheduler configuration. This set may change based on
  9174. # updates to PBS
  9175. sched_dflt_config = {
  9176. "backfill": "true ALL",
  9177. "backfill_prime": "false ALL",
  9178. "help_starving_jobs": "true ALL",
  9179. "max_starve": "24:00:00",
  9180. "strict_ordering": "false ALL",
  9181. "provision_policy": "\"aggressive_provision\"",
  9182. "preempt_order": "\"SCR\"",
  9183. "fairshare_entity": "euser",
  9184. "dedicated_prefix": "ded",
  9185. "primetime_prefix": "p_",
  9186. "nonprimetime_prefix": "np_",
  9187. "preempt_queue_prio": "150",
  9188. "preempt_prio": "\"express_queue, normal_jobs\"",
  9189. "load_balancing": "false ALL",
  9190. "prime_exempt_anytime_queues": "false",
  9191. "round_robin": "False all",
  9192. "fairshare_usage_res": "cput",
  9193. "smp_cluster_dist": "pack",
  9194. "fair_share": "false ALL",
  9195. "preempt_sort": "min_time_since_start",
  9196. "node_sort_key": "\"sort_priority HIGH\" ALL",
  9197. "sort_queues": "true ALL",
  9198. "by_queue": "True ALL",
  9199. "preemptive_sched": "true ALL",
  9200. "resources": "\"ncpus, mem, arch, host, vnode, aoe\"",
  9201. "log_filter": "3328 ",
  9202. }
  9203. sched_config_options = ["node_group_key",
  9204. "dont_preempt_starving",
  9205. "fairshare_enforce_no_shares",
  9206. "strict_ordering",
  9207. "resource_unset_infinite",
  9208. "sync_time",
  9209. "unknown_shares",
  9210. "log_filter",
  9211. "dedicated_prefix",
  9212. "load_balancing",
  9213. "help_starving_jobs",
  9214. "max_starve",
  9215. "sort_queues",
  9216. "backfill",
  9217. "primetime_prefix",
  9218. "nonprimetime_prefix",
  9219. "backfill_prime",
  9220. "prime_exempt_anytime_queues",
  9221. "prime_spill",
  9222. "prime_exempt_anytime_queues",
  9223. "prime_spill",
  9224. "resources",
  9225. "mom_resources",
  9226. "smp_cluster_dist",
  9227. "preempt_queue_prio",
  9228. "preempt_suspend",
  9229. "preempt_checkpoint",
  9230. "preempt_requeue",
  9231. "preemptive_sched",
  9232. "dont_preempt_starving",
  9233. "node_group_key",
  9234. "dont_preempt_starving",
  9235. "fairshare_enforce_no_shares",
  9236. "strict_ordering",
  9237. "resource_unset_infinite",
  9238. "provision_policy",
  9239. "resv_confirm_ignore",
  9240. "allow_aoe_calendar",
  9241. "max_job_check",
  9242. "preempt_attempts",
  9243. "update_comments",
  9244. "sort_by",
  9245. "key",
  9246. "preempt_starving",
  9247. "preempt_fairshare",
  9248. "load_balancing_rr",
  9249. "assign_ssinodes",
  9250. "cpus_per_ssinode",
  9251. "mem_per_ssinode",
  9252. "strict_fifo",
  9253. "mem_per_ssinode",
  9254. "strict_fifo"
  9255. ]
  9256. fs_re = '(?P<name>[\S]+)[\s]*:[\s]*Grp:[\s]*(?P<Grp>[-]*[0-9]*)' + \
  9257. '[\s]*cgrp:[\s]*(?P<cgrp>[-]*[0-9]*)[\s]*' + \
  9258. 'Shares:[\s]*(?P<Shares>[-]*[0-9]*)[\s]*Usage:[\s]*' + \
  9259. '(?P<Usage>[0-9]+)[\s]*Perc:[\s]*(?P<Perc>.*)%'
  9260. fs_tag = re.compile(fs_re)
  9261. def __init__(self, hostname=None, server=None, pbsconf_file=None,
  9262. diagmap={}, diag=None, db_access=None, id='default',
  9263. sched_priv=None):
  9264. self.sched_config_file = None
  9265. self.dflt_holidays_file = None
  9266. self.holidays_file = None
  9267. self.sched_config = {}
  9268. self._sched_config_comments = {}
  9269. self._config_order = []
  9270. self.dedicated_time_file = None
  9271. self.dedicated_time = None
  9272. self.dedicated_time_as_str = None
  9273. self.fairshare_tree = None
  9274. self.resource_group = None
  9275. self.holidays_obj = None
  9276. self.server = None
  9277. self.server_dyn_res = None
  9278. self.logger = logging.getLogger(__name__)
  9279. self.db_access = None
  9280. if server is not None:
  9281. self.server = server
  9282. if diag is None and self.server.diag is not None:
  9283. diag = self.server.diag
  9284. if (len(diagmap) == 0) and (len(self.server.diagmap) != 0):
  9285. diagmap = self.server.diagmap
  9286. else:
  9287. self.server = Server(hostname, pbsconf_file=pbsconf_file,
  9288. db_access=db_access, diag=diag,
  9289. diagmap=diagmap)
  9290. if hostname is None:
  9291. hostname = self.server.hostname
  9292. PBSService.__init__(self, hostname, pbsconf_file=pbsconf_file,
  9293. diag=diag, diagmap=diagmap)
  9294. _m = ['scheduler ', self.shortname]
  9295. if pbsconf_file is not None:
  9296. _m += ['@', pbsconf_file]
  9297. _m += [': ']
  9298. self.logprefix = "".join(_m)
  9299. self.pi = PBSInitServices(hostname=self.hostname,
  9300. conf=self.pbs_conf_file)
  9301. self.pbs_conf = self.server.pbs_conf
  9302. self.sc_name = id
  9303. self.dflt_sched_config_file = os.path.join(self.pbs_conf['PBS_EXEC'],
  9304. 'etc', 'pbs_sched_config')
  9305. self.dflt_holidays_file = os.path.join(self.pbs_conf['PBS_EXEC'],
  9306. 'etc', 'pbs_holidays')
  9307. self.dflt_resource_group_file = os.path.join(self.pbs_conf['PBS_EXEC'],
  9308. 'etc',
  9309. 'pbs_resource_group')
  9310. self.setup_sched_priv(sched_priv)
  9311. self.db_access = db_access
  9312. self.version = None
  9313. def setup_sched_priv(self, sched_priv=None):
  9314. """
  9315. Initialize Scheduler() member variables on initialization or if
  9316. sched_priv changes
  9317. """
  9318. if sched_priv is None:
  9319. if 'sched_priv' in self.attributes:
  9320. sched_priv = self.attributes['sched_priv']
  9321. else:
  9322. sched_priv = os.path.join(self.pbs_conf['PBS_HOME'],
  9323. 'sched_priv')
  9324. self.sched_config_file = os.path.join(sched_priv, 'sched_config')
  9325. self.resource_group_file = os.path.join(sched_priv, 'resource_group')
  9326. self.holidays_file = os.path.join(sched_priv, 'holidays')
  9327. if not os.path.exists(sched_priv):
  9328. return
  9329. self.parse_sched_config()
  9330. self.fairshare_tree = self.query_fairshare()
  9331. rg = self.parse_resource_group(self.hostname, self.resource_group_file)
  9332. self.resource_group = rg
  9333. self.holidays_obj = Holidays()
  9334. self.holidays_parse_file(level=logging.DEBUG)
  9335. def initialise_service(self):
  9336. """
  9337. initialise the scheduler object
  9338. """
  9339. PBSService.initialise_service(self)
  9340. try:
  9341. attrs = self.server.status(SCHED, level=logging.DEBUG,
  9342. db_access=self.db_access,
  9343. id=self.sc_name)
  9344. if attrs is not None and len(attrs) > 0:
  9345. self.attributes = attrs[0]
  9346. except (PbsManagerError, PbsStatusError), e:
  9347. self.logger.error('Error querying scheduler %s' % e.msg)
  9348. def isUp(self):
  9349. """
  9350. Check for PBS scheduler up
  9351. """
  9352. return super(Scheduler, self)._isUp(self)
  9353. def signal(self, sig):
  9354. """
  9355. Send a signal to PBS scheduler
  9356. """
  9357. self.logger.info('scheduler ' + self.shortname + ': sent signal ' +
  9358. sig)
  9359. return super(Scheduler, self)._signal(sig, inst=self)
  9360. def get_pid(self):
  9361. """
  9362. Get the PBS scheduler pid
  9363. """
  9364. return super(Scheduler, self)._get_pid(inst=self)
  9365. def all_instance_pids(self):
  9366. """
  9367. Get the all pids for the instance
  9368. """
  9369. return super(Scheduler, self)._all_instance_pids(inst=self)
  9370. def start(self, sched_home=None, args=None, launcher=None):
  9371. """
  9372. Start the scheduler
  9373. :param sched_home: Path to scheduler log and home directory
  9374. :type sched_home: str
  9375. :param args: Arguments required to start the scheduler
  9376. :type args: str
  9377. :param launcher: Optional utility to invoke the launch of the service
  9378. :type launcher: str or list
  9379. """
  9380. if self.attributes['id'] != 'default':
  9381. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'],
  9382. 'sbin', 'pbs_sched')]
  9383. cmd += ['-I', self.attributes['id']]
  9384. cmd += ['-S', str(self.attributes['sched_port'])]
  9385. if sched_home is not None:
  9386. cmd += ['-d', sched_home]
  9387. try:
  9388. ret = self.du.run_cmd(self.hostname, cmd, sudo=True,
  9389. logerr=False, level=logging.INFOCLI)
  9390. except PbsInitServicesError as e:
  9391. raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
  9392. self.server.manager(MGR_CMD_LIST, SCHED)
  9393. return ret
  9394. if args is not None or launcher is not None:
  9395. return super(Scheduler, self)._start(inst=self, args=args,
  9396. launcher=launcher)
  9397. else:
  9398. try:
  9399. rv = self.pi.start_sched()
  9400. self._update_pid(self)
  9401. except PbsInitServicesError as e:
  9402. raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
  9403. return rv
  9404. def stop(self, sig=None):
  9405. """
  9406. Stop the PBS scheduler
  9407. :param sig: Signal to stop the PBS scheduler
  9408. :type sig: str
  9409. """
  9410. if sig is not None:
  9411. self.logger.info(self.logprefix + 'stopping Scheduler on host ' +
  9412. self.hostname)
  9413. return super(Scheduler, self)._stop(sig, inst=self)
  9414. else:
  9415. try:
  9416. self.pi.stop_sched()
  9417. self.pid = None
  9418. except PbsInitServicesError as e:
  9419. raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
  9420. return True
  9421. def restart(self):
  9422. """
  9423. Restart the PBS scheduler
  9424. """
  9425. if self.isUp():
  9426. if not self.stop():
  9427. return False
  9428. return self.start()
  9429. def log_match(self, msg=None, id=None, n=50, tail=True, allmatch=False,
  9430. regexp=False, max_attempts=None, interval=None,
  9431. starttime=None, endtime=None, level=logging.INFO,
  9432. existence=True):
  9433. """
  9434. Match given ``msg`` in given ``n`` lines of Scheduler log
  9435. :param msg: log message to match, can be regex also when
  9436. ``regexp`` is True
  9437. :type msg: str
  9438. :param id: The id of the object to trace. Only used for
  9439. tracejob
  9440. :type id: str
  9441. :param n: 'ALL' or the number of lines to search through,
  9442. defaults to 50
  9443. :type n: str or int
  9444. :param tail: If true (default), starts from the end of
  9445. the file
  9446. :type tail: bool
  9447. :param allmatch: If True all matching lines out of then
  9448. parsed are returned as a list. Defaults
  9449. to False
  9450. :type allmatch: bool
  9451. :param regexp: If true msg is a Python regular expression.
  9452. Defaults to False
  9453. :type regexp: bool
  9454. :param max_attempts: the number of attempts to make to find
  9455. a matching entry
  9456. :type max_attempts: int
  9457. :param interval: the interval between attempts
  9458. :type interval: int
  9459. :param starttime: If set ignore matches that occur before
  9460. specified time
  9461. :type starttime: int
  9462. :param endtime: If set ignore matches that occur after
  9463. specified time
  9464. :type endtime: int
  9465. :param level: The logging level, defaults to INFO
  9466. :type level: int
  9467. :param existence: If True (default), check for existence of
  9468. given msg, else check for non-existence of
  9469. given msg.
  9470. :type existence: bool
  9471. :return: (x,y) where x is the matching line
  9472. number and y the line itself. If allmatch is True,
  9473. a list of tuples is returned.
  9474. :rtype: tuple
  9475. :raises PtlLogMatchError:
  9476. When ``existence`` is True and given
  9477. ``msg`` is not found in ``n`` line
  9478. Or
  9479. When ``existence`` is False and given
  9480. ``msg`` found in ``n`` line.
  9481. .. note:: The matching line number is relative to the record
  9482. number, not the absolute line number in the file.
  9483. """
  9484. return self._log_match(self, msg, id, n, tail, allmatch, regexp,
  9485. max_attempts, interval, starttime, endtime,
  9486. level=level, existence=existence)
  9487. def pbs_version(self):
  9488. """
  9489. Get the version of the scheduler instance
  9490. """
  9491. if self.version:
  9492. return self.version
  9493. version = self.log_match('pbs_version', tail=False)
  9494. if version:
  9495. version = version[1].strip().split('=')[1]
  9496. else:
  9497. version = "unknown"
  9498. self.version = LooseVersion(version)
  9499. return self.version
  9500. def parse_sched_config(self, schd_cnfg=None):
  9501. """
  9502. Parse a sceduling configuration file into a dictionary.
  9503. Special handling of identical keys ``(e.g., node_sort_key)``
  9504. is done by appending a delimiter, '%', between each value
  9505. of the key. When printed back to file, each delimited entry
  9506. gets written on a line of its own. For example, the python
  9507. dictionary entry:
  9508. ``{'node_sort_key':
  9509. ["ncpus HIGH unusued" prime", "node_priority HIH"
  9510. non-prime"]}``
  9511. will get written as:
  9512. ``node_sort_key: "ncpus HIGH unusued" prime``
  9513. ``node_sort_key: "node_priority HIGH" non-prime``
  9514. Returns sched_config dictionary that gets reinitialized
  9515. every time this method is called.
  9516. """
  9517. # sched_config is initialized
  9518. if self.sched_config:
  9519. del(self.sched_config)
  9520. self.sched_config = {}
  9521. self._sched_config_comments = {}
  9522. self._config_order = []
  9523. if schd_cnfg is None:
  9524. if self.sched_config_file is not None:
  9525. schd_cnfg = self.sched_config_file
  9526. else:
  9527. self.logger.error('no scheduler configuration file to parse')
  9528. return False
  9529. try:
  9530. conf_opts = self.du.cat(self.hostname, schd_cnfg,
  9531. sudo=(not self.has_diag),
  9532. level=logging.DEBUG2)['out']
  9533. except:
  9534. self.logger.error('error parsing scheduler configuration')
  9535. return False
  9536. _comment = []
  9537. conf_re = re.compile(
  9538. '[#]?[\s]*(?P<conf_id>[\w]+):[\s]*(?P<conf_val>.*)')
  9539. for line in conf_opts:
  9540. m = conf_re.match(line)
  9541. if m:
  9542. key = m.group('conf_id')
  9543. val = m.group('conf_val')
  9544. # line is a comment, it could be a commented out scheduling
  9545. # option, or the description of an option. It could also be
  9546. # that part of the description is an example setting of the
  9547. # option.
  9548. # We must keep track of commented out options in order to
  9549. # rewrite the configuration in the same order as it was defined
  9550. if line.startswith('#'):
  9551. if key in self.sched_config_options:
  9552. _comment += [line]
  9553. if key in self._sched_config_comments:
  9554. self._sched_config_comments[key] += _comment
  9555. _comment = []
  9556. else:
  9557. self._sched_config_comments[key] = _comment
  9558. _comment = []
  9559. if key not in self._config_order:
  9560. self._config_order.append(key)
  9561. else:
  9562. _comment += [line]
  9563. continue
  9564. if key not in self._sched_config_comments:
  9565. self._sched_config_comments[key] = _comment
  9566. else:
  9567. self._sched_config_comments[key] += _comment
  9568. if key not in self._config_order:
  9569. self._config_order.append(key)
  9570. _comment = []
  9571. if key in self.sched_config:
  9572. if isinstance(self.sched_config[key], list):
  9573. if isinstance(val, list):
  9574. self.sched_config[key].extend(val)
  9575. else:
  9576. self.sched_config[key].append(val)
  9577. else:
  9578. if isinstance(val, list):
  9579. self.sched_config[key] = [self.sched_config[key]]
  9580. self.sched_config[key].extend(val)
  9581. else:
  9582. self.sched_config[key] = [self.sched_config[key],
  9583. val]
  9584. else:
  9585. self.sched_config[key] = val
  9586. else:
  9587. _comment += [line]
  9588. self._sched_config_comments['PTL_SCHED_CONFIG_TAIL'] = _comment
  9589. return True
  9590. def check_defaults(self, config):
  9591. """
  9592. Check the values in argument config against default values
  9593. """
  9594. if len(config.keys()) == 0:
  9595. return
  9596. for k, v in self.sched_dflt_config.items():
  9597. if k in config:
  9598. s1 = v
  9599. s1 = s1.replace(" ", "")
  9600. s1 = s1.replace("\t", "").strip()
  9601. s2 = config[k]
  9602. s2 = s2.replace(" ", "")
  9603. s2 = s2.replace("\t", "").strip()
  9604. if s1 != s2:
  9605. self.logger.debug(k + ' non-default: ' + v +
  9606. ' != ' + config[k])
  9607. def apply_config(self, config=None, validate=True, path=None):
  9608. """
  9609. Apply the configuration specified by config
  9610. :param config: Configurations to set. Default: self.
  9611. sched_config
  9612. :param validate: If True (the default) validate that
  9613. settings did not yield an error.
  9614. Validation is done by parsing the
  9615. scheduler log which, in some cases may
  9616. be slow and therefore undesirable.
  9617. :type validate: bool
  9618. :param path: Optional path to file to which configuration
  9619. is written. If None, the configuration is
  9620. written to PBS_HOME/sched_priv/sched_config
  9621. :type path: str
  9622. :returns: True on success and False otherwise. Success
  9623. means that upon applying the new configuration
  9624. the scheduler did not emit an
  9625. "Error reading line" in its log file.
  9626. """
  9627. if config is None:
  9628. config = self.sched_config
  9629. if len(config) == 0:
  9630. return True
  9631. reconfig_time = int(time.time())
  9632. try:
  9633. fn = self.du.create_temp_file()
  9634. with open(fn, "w") as fd:
  9635. for k in self._config_order:
  9636. if k in config:
  9637. if k in self._sched_config_comments:
  9638. fd.write("\n".join(self._sched_config_comments[k]))
  9639. fd.write("\n")
  9640. v = config[k]
  9641. if isinstance(v, list):
  9642. for val in v:
  9643. fd.write(k + ": " + str(val) + "\n")
  9644. else:
  9645. fd.write(k + ": " + str(v) + "\n")
  9646. elif k in self._sched_config_comments:
  9647. fd.write("\n".join(self._sched_config_comments[k]))
  9648. fd.write("\n")
  9649. for k, v in self.sched_config.items():
  9650. if k not in self._config_order:
  9651. fd.write(k + ": " + str(v).strip() + "\n")
  9652. if 'PTL_SCHED_CONFIG_TAIL' in self._sched_config_comments:
  9653. fd.write("\n".join(
  9654. self._sched_config_comments['PTL_SCHED_CONFIG_TAIL']))
  9655. fd.write("\n")
  9656. if path is None:
  9657. if 'sched_priv' in self.attributes:
  9658. sched_priv = self.attributes['sched_priv']
  9659. else:
  9660. sched_priv = os.path.join(self.pbs_conf['PBS_HOME'],
  9661. "sched_priv")
  9662. sp = os.path.join(sched_priv, "sched_config")
  9663. else:
  9664. sp = path
  9665. self.du.run_copy(self.hostname, fn, sp, preserve_permission=False,
  9666. sudo=True)
  9667. os.remove(fn)
  9668. self.logger.debug(self.logprefix + "updated configuration")
  9669. except:
  9670. m = self.logprefix + 'error in apply_config '
  9671. self.logger.error(m + str(traceback.print_exc()))
  9672. raise PbsSchedConfigError(rc=1, rv=False, msg=m)
  9673. if validate:
  9674. self.get_pid()
  9675. self.signal('-HUP')
  9676. try:
  9677. self.log_match("Sched;reconfigure;Scheduler is reconfiguring",
  9678. n=10, starttime=reconfig_time)
  9679. self.log_match("Error reading line", n=10, max_attempts=2,
  9680. starttime=reconfig_time, existence=False)
  9681. except PtlLogMatchError:
  9682. _msg = 'Error in validating sched_config changes'
  9683. raise PbsSchedConfigError(rc=1, rv=False,
  9684. msg=_msg)
  9685. return True
  9686. def set_sched_config(self, confs={}, apply=True, validate=True):
  9687. """
  9688. set a ``sched_config`` property
  9689. :param confs: dictionary of key value sched_config entries
  9690. :type confs: Dictionary
  9691. :param apply: if True (the default), apply configuration.
  9692. :type apply: bool
  9693. :param validate: if True (the default), validate the
  9694. configuration settings.
  9695. :type validate: bool
  9696. """
  9697. self.parse_sched_config()
  9698. self.logger.info(self.logprefix + "config " + str(confs))
  9699. self.sched_config = dict(self.sched_config.items() + confs.items())
  9700. if apply:
  9701. try:
  9702. self.apply_config(validate=validate)
  9703. except PbsSchedConfigError:
  9704. for k in confs:
  9705. del self.sched_config[k]
  9706. self.apply_config(validate=validate)
  9707. return True
  9708. def add_server_dyn_res(self, custom_resource, script_body=None,
  9709. res_file=None, apply=True, validate=True):
  9710. """
  9711. Add a server dynamic resource script or file to the scheduler
  9712. configuration
  9713. :param custom_resource: The name of the custom resource to
  9714. define
  9715. :type custom_resource: str
  9716. :param script_body: The body of the server dynamic resource
  9717. :param res_file: Alternatively to passing the script body, use
  9718. the file instead
  9719. :type res_file: str or None
  9720. :param apply: if True (the default), apply configuration.
  9721. :type apply: bool
  9722. :param validate: if True (the default), validate the
  9723. configuration settings.
  9724. :type validate: bool
  9725. """
  9726. if res_file is not None:
  9727. f = open(file)
  9728. script_body = f.readlines()
  9729. f.close()
  9730. else:
  9731. res_file = self.du.create_temp_file(prefix='PtlPbsSchedConfig',
  9732. body=script_body)
  9733. self.server_dyn_res = res_file
  9734. self.logger.info(self.logprefix + "adding server dyn res " + res_file)
  9735. self.logger.info("-" * 30)
  9736. self.logger.info(script_body)
  9737. self.logger.info("-" * 30)
  9738. self.du.chmod(self.hostname, path=res_file, mode=0755)
  9739. a = {'server_dyn_res': '"' + custom_resource + ' !' + res_file + '"'}
  9740. self.set_sched_config(a, apply=apply, validate=validate)
  9741. def unset_sched_config(self, name, apply=True):
  9742. """
  9743. Delete a ``sched_config`` entry
  9744. :param name: the entry to delete from sched_config
  9745. :type name: str
  9746. :param apply: if True, apply configuration. Defaults to True
  9747. :type apply: bool
  9748. """
  9749. self.parse_sched_config()
  9750. if name not in self.sched_config:
  9751. return True
  9752. self.logger.info(self.logprefix + "unsetting config " + name)
  9753. del self.sched_config[name]
  9754. if apply:
  9755. return self.apply_config()
  9756. def set_dedicated_time_file(self, file):
  9757. """
  9758. Set the path to a dedicated time
  9759. """
  9760. self.logger.info(self.logprefix + " setting dedicated time file to " +
  9761. str(file))
  9762. self.dedicated_time_file = file
  def revert_to_defaults(self):
      """
      Revert scheduler configuration to defaults.

      Steps, in order: unset every scheduler attribute that is not in a
      fixed ignore list, clear dedicated time, restore the default
      ``resource_group``, holidays and ``sched_config`` files when they
      differ from the current ones, remove the registered
      ``server_dyn_res`` script if any, HUP the scheduler, reset
      fairshare usage with ``pbsfs -e`` and re-parse ``sched_config``.

      :returns: the value of ``isUp()`` after the revert
      """
      self.logger.info(self.logprefix +
                       "reverting configuration to defaults")

      ignore_attrs = ['id', 'pbs_version', 'sched_host',
                      'state', 'sched_port']
      unsetattrs = [k for k in self.attributes.keys()
                    if k not in ignore_attrs]
      if unsetattrs:
          self.server.manager(MGR_CMD_UNSET, SCHED, unsetattrs)

      # the HUP is sent once, further down, after all files are restored
      self.clear_dedicated_time(hup=False)

      if self.du.cmp(self.hostname, self.dflt_resource_group_file,
                     self.resource_group_file, sudo=True) != 0:
          self.du.run_copy(self.hostname, self.dflt_resource_group_file,
                           self.resource_group_file,
                           preserve_permission=False,
                           sudo=True)

      if self.server_dyn_res is not None:
          self.du.rm(self.hostname, self.server_dyn_res, force=True,
                     sudo=True)
          self.server_dyn_res = None

      self.holidays_revert_to_default()

      if self.du.cmp(self.hostname, self.dflt_sched_config_file,
                     self.sched_config_file, sudo=True) != 0:
          self.du.run_copy(self.hostname, self.dflt_sched_config_file,
                           self.sched_config_file,
                           preserve_permission=False,
                           sudo=True)
      self.signal('-HUP')

      # Revert fairshare usage
      cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbsfs'), '-e']
      # compare by value: identity against a string literal depends on
      # interning and would add -I for an equal but distinct 'default'
      if self.sc_name != 'default':
          cmd += ['-I', self.sc_name]
      # NOTE(review): no hostname is passed here although other callers
      # in this class pass self.hostname to run_cmd -- confirm intended
      self.du.run_cmd(cmd=cmd, sudo=True)

      self.parse_sched_config()
      if self.platform == 'cray' or self.platform == 'craysim':
          self.add_resource('vntype')
          self.add_resource('hbmem')
      self.fairshare_tree = None
      self.resource_group = None
      return self.isUp()
  def create_scheduler(self, sched_home=None):
      """
      Start scheduler with creating required directories for scheduler

      The priv and log directories are built by joining ``sched_home``
      with the ``sched_priv`` and ``sched_log`` attributes. When the priv
      directory does not exist it is created and seeded with the default
      ``resource_group``, holidays and ``sched_config`` files. The log
      directory is created when missing. ``setup_sched_priv()`` is
      always called last.

      :param sched_home: path of scheduler home and log directory.
                         Defaults to the server's ``PBS_HOME``
      :type sched_home: str
      """
      if sched_home is None:
          sched_home = self.server.pbs_conf['PBS_HOME']
      sched_priv_dir = os.path.join(sched_home,
                                    self.attributes['sched_priv'])
      sched_logs_dir = os.path.join(sched_home,
                                    self.attributes['sched_log'])

      # 0o644 is the same value as the old-style literal 0644 but is
      # accepted by both Python 2.6+ and Python 3
      file_mode = 0o644

      # NOTE(review): existence is checked on the local filesystem while
      # the copies are addressed to self.hostname -- confirm for remote
      # schedulers
      if not os.path.exists(sched_priv_dir):
          self.du.mkdir(path=sched_priv_dir, sudo=True)
          self.du.run_copy(self.hostname, self.dflt_resource_group_file,
                           self.resource_group_file, mode=file_mode,
                           sudo=True)
          self.du.run_copy(self.hostname, self.dflt_holidays_file,
                           self.holidays_file, mode=file_mode, sudo=True)
          self.du.run_copy(self.hostname, self.dflt_sched_config_file,
                           self.sched_config_file, mode=file_mode,
                           sudo=True)

      if not os.path.exists(sched_logs_dir):
          self.du.mkdir(path=sched_logs_dir, sudo=True)

      self.setup_sched_priv(sched_priv=sched_priv_dir)
  def save_configuration(self, outfile, mode='a'):
      """
      Save scheduler configuration

      The ``sched_config``, ``resource_group``, ``dedicated_time`` and
      ``holidays`` files under the scheduler's priv directory are handed
      to ``_save_config_file()``, and the resulting dictionary (keyed by
      ``MGR_OBJ_SCHED``) is pickled to ``outfile``.

      :param outfile: Path to a file to which configuration
                      is saved
      :type outfile: str
      :param mode: mode to use to access outfile. Defaults to
                   append, 'a'.
      :type mode: str
      :returns: True on success and False otherwise
      """
      conf = {}
      sconf = {MGR_OBJ_SCHED: conf}
      if 'sched_priv' in self.attributes:
          sched_priv = self.attributes['sched_priv']
      else:
          sched_priv = os.path.join(
              self.pbs_conf['PBS_HOME'], 'sched_priv')

      for fname in ('sched_config', 'resource_group',
                    'dedicated_time', 'holidays'):
          self._save_config_file(conf, os.path.join(sched_priv, fname))

      try:
          # the context manager closes the file even when dump() fails
          with open(outfile, mode) as f:
              cPickle.dump(sconf, f)
      except Exception:
          self.logger.error('error saving configuration ' + outfile)
          return False
      return True
  def load_configuration(self, infile):
      """
      Load scheduler configuration from the saved file ``infile``.

      Delegates to ``_load_configuration()`` with ``MGR_OBJ_SCHED`` as
      the object type; nothing is returned.

      :param infile: the saved configuration file to load (presumably
                     one written by ``save_configuration`` -- confirm)
      """
      self._load_configuration(infile, MGR_OBJ_SCHED)
  def get_resources(self, exclude=None):
      """
      returns a list of allocatable resources.

      The list is read from the ``resources`` entry of
      ``self.sched_config`` with double quotes and spaces stripped and
      the remainder split on commas.

      :param exclude: if set, excludes the named resources, if
                      they exist, from the resulting list
      :type exclude: List or None
      :returns: the list of resource names, or None when
                ``sched_config`` has no ``resources`` entry
      """
      if 'resources' not in self.sched_config:
          return None
      resources = self.sched_config['resources']
      res = resources.replace('"', '').replace(' ', '').split(',')
      # None replaces the former mutable [] default; both mean
      # "exclude nothing"
      for e in exclude or ():
          if e in res:
              res.remove(e)
      return res
  def add_resource(self, name, apply=True):
      """
      Append a resource to the ``resources`` line of ``sched_config``.

      :param name: the resource name to add
      :type name: str
      :param apply: if True, apply configuration. Defaults to True
      :type apply: bool
      :returns: True if the resource is already listed, otherwise the
                result of ``set_sched_config()``
      """
      # if the sched_config has not been read in yet, parse it
      if not self.sched_config:
          self.parse_sched_config()

      if 'resources' not in self.sched_config:
          # no resources line yet: the new one holds only this name
          quoted = '"' + name + '"'
      else:
          bare = self.sched_config['resources'].replace('"', '')
          known = [item.strip() for item in bare.split(",")]
          if name in known:
              return True
          quoted = "".join(['"', bare, ', ', name, '"'])
      return self.set_sched_config({'resources': quoted}, apply=apply)
  def remove_resource(self, name, apply=True):
      """
      Remove a resource from ``sched_config``.

      :param name: the resource name to remove
      :type name: str
      :param apply: if True, apply configuration. Defaults to True
      :type apply: bool
      :returns: True when there is nothing to remove (no ``resources``
                entry, or ``name`` is not listed), otherwise the result
                of ``set_sched_config()``
      """
      # if the sched_config has not been read in yet, parse it
      if not self.sched_config:
          self.parse_sched_config()

      # no resources line at all: nothing to remove, same outcome as
      # when the name is simply not listed
      if 'resources' not in self.sched_config:
          return True

      resources = self.sched_config['resources'].replace('"', '')
      splitres = [r.strip() for r in resources.split(",")]
      if name not in splitres:
          return True

      remaining = [r for r in splitres if r != name]
      resources = '"' + ",".join(remaining) + '"'
      return self.set_sched_config({'resources': resources}, apply=apply)
  def holidays_revert_to_default(self, level=logging.INFO):
      """
      Restore the holidays file from the default copy.

      When the current holidays file differs from the default one, the
      default is copied over it and the file is parsed again.

      :param level: log level used for the progress message
      :returns: the ``rc`` of the copy when a copy was made, 1 when the
                two files already compare equal
      """
      self.logger.log(level, self.logprefix +
                      "reverting holidays file to default")

      unchanged = self.du.cmp(self.hostname, self.dflt_holidays_file,
                              self.holidays_file, sudo=True) == 0
      if unchanged:
          # NOTE(review): 1 is returned although nothing failed --
          # confirm callers do not read it as an error
          return 1

      # Copy over the holidays file from PBS_EXEC
      copied = self.du.run_copy(self.hostname, self.dflt_holidays_file,
                                self.holidays_file,
                                preserve_permission=False, sudo=True,
                                logerr=True)
      status = copied['rc']
      # Update the internal data structures for the updated file
      self.holidays_parse_file(level=level)
      return status
  def holidays_parse_file(self, path=None, obj=None, level=logging.INFO):
      """
      Read the holidays file and rebuild the holidays object from it.

      All existing entries of the holidays object are deleted first.
      Each non-empty line is then classified by its first word: ``year``,
      a key of the object's day map, or a number (a holiday). Any other
      line is ignored.

      :param path: optional path to the holidays file to parse
      :type path: str or None
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      :returns: The recognised lines of the holidays file, as a list of
                tab-joined strings
      """
      self.logger.log(level, self.logprefix + "Parsing holidays file")
      if obj is None:
          obj = self.holidays_obj
      days_map = obj._days_map
      days_set = obj.days_set
      if path is None:
          path = self.holidays_file
      lines = self.du.cat(self.hostname, path, sudo=True)['out']

      content = []    # valid content to return
      self.holidays_delete_entry(
          'a', apply=False, obj=obj, level=logging.DEBUG)

      for raw in lines:
          fields = str(raw).split()
          if not fields:
              continue
          tag = fields[0].lower()
          is_year = tag == "year"
          is_day = (not is_year) and tag in days_map
          if not (is_year or is_day or tag.isdigit()):
              # neither year, day nor holiday: not kept
              continue

          content.append("\t".join(fields))
          if is_year:
              obj.year['valid'] = True
              if len(fields) > 1:
                  obj.year['value'] = fields[1]
          elif is_day:
              day = days_map[tag]
              day['valid'] = True
              days_set.append(day)
              day['position'] = len(days_set) - 1
              if len(fields) > 1:
                  day['p'] = fields[1]
              if len(fields) > 2:
                  day['np'] = fields[2]
          else:
              obj.holidays.append(tag)
      return content
  def holidays_set_day(self, day_id, prime="", nonprime="", apply=True,
                       obj=None, level=logging.INFO):
      """
      Assign prime and non-prime values to a day entry.

      A day that was never set is appended to the set days; a day that
      was invalidated is re-inserted at its recorded position; a day
      that is already valid only has its values replaced.

      :param day_id: the day to be set (string)
      :type day_id: str
      :param prime: the prime time value
      :param nonprime: the non-prime time value
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      :returns: The position of the set day
      """
      self.logger.log(level, self.logprefix +
                      "setting holidays file entry for %s",
                      day_id)
      if obj is None:
          obj = self.holidays_obj
      day = obj._days_map[str(day_id).lower()]
      days_set = obj.days_set

      state = day['valid']
      if state is None:
          # Fresh entry: goes to the end
          days_set.append(day)
          day['position'] = len(days_set) - 1
      elif state is False:
          # Previously invalidated entry: back where it was
          days_set.insert(day['position'], day)

      day['valid'] = True
      day['p'] = str(prime)
      day['np'] = str(nonprime)
      self.logger.debug("holidays_set_day(): changed day struct: " +
                        str(day))

      if apply:
          self.holidays_write_file(obj=obj, level=logging.DEBUG)
      return day['position']
  def holidays_get_day(self, day_id, obj=None, level=logging.INFO):
      """
      Return information about one day, or about all set days.

      ``day_id`` is first looked up as given; when it is not a key of
      the day map its lower-cased string form is used, which is how the
      other day accessors of this class address the map.

      :param obj: optional holidays object to be used instead
                  of internal
      :param day_id: either a day's name or "all"
      :type day_id: str
      :param level: log level used for the progress message
      :returns: A copy of info about a day (dict copy), or a shallow
                copy of the list of set days for "all"
      """
      self.logger.log(level, self.logprefix +
                      "getting holidays file entry for " +
                      str(day_id))
      if obj is None:
          obj = self.holidays_obj
      days_set = obj.days_set
      days_map = obj._days_map
      if day_id == "all":
          return days_set[:]
      # an exact key wins so existing callers see no change
      key = day_id if day_id in days_map else str(day_id).lower()
      if key == "all":
          return days_set[:]
      return days_map[key].copy()
  def holidays_reposition_day(self, day_id, new_pos, apply=True, obj=None,
                              level=logging.INFO):
      """
      Change position of a day ``(0-7)`` as it appears in the
      holidays file

      Invalidated days keep a position of their own, so they are merged
      into the list while positions are renumbered and taken out again
      afterwards.

      :param day_id: name of the day
      :type day_id: str
      :param new_pos: new position
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      :returns: The new position of the day
      """
      self.logger.log(level, self.logprefix +
                      "repositioning holidays file entry for " +
                      str(day_id) + " to position " + str(new_pos))
      if obj is None:
          obj = self.holidays_obj
      days_map = obj._days_map
      days_set = obj.days_set
      day = days_map[str(day_id).lower()]
      if new_pos == day['position']:
          # already there; report the position as documented
          return new_pos

      # We also want to update order of invalid days, so add them to
      # days_set temporarily
      invalid_days = [d for d in days_map.values()
                      if d['valid'] is False]
      days_set += invalid_days

      # Sort the old list
      days_set.sort(key=itemgetter('position'))

      # Change position of 'day_id'
      day['position'] = new_pos
      days_set.remove(day)
      days_set.insert(new_pos, day)

      # Update the 'position' field
      for index, entry in enumerate(days_set):
          entry['position'] = index

      # Remove invalid days from days_set. The slice assignment edits
      # the holidays object's own list; rebinding the local name would
      # leave the invalid days in it.
      days_set[:] = [d for d in days_set if d not in invalid_days]

      self.logger.debug("holidays_reposition_day(): List of days after " +
                        " re-positioning " + str(day_id) + " is:\n" +
                        str(days_set))
      if apply:
          self.holidays_write_file(obj=obj, level=logging.DEBUG)
      return new_pos
  def holidays_unset_day(self, day_id, apply=True, obj=None,
                         level=logging.INFO):
      """
      Blank out the prime and non-prime values of a day.

      :param day_id: day to unset (string)
      :type day_id: str
      :param apply: to reflect the changes to file
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message

      .. note:: we do not unset the 'valid' field here so the entry
                will still be displayed but without any values
      """
      self.logger.log(level, self.logprefix +
                      "unsetting holidays file entry for " + day_id)
      holidays = self.holidays_obj if obj is None else obj
      entry = holidays._days_map[str(day_id).lower()]
      for field in ('p', 'np'):
          entry[field] = ""
      if apply:
          self.holidays_write_file(obj=holidays, level=logging.DEBUG)
  def holidays_invalidate_day(self, day_id, apply=True, obj=None,
                              level=logging.INFO):
      """
      Remove a day's entry from the holidays file

      The day is marked invalid and taken out of the list of set days;
      its recorded position is left untouched. A day that is not
      currently in the list of set days is left as it is, so calling
      this twice is harmless.

      :param day_id: the day to remove (string)
      :type day_id: str
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      """
      self.logger.log(level, self.logprefix +
                      "invalidating holidays file entry for " +
                      str(day_id))
      if obj is None:
          obj = self.holidays_obj
      days_map = obj._days_map
      days_set = obj.days_set
      day = days_map[str(day_id).lower()]
      if day not in days_set:
          # nothing to remove; do not flip 'valid' on an entry that
          # was never part of the file
          self.logger.debug("holidays_invalidate_day(): " +
                            str(day_id) + " is not currently set!")
          return
      day['valid'] = False
      days_set.remove(day)
      if apply:
          self.holidays_write_file(obj=obj, level=logging.DEBUG)
  def holidays_validate_day(self, day_id, apply=True, obj=None,
                            level=logging.INFO):
      """
      Bring a previously set day's entry back into the holidays file.

      :param day_id: the day to validate (string)
      :type day_id: str
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message

      .. note:: The day will retain its previous position in
                the file
      """
      self.logger.log(level, self.logprefix +
                      "validating holidays file entry for " +
                      day_id)
      holidays = self.holidays_obj if obj is None else obj
      active_days = holidays.days_set
      entry = holidays._days_map[str(day_id).lower()]

      if entry not in active_days:
          entry['valid'] = True
          active_days.insert(entry['position'], entry)
          if apply:
              self.holidays_write_file(obj=holidays, level=logging.DEBUG)
          return
      # do not insert a pre-existing day
      self.logger.debug("holidays_validate_day(): " +
                        day_id + " is already valid!")
  def holidays_delete_entry(self, entry_type, idx=None, apply=True,
                            obj=None, level=logging.INFO):
      """
      Delete one kind of entry, or every entry, from the holidays data.

      :param entry_type: 'y':year, 'd':day, 'h':holiday or 'a': all
      :type entry_type: str
      :param idx: either a day of week (monday, tuesday etc.)
                  or Julian date of a holiday
      :type idx: str or None
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead of
                  internal
      :param level: log level used for the progress message
      :returns: False if entry_type is invalid, otherwise True

      .. note:: A deleted day cannot be validated and will lose its
                position in the file
      """
      self.logger.log(level, self.logprefix +
                      "Deleting entries from holidays file")
      if obj is None:
          obj = self.holidays_obj
      days_map = obj._days_map
      days_set = obj.days_set
      holiday_list = obj.holidays
      year = obj.year

      if entry_type not in ('a', 'y', 'd', 'h'):
          return False
      wipe_all = entry_type == 'a'

      if wipe_all or entry_type == 'y':
          self.logger.debug(self.logprefix +
                            "deleting year entry from holidays file")
          # Delete year entry
          year['value'] = None
          year['valid'] = False

      if wipe_all:
          self.logger.debug(self.logprefix +
                            "deleting all days from holidays file")
          # take the head of the list once per day that was set
          for _ in range(len(days_set)):
              head = days_set[0]
              for field in ('p', 'np', 'valid', 'position'):
                  head[field] = None
              days_set.remove(head)
      elif entry_type == 'd':
          self.logger.debug(self.logprefix +
                            "deleting " + str(idx) +
                            " entry from holidays file")
          day = days_map[str(idx).lower()]
          for field in ('p', 'np', 'valid', 'position'):
              day[field] = None
          days_set.remove(day)
          # Correct 'position' field of every day
          for pos, remaining in enumerate(days_set):
              remaining['position'] = pos

      # Delete one/all calendar holiday entries
      if wipe_all:
          self.logger.debug(self.logprefix +
                            "deleting all holidays from holidays file")
          del holiday_list[:]
      elif entry_type == 'h':
          self.logger.debug(self.logprefix +
                            "deleting holiday on " + str(idx) +
                            " from holidays file")
          holiday_list.remove(str(idx))

      if apply:
          self.holidays_write_file(obj=obj, level=logging.DEBUG)
      return True
  def holidays_set_year(self, new_year="", apply=True, obj=None,
                        level=logging.INFO):
      """
      Store a year value in the holidays data and mark it valid.

      :param new_year: year value to set; stored as a string
      :type new_year: str
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      """
      year_text = str(new_year)
      self.logger.log(level, self.logprefix +
                      "setting holidays file year entry to " +
                      year_text)
      holidays = self.holidays_obj if obj is None else obj
      holidays.year['value'] = year_text
      holidays.year['valid'] = True
      if apply:
          self.holidays_write_file(obj=holidays, level=logging.DEBUG)
  def holidays_unset_year(self, apply=True, obj=None, level=logging.INFO):
      """
      Blank out the year value; the year's 'valid' flag is not touched.

      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      """
      self.logger.log(level, self.logprefix +
                      "unsetting holidays file year entry")
      holidays = self.holidays_obj if obj is None else obj
      holidays.year['value'] = ""
      if not apply:
          return
      self.holidays_write_file(obj=holidays, level=logging.DEBUG)
  def holidays_get_year(self, obj=None, level=logging.INFO):
      """
      Fetch the year entry of the holidays data.

      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      :returns: A copy of the year entry of holidays file
      """
      self.logger.log(level, self.logprefix +
                      "getting holidays file year entry")
      holidays = self.holidays_obj if obj is None else obj
      return holidays.year.copy()
  def holidays_add_holiday(self, date=None, apply=True, obj=None,
                           level=logging.INFO):
      """
      Append a calendar holiday to the list of holidays.

      :param date: Date value for the holiday; stored as a string.
                   Nothing is appended when it is None
      :param apply: to reflect the changes to file
      :type apply: bool
      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      """
      date_text = str(date)
      self.logger.log(level, self.logprefix +
                      "adding holiday " + date_text +
                      " to holidays file")
      holidays = self.holidays_obj if obj is None else obj
      known = holidays.holidays
      if date is not None:
          known.append(date_text)
      self.logger.debug("holidays list after adding one: " +
                        str(known))
      if apply:
          self.holidays_write_file(obj=holidays, level=logging.DEBUG)
  def holidays_get_holidays(self, obj=None, level=logging.INFO):
      """
      Fetch the calendar holidays currently held in the holidays data.

      :param obj: optional holidays object to be used instead
                  of internal
      :param level: log level used for the progress message
      :returns: A copy of the list of holidays in holidays file
      """
      self.logger.log(level, self.logprefix +
                      "retrieving list of holidays")
      holidays = self.holidays_obj if obj is None else obj
      return list(holidays.holidays)
  10336. def _holidays_process_content(self, content, obj=None):
  10337. """
  10338. Process a user provided list of holidays file content
  10339. :param obj: optional holidays object to be used instead
  10340. of internal
  10341. """
  10342. self.logger.debug("_holidays_process_content(): " +
  10343. "Processing user provided holidays content:\n" +
  10344. str(content))
  10345. if obj is None:
  10346. obj = self.holidays_obj
  10347. days_map = obj._days_map
  10348. year = obj.year
  10349. holiday_list = obj.holidays
  10350. days_set = obj.days_set
  10351. self.holidays_delete_entry(
  10352. 'a', apply=False, obj=obj, level=logging.DEBUG)
  10353. if content is None:
  10354. self.logger.debug("Holidays file was wiped out")
  10355. return
  10356. for line in content:
  10357. entry = line.split()
  10358. if len(entry) == 0:
  10359. continue
  10360. tag = entry[0].lower()
  10361. if tag == "year": # initialize self.year
  10362. year['valid'] = True
  10363. if len(entry) > 1:
  10364. year['value'] = entry[1]
  10365. elif tag in days_map.keys(): # initialize self.<day>
  10366. day = days_map[tag]
  10367. day['valid'] = True
  10368. days_set.append(day)
  10369. day['position'] = len(days_set) - 1
  10370. if len(entry) > 1:
  10371. day['p'] = entry[1]
  10372. if len(entry) > 2:
  10373. day['np'] = entry[2]
  10374. elif tag.isdigit(): # initialize self.holiday
  10375. holiday_list.append(tag)
  10376. else:
  10377. pass
  def holidays_write_file(self, content=None, out_path=None,
                          hup=True, obj=None, level=logging.INFO):
      """
      Write to the holidays file with content ``given/generated``

      When ``content`` is given it is first loaded into the holidays
      object and then written as is; otherwise the string form of the
      holidays object is written. The text goes to a temporary file that
      is copied over ``out_path`` and then removed.

      :param content: content to write, or None to write the current
                      holidays object
      :param out_path: destination path. Defaults to the scheduler's
                       holidays file
      :type out_path: str or None
      :param hup: SIGHUP the scheduler after writing the holidays
                  file
      :type hup: bool
      :param obj: optional holidays object to be used instead of
                  internal
      :param level: log level used for the progress message
      :returns: True on success
      :raises: PbsSchedConfigError when the copy or the HUP fails
      """
      self.logger.log(level, self.logprefix +
                      "Writing to the holidays file")
      if obj is None:
          obj = self.holidays_obj
      if out_path is None:
          out_path = self.holidays_file
      if content is not None:
          self._holidays_process_content(content, obj)
      else:
          content = str(obj)
      self.logger.debug("content being written:\n" + str(content))

      fn = self.du.create_temp_file(self.hostname, body=content)
      ret = self.du.run_copy(self.hostname, fn, out_path,
                             preserve_permission=False, sudo=True)
      self.du.rm(self.hostname, fn)
      if ret['rc'] != 0:
          # 'err' may not be a plain string; str() keeps the message
          # from failing with a TypeError that hides the real error
          raise PbsSchedConfigError(rc=ret['rc'], rv=ret['out'],
                                    msg=('error applying holidays file' +
                                         str(ret['err'])))
      if hup:
          rv = self.signal('-HUP')
          # signal()'s return type is not visible from here: another
          # caller in this class reads it as a dict with an 'rc' key,
          # while this method used to test it as a boolean. A dict is
          # always truthy, so check 'rc' when one is returned.
          if isinstance(rv, dict):
              hup_ok = rv.get('rc') == 0
          else:
              hup_ok = bool(rv)
          if not hup_ok:
              raise PbsSchedConfigError(rc=1, rv=False,
                                        msg='error applying holidays file')
      return True
  def parse_dedicated_time(self, file=None):
      """
      Parse the dedicated_time file and populate dedicated times
      as both a dedicated_time array of dictionaries defined
      as ``[{'from': ..., 'to': ...}, ...]`` (values as returned by
      ``convert_dedtime``) as well as a dedicated_time_as_str array
      with the string representation of each entry.

      Both arrays are reset before parsing. Comment lines (starting
      with ``#``) and blank lines are skipped.

      :param file: optional file to parse. Defaults to the path set
                   with ``set_dedicated_time_file`` or, failing that,
                   the one under ``PBS_HOME/sched_priv``
      :type file: str or None
      :returns: The dedicated_time list of dictionaries or None on
                error. Return an empty array if dedicated time file
                is empty.
      """
      self.dedicated_time_as_str = []
      self.dedicated_time = []

      if file:
          dt_file = file
      elif self.dedicated_time_file:
          dt_file = self.dedicated_time_file
      else:
          dt_file = os.path.join(self.pbs_conf['PBS_HOME'], 'sched_priv',
                                 'dedicated_time')
      try:
          lines = self.du.cat(self.hostname, dt_file, sudo=True)['out']
          if lines is None:
              return []

          for line in lines:
              # whitespace-only lines are skipped like empty ones
              # instead of failing the whole parse
              if line.startswith('#') or not line.strip():
                  continue
              self.dedicated_time_as_str.append(line)
              (dtime_from, dtime_to) = self.utils.convert_dedtime(line)
              self.dedicated_time.append({'from': dtime_from,
                                          'to': dtime_to})
      except Exception:
          # Exception rather than a bare except, so that
          # KeyboardInterrupt/SystemExit are not swallowed
          self.logger.error('error in parse_dedicated_time')
          return None

      return self.dedicated_time
  def clear_dedicated_time(self, hup=True):
      """
      Clear the dedicated time file

      The file is parsed first. When it holds no entries nothing is
      written; otherwise both in-memory lists are emptied and the file
      is rewritten through ``add_dedicated_time`` with only the format
      comment line.

      :param hup: passed on to ``add_dedicated_time``
      :type hup: bool
      :returns: True when there was nothing to clear, otherwise the
                result of ``add_dedicated_time``
      """
      self.parse_dedicated_time()
      if not self.dedicated_time and not self.dedicated_time_as_str:
          return True
      # rebinding to fresh lists is all that is needed; deleting the
      # loop variable of an iteration over the old lists had no effect
      self.dedicated_time = []
      self.dedicated_time_as_str = []
      dt = "# FORMAT: MM/DD/YYYY HH:MM MM/DD/YYYY HH:MM"
      return self.add_dedicated_time(dt, hup=hup)
  def add_dedicated_time(self, as_str=None, start=None, end=None, hup=True):
      """
      Append a dedicated time entry. The function can be called
      in one of two ways: either by passing in ``start`` and ``end`` as
      time values, which are formatted with ``time.strftime``, or by
      passing ``as_str``, a string that gets appended to the dedicated
      time entries as is and is handed to ``convert_dedtime``. The
      expected format is:

          ``MM/DD/YYYY HH:MM MM/DD/YYYY HH:MM``

      An entry equal to one already present is not added again.

      :param as_str: the entry as a string
      :type as_str: str or None
      :param start: start of the dedicated time, as accepted by
                    ``time.localtime``
      :param end: end of the dedicated time, as accepted by
                  ``time.localtime``
      :param hup: SIGHUP the scheduler after writing the file
      :type hup: bool
      :returns: True on success, also when nothing was specified or
                the entry is already defined
      :raises: PbsSchedConfigError when writing the file or the HUP
               fails
      """
      if self.dedicated_time is None:
          self.parse_dedicated_time()

      if start is not None and end is not None:
          dtime_from = time.strftime("%m/%d/%Y %H:%M", time.localtime(start))
          dtime_to = time.strftime("%m/%d/%Y %H:%M", time.localtime(end))
          dedtime = dtime_from + " " + dtime_to
      elif as_str is not None:
          (dtime_from, dtime_to) = self.utils.convert_dedtime(as_str)
          dedtime = as_str
      else:
          self.logger.warning("no dedicated from/to specified")
          return True

      has_range = dtime_from is not None and dtime_to is not None

      if dedtime in self.dedicated_time_as_str:
          if has_range:
              # str(): the values returned by convert_dedtime are not
              # necessarily strings
              self.logger.info(self.logprefix +
                               "dedicated time from " + str(dtime_from) +
                               " to " + str(dtime_to) + " already defined")
          else:
              self.logger.info(self.logprefix +
                               "dedicated time already defined")
          return True

      if has_range:
          self.logger.info(self.logprefix +
                           "adding dedicated time " + dedtime)

      self.dedicated_time_as_str.append(dedtime)
      if has_range:
          self.dedicated_time.append({'from': dtime_from, 'to': dtime_to})

      try:
          fn = self.du.create_temp_file()
          try:
              with open(fn, "w") as fd:
                  for entry in self.dedicated_time_as_str:
                      fd.write(entry + '\n')
              ddfile = os.path.join(self.pbs_conf['PBS_HOME'], 'sched_priv',
                                    'dedicated_time')
              self.du.run_copy(self.hostname, fn, ddfile, sudo=True,
                               preserve_permission=False)
          finally:
              # the temporary file goes away even when the copy fails
              os.remove(fn)
      except Exception:
          raise PbsSchedConfigError(rc=1, rv=False,
                                    msg='error adding dedicated time')

      if hup:
          ret = self.signal('-HUP')
          if ret['rc'] != 0:
              raise PbsSchedConfigError(rc=1, rv=False,
                                        msg='error adding dedicated time')

      return True
  def terminate(self):
      """
      Send ``-KILL`` to the scheduler through ``signal()``.
      """
      self.signal('-KILL')
  def valgrind(self):
      """
      Restart the scheduler under valgrind.

      A running scheduler is terminated first. The valgrind log goes to
      ``schd.vlgrd`` in the temporary directory.

      :returns: the result of running the command, or None when the
                valgrind binary is not available
      """
      if self.isUp():
          self.terminate()

      if not CliUtils().check_bin('valgrind'):
          self.logger.error(self.logprefix + 'valgrind not available')
          return None

      log_path = os.path.join(tempfile.gettempdir(), 'schd.vlgrd')
      sched_bin = os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin',
                               'pbs_sched')
      cmd = ['valgrind', "--log-file=" + log_path, sched_bin]
      return self.du.run_cmd(self.hostname, cmd, sudo=True)
  def alloc_to_execvnode(self, chunks):
      """
      convert a resource allocation to an execvnode string representation

      Each chunk is rendered as ``(vnode:res=val...+vnode:res=val...)``
      where the entries after the first come from the chunk's
      ``vchunk`` list, and the rendered chunks are joined with ``+``.

      :param chunks: iterable of chunk objects exposing ``vnode``,
                     ``resources`` and ``vchunk``
      :returns: the execvnode string, empty when ``chunks`` is empty
      :rtype: str
      """
      def _pairs(resources):
          # The chunk loop reads resources as a mapping while the
          # vchunk loop used to call it; accept both shapes.
          # NOTE(review): confirm which shape vchunk entries have
          if callable(resources):
              return resources()
          return resources.items()

      rendered = []
      for chunk in chunks:
          parts = ["(" + chunk.vnode]
          parts += [":" + str(res) + "=" + str(val)
                    for res, val in chunk.resources.items()]
          for vchk in chunk.vchunk:
              parts.append("+" + vchk.vnode)
              parts += [":" + str(res) + "=" + str(val)
                        for res, val in _pairs(vchk.resources)]
          parts.append(")")
          rendered.append("".join(parts))
      return "+".join(rendered)
  def cycles(self, start=None, end=None, firstN=None, lastN=None):
      """
      Analyze scheduler log and return cycle information

      When a start or end time is given the directory holding the
      scheduler log is analyzed, otherwise only the log file itself.
      ``lastN`` takes precedence over ``firstN`` when both are given.

      :param start: Optional setting of the start time to consider
      :param end: Optional setting of the end time to consider
      :param firstN: Optional setting to consider the given first
                     N cycles
      :param lastN: Optional setting to consider only the given
                    last N cycles
      :returns: the list of cycles (empty when none were found), or
                None when the log analysis module cannot be loaded
      """
      try:
          from ptl.utils.pbs_logutils import PBSSchedulerLog
      except Exception:
          self.logger.error('error loading ptl.utils.pbs_logutils')
          return None

      if start is not None or end is not None:
          analyze_path = os.path.dirname(self.logfile)
      else:
          analyze_path = self.logfile

      sl = PBSSchedulerLog()
      sl.analyze(analyze_path, start, end, self.hostname)
      cycles = sl.cycles
      if cycles is None or len(cycles) == 0:
          return []

      if lastN is not None:
          # cycles[-0:] is the whole list, so zero needs its own case
          if lastN == 0:
              return []
          return cycles[-lastN:]
      elif firstN is not None:
          return cycles[:firstN]

      return cycles
  10592. def query_fairshare(self, name=None, id=None):
  10593. """
  10594. Parse fairshare data using ``pbsfs`` and populates
  10595. fairshare_tree.If name or id are specified, return the data
  10596. associated to that id.Otherwise return the entire fairshare
  10597. tree
  10598. """
  10599. if self.has_diag:
  10600. return None
  10601. tree = FairshareTree()
  10602. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbsfs')]
  10603. if self.sc_name != 'default':
  10604. cmd += ['-I', self.sc_name]
  10605. ret = self.du.run_cmd(self.hostname, cmd, sudo=True, logerr=False)
  10606. if ret['rc'] != 0:
  10607. raise PbsFairshareError(rc=ret['rc'], rv=None,
  10608. msg=str(ret['err']))
  10609. pbsfs = ret['out']
  10610. for p in pbsfs:
  10611. m = self.fs_tag.match(p)
  10612. if m:
  10613. usage = int(m.group('Usage'))
  10614. perc = float(m.group('Perc'))
  10615. nm = m.group('name')
  10616. cgrp = int(m.group('cgrp'))
  10617. pid = int(m.group('Grp'))
  10618. nd = tree.get_node(id=pid)
  10619. if nd:
  10620. pname = nd.parent_name
  10621. else:
  10622. pname = None
  10623. # if an entity has a negative cgroup it should belong
  10624. # to the unknown resource, we work around the fact that
  10625. # PBS Pro (up to 13.0) sets this cgroup id to -1 by
  10626. # reassigning it to 0
  10627. # TODO: cleanup once PBS code is updated
  10628. if cgrp < 0:
  10629. cgrp = 0
  10630. node = FairshareNode(name=nm,
  10631. id=cgrp,
  10632. parent_id=pid,
  10633. parent_name=pname,
  10634. nshares=int(m.group('Shares')),
  10635. usage=usage,
  10636. perc={'TREEROOT': perc})
  10637. if perc:
  10638. node.prio['TREEROOT'] = float(usage) / perc
  10639. if nm == name or id == cgrp:
  10640. return node
  10641. tree.add_node(node, apply=False)
  10642. # now that all nodes are known, update parent and child
  10643. # relationship of the tree
  10644. tree.update()
  10645. for node in tree.nodes.values():
  10646. pnode = node._parent
  10647. while pnode is not None and pnode.id != 0:
  10648. if pnode.perc['TREEROOT']:
  10649. node.perc[pnode.name] = \
  10650. (node.perc['TREEROOT'] * 100 / pnode.perc[
  10651. 'TREEROOT'])
  10652. if pnode.name in node.perc and node.perc[pnode.name]:
  10653. node.prio[pnode.name] = (
  10654. node.usage / node.perc[pnode.name])
  10655. pnode = pnode._parent
  10656. if name:
  10657. n = tree.get_node(name)
  10658. if n is None:
  10659. raise PbsFairshareError(rc=1, rv=None,
  10660. msg='Unknown entity ' + name)
  10661. return n
  10662. if id:
  10663. n = tree.get_node(id=id)
  10664. raise PbsFairshareError(rc=1, rv=None,
  10665. msg='Unknown entity ' + str(id))
  10666. return n
  10667. return tree
  10668. def set_fairshare_usage(self, name=None, usage=None):
  10669. """
  10670. Set the fairshare usage associated to a given entity.
  10671. :param name: The entity to set the fairshare usage of
  10672. :type name: str or :py:class:`~ptl.lib.pbs_testlib.PbsUser` or None
  10673. :param usage: The usage value to set
  10674. """
  10675. if self.has_diag:
  10676. return True
  10677. if name is None:
  10678. self.logger.error(self.logprefix + ' an entity name required')
  10679. return False
  10680. if isinstance(name, PbsUser):
  10681. name = str(name)
  10682. if usage is None:
  10683. self.logger.error(self.logprefix + ' a usage is required')
  10684. return False
  10685. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbsfs')]
  10686. if self.sc_name is not 'default':
  10687. cmd += ['-I', self.sc_name]
  10688. cmd += ['-s', name, str(usage)]
  10689. ret = self.du.run_cmd(self.hostname, cmd, sudo=True)
  10690. if ret['rc'] == 0:
  10691. return True
  10692. return False
  10693. def decay_fairshare_tree(self):
  10694. """
  10695. Decay the fairshare tree through pbsfs
  10696. """
  10697. if self.has_diag:
  10698. return True
  10699. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbsfs')]
  10700. if self.sc_name is not 'default':
  10701. cmd += ['-I', self.sc_name]
  10702. cmd += ['-d']
  10703. ret = self.du.run_cmd(self.hostname, cmd, sudo=True)
  10704. if ret['rc'] == 0:
  10705. self.fairshare_tree = self.query_fairshare()
  10706. return True
  10707. return False
  10708. def cmp_fairshare_entities(self, name1=None, name2=None):
  10709. """
  10710. Compare two fairshare entities. Wrapper of ``pbsfs -c e1 e2``
  10711. :param name1: name of first entity to compare
  10712. :type name1: str or :py:class:`~ptl.lib.pbs_testlib.PbsUser` or None
  10713. :param name2: name of second entity to compare
  10714. :type name2: str or :py:class:`~ptl.lib.pbs_testlib.PbsUser` or None
  10715. :returns: the name of the entity of higher priority or None on error
  10716. """
  10717. if self.has_diag:
  10718. return None
  10719. if name1 is None or name2 is None:
  10720. self.logger.erro(self.logprefix + 'two fairshare entity names ' +
  10721. 'required')
  10722. return None
  10723. if isinstance(name1, PbsUser):
  10724. name1 = str(name1)
  10725. if isinstance(name2, PbsUser):
  10726. name2 = str(name2)
  10727. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbsfs')]
  10728. if self.sc_name is not 'default':
  10729. cmd += ['-I', self.sc_name]
  10730. cmd += ['-c', name1, name2]
  10731. ret = self.du.run_cmd(self.hostname, cmd, sudo=True)
  10732. if ret['rc'] == 0:
  10733. return ret['out'][0]
  10734. return None
  10735. def parse_resource_group(self, hostname=None, resource_group=None):
  10736. """
  10737. Parse the Scheduler's ``resource_group`` file
  10738. :param hostname: The name of the host from which to parse
  10739. resource_group
  10740. :type hostname: str or None
  10741. :param resource_group: The path to a resource_group file
  10742. :type resource_group: str or None
  10743. :returns: A fairshare tree
  10744. """
  10745. if hostname is None:
  10746. hostname = self.hostname
  10747. # if resource_group is None:
  10748. resource_group = self.resource_group_file
  10749. # if has_diag is True acces to sched_priv may not require su privilege
  10750. ret = self.du.cat(hostname, resource_group, sudo=(not self.has_diag))
  10751. if ret['rc'] != 0:
  10752. self.logger.error(hostname + ' error reading ' + resource_group)
  10753. tree = FairshareTree(hostname, resource_group)
  10754. root = FairshareNode('root', -1, parent_id=0, nshares=100)
  10755. tree.add_node(root, apply=False)
  10756. lines = ret['out']
  10757. for line in lines:
  10758. line = line.strip()
  10759. if not line.startswith("#") and len(line) > 0:
  10760. # could have 5th column but we only need the first 4
  10761. (name, id, parent, nshares) = line.split()[:4]
  10762. node = FairshareNode(name, id, parent_name=parent,
  10763. nshares=nshares)
  10764. tree.add_node(node, apply=False)
  10765. tree.update()
  10766. return tree
  10767. def add_to_resource_group(self, name, fairshare_id, parent, nshares,
  10768. validate=True):
  10769. """
  10770. Add an entry to the resource group file
  10771. :param name: The name of the entity to add
  10772. :type name: str or :py:class:`~ptl.lib.pbs_testlib.PbsUser`
  10773. :param fairshare_id: The numeric identifier of the entity to add
  10774. :type fairshare_id: int
  10775. :param parent: The name of the parent group
  10776. :type parent: str
  10777. :param nshares: The number of shares associated to the entity
  10778. :type nshares: int
  10779. :param validate: if True (the default), validate the
  10780. configuration settings.
  10781. :type validate: bool
  10782. """
  10783. if self.resource_group is None:
  10784. self.resource_group = self.parse_resource_group(
  10785. self.hostname, self.resource_group_file)
  10786. if not self.resource_group:
  10787. self.resource_group = FairshareTree(
  10788. self.hostname, self.resource_group_file)
  10789. if isinstance(name, PbsUser):
  10790. name = str(name)
  10791. reconfig_time = int(time.time())
  10792. rc = self.resource_group.create_node(name, fairshare_id,
  10793. parent_name=parent,
  10794. nshares=nshares)
  10795. if validate:
  10796. self.get_pid()
  10797. self.signal('-HUP')
  10798. try:
  10799. self.log_match("Sched;reconfigure;Scheduler is reconfiguring",
  10800. n=10, starttime=reconfig_time)
  10801. self.log_match("fairshare;resgroup: error ", n=10,
  10802. starttime=reconfig_time, existence=False,
  10803. max_attempts=2)
  10804. except PtlLogMatchError:
  10805. _msg = 'Error in validating resource_group changes'
  10806. raise PbsSchedConfigError(rc=1, rv=False,
  10807. msg=_msg)
  10808. return rc
  10809. def job_formula(self, jobid=None, starttime=None, max_attempts=5):
  10810. """
  10811. Extract formula value out of scheduler log
  10812. :param jobid: Optional, the job identifier for which to get
  10813. the formula.
  10814. :type jobid: str or int
  10815. :param starttime: The time at which to start parsing the
  10816. scheduler log
  10817. :param max_attempts: The number of attempts to search for
  10818. formula in the logs
  10819. :type max_attempts: int
  10820. :returns: If jobid is specified, return the formula value
  10821. associated to that job if no jobid is specified,
  10822. returns a dictionary mapping job ids to formula
  10823. """
  10824. if jobid is None:
  10825. jobid = "(?P<jobid>.*)"
  10826. _alljobs = True
  10827. else:
  10828. if isinstance(jobid, int):
  10829. jobid = str(jobid)
  10830. _alljobs = False
  10831. formula_pat = (".*Job;" + jobid +
  10832. ".*;Formula Evaluation = (?P<fval>.*)")
  10833. rv = self.log_match(formula_pat, regexp=True, starttime=starttime,
  10834. n='ALL', allmatch=True, max_attempts=5)
  10835. ret = {}
  10836. if rv:
  10837. for _, l in rv:
  10838. m = re.match(formula_pat, l)
  10839. if m:
  10840. if _alljobs:
  10841. jobid = m.group('jobid')
  10842. ret[jobid] = float(m.group('fval').strip())
  10843. if not _alljobs:
  10844. if jobid in ret:
  10845. return ret[jobid]
  10846. else:
  10847. return
  10848. return ret
  10849. class FairshareTree(object):
  10850. """
  10851. Object representation of the Scheduler's resource_group
  10852. file and pbsfs data
  10853. :param hostname: Hostname of the machine
  10854. :type hostname: str
  10855. """
  10856. du = DshUtils()
  10857. def __init__(self, hostname=None, resource_group=None):
  10858. self.logger = logging.getLogger(__name__)
  10859. self.hostname = hostname
  10860. self.resource_group = resource_group
  10861. self.nodes = {}
  10862. self.root = None
  10863. self._next_id = -1
  10864. def update_resource_group(self):
  10865. if self.resource_group:
  10866. fn = self.du.create_temp_file(body=self.__str__())
  10867. ret = self.du.run_copy(self.hostname, fn, self.resource_group,
  10868. preserve_permission=False, sudo=True)
  10869. os.remove(fn)
  10870. if ret['rc'] != 0:
  10871. raise PbsFairshareError(rc=1, rv=False,
  10872. msg='error updating resource group')
  10873. return True
  10874. def update(self):
  10875. for node in self.nodes.values():
  10876. if node._parent is None:
  10877. pnode = self.get_node(id=node.parent_id)
  10878. if pnode:
  10879. node._parent = pnode
  10880. if node not in pnode._child:
  10881. pnode._child.append(node)
  10882. def _add_node(self, node):
  10883. if node.name == 'TREEROOT' or node.name == 'root':
  10884. self.root = node
  10885. self.nodes[node.name] = node
  10886. if node.parent_name in self.nodes:
  10887. self.nodes[node.parent_name]._child.append(node)
  10888. node._parent = self.nodes[node.parent_name]
  10889. def add_node(self, node, apply=True):
  10890. """
  10891. add node to the fairshare tree
  10892. """
  10893. self._add_node(node)
  10894. if apply:
  10895. return self.update_resource_group()
  10896. return True
  10897. def create_node(self, name, id, parent_name, nshares):
  10898. """
  10899. Add an entry to the ``resource_group`` file
  10900. :param name: The name of the entity to add
  10901. :type name: str
  10902. :param id: The uniqe numeric identifier of the entity
  10903. :type id: int
  10904. :param parent: The name of the parent/group of the entity
  10905. :type parent: str
  10906. :param nshares: The number of shares assigned to this entity
  10907. :type nshares: int
  10908. :returns: True on success, False otherwise
  10909. """
  10910. if name in self.nodes:
  10911. self.logger.warning('fairshare: node ' + name + ' already defined')
  10912. return True
  10913. self.logger.info('creating tree node: ' + name)
  10914. node = FairshareNode(name, id, parent_name=parent_name,
  10915. nshares=nshares)
  10916. self._add_node(node)
  10917. return self.update_resource_group()
  10918. def get_node(self, name=None, id=None):
  10919. """
  10920. Return a node of the fairshare tree identified by either
  10921. name or id.
  10922. :param name: The name of the entity to query
  10923. :type name: str or None
  10924. :param id: The id of the entity to query
  10925. :returns: The fairshare information of the entity when
  10926. found, if not, returns None
  10927. .. note:: The name takes precedence over the id.
  10928. """
  10929. for node in self.nodes.values():
  10930. if name is not None and node.name == name:
  10931. return node
  10932. if id is not None and node.id == id:
  10933. return node
  10934. return None
  10935. def __batch_status__(self):
  10936. """
  10937. Convert fairshare tree object to a batch status format
  10938. """
  10939. dat = []
  10940. for node in self.nodes.values():
  10941. if node.name == 'root':
  10942. continue
  10943. einfo = {}
  10944. einfo['cgroup'] = node.id
  10945. einfo['id'] = node.name
  10946. einfo['group'] = node.parent_id
  10947. einfo['nshares'] = node.nshares
  10948. if len(node.prio) > 0:
  10949. p = []
  10950. for k, v in node.prio.items():
  10951. p += ["%s:%d" % (k, int(v))]
  10952. einfo['penalty'] = ", ".join(p)
  10953. einfo['usage'] = node.usage
  10954. if node.perc:
  10955. p = []
  10956. for k, v in node.perc.items():
  10957. p += ["%s:%.3f" % (k, float(v))]
  10958. einfo['shares_perc'] = ", ".join(p)
  10959. ppnode = self.get_node(id=node.parent_id)
  10960. if ppnode:
  10961. ppname = ppnode.name
  10962. ppid = ppnode.id
  10963. else:
  10964. ppnode = self.get_node(name=node.parent_name)
  10965. if ppnode:
  10966. ppname = ppnode.name
  10967. ppid = ppnode.id
  10968. else:
  10969. ppname = ''
  10970. ppid = None
  10971. einfo['parent'] = "%s (%s) " % (str(ppid), ppname)
  10972. dat.append(einfo)
  10973. return dat
  10974. def get_next_id(self):
  10975. self._next_id -= 1
  10976. return self._next_id
  10977. def __repr__(self):
  10978. return self.__str__()
  10979. def _dfs(self, node, dat):
  10980. if node.name != 'root':
  10981. s = []
  10982. if node.name is not None:
  10983. s += [node.name]
  10984. if node.id is not None:
  10985. s += [str(node.id)]
  10986. if node.parent_name is not None:
  10987. s += [node.parent_name]
  10988. if node.nshares is not None:
  10989. s += [str(node.nshares)]
  10990. if node.usage is not None:
  10991. s += [str(node.usage)]
  10992. dat.append("\t".join(s))
  10993. for n in node._child:
  10994. self._dfs(n, dat)
  10995. def __str__(self):
  10996. dat = []
  10997. if self.root:
  10998. self._dfs(self.root, dat)
  10999. if len(dat) > 0:
  11000. dat += ['\n']
  11001. return "\n".join(dat)
  11002. class FairshareNode(object):
  11003. """
  11004. Object representation of the fairshare data as queryable through
  11005. the command ``pbsfs``.
  11006. :param name: Name of fairshare node
  11007. :type name: str or None
  11008. :param nshares: Number of shares
  11009. :type nshares: int or None
  11010. :param usage: Fairshare usage
  11011. :param perc: Percentage the entity has of the tree
  11012. """
  11013. def __init__(self, name=None, id=None, parent_name=None, parent_id=None,
  11014. nshares=None, usage=None, perc=None):
  11015. self.name = name
  11016. self.id = id
  11017. self.parent_name = parent_name
  11018. self.parent_id = parent_id
  11019. self.nshares = nshares
  11020. self.usage = usage
  11021. self.perc = perc
  11022. self.prio = {}
  11023. self._parent = None
  11024. self._child = []
  11025. def __str__(self):
  11026. ret = []
  11027. if self.name is not None:
  11028. ret.append(self.name)
  11029. if self.id is not None:
  11030. ret.append(str(self.id))
  11031. if self.parent_name is not None:
  11032. ret.append(str(self.parent_name))
  11033. if self.nshares is not None:
  11034. ret.append(str(self.nshares))
  11035. if self.usage is not None:
  11036. ret.append(str(self.usage))
  11037. if self.perc is not None:
  11038. ret.append(str(self.perc))
  11039. return "\t".join(ret)
  11040. class MoM(PBSService):
  11041. """
  11042. Container for MoM properties.
  11043. Provides various MoM operations, such as creation, insertion,
  11044. deletion of vnodes.
  11045. :param name: The hostname of the server. Defaults to calling
  11046. pbs_default()
  11047. :type name: str or None
  11048. :param attrs: Dictionary of attributes to set, these will
  11049. override defaults.
  11050. :type attrs: Dictionary
  11051. :param pbsconf_file: path to config file to parse for
  11052. ``PBS_HOME``, ``PBS_EXEC``, etc
  11053. :type pbsconf_file: str or None
  11054. :param diagmap: A dictionary of PBS objects ``(node,server,etc)``
  11055. to mapped files from PBS diag directory
  11056. :type diagmap: Dictionary
  11057. :param diag: path to PBS diag directory (This will overrides
  11058. diagmap)
  11059. :type diag: str or None
  11060. :param server: A PBS server instance to which this mom is associated
  11061. :param db_acccess: set to either file containing credentials to DB
  11062. access or dictionary containing
  11063. {'dbname':...,'user':...,'port':...}
  11064. :type db_access: str or dictionary
  11065. """
  11066. dflt_attributes = {}
  11067. conf_to_cmd_map = {'PBS_MOM_SERVICE_PORT': '-M',
  11068. 'PBS_MANAGER_SERVICE_PORT': '-R',
  11069. 'PBS_HOME': '-d'}
  11070. def __init__(self, name=None, attrs={}, pbsconf_file=None, diagmap={},
  11071. diag=None, server=None, db_access=None):
  11072. self.logger = logging.getLogger(__name__)
  11073. if server is not None:
  11074. self.server = server
  11075. if diag is None and self.server.diag is not None:
  11076. diag = self.server.diag
  11077. if (len(diagmap) == 0) and (len(self.server.diagmap) != 0):
  11078. diagmap = self.server.diagmap
  11079. else:
  11080. self.server = Server(name, pbsconf_file=pbsconf_file,
  11081. db_access=db_access, diag=diag,
  11082. diagmap=diagmap)
  11083. PBSService.__init__(self, name, attrs, self.dflt_attributes,
  11084. pbsconf_file, diag=diag, diagmap=diagmap)
  11085. _m = ['mom ', self.shortname]
  11086. if pbsconf_file is not None:
  11087. _m += ['@', pbsconf_file]
  11088. _m += [': ']
  11089. self.logprefix = "".join(_m)
  11090. self.pi = PBSInitServices(hostname=self.hostname,
  11091. conf=self.pbs_conf_file)
  11092. self.configd = os.path.join(self.pbs_conf['PBS_HOME'], 'mom_priv',
  11093. 'config.d')
  11094. self.config = {}
  11095. if self.platform == 'cray' or self.platform == 'craysim':
  11096. usecp = os.path.realpath('/home')
  11097. if self.platform == 'cray':
  11098. if os.path.exists('/opt/cray/alps/default/bin/apbasil'):
  11099. alps_client = '/opt/cray/alps/default/bin/apbasil'
  11100. else:
  11101. alps_client = self.du.which(exe='apbasil')
  11102. else:
  11103. alps_client = "/opt/alps/apbasil.sh"
  11104. self.dflt_config = {'$clienthost': self.server.hostname,
  11105. '$vnodedef_additive': 0,
  11106. '$alps_client': alps_client,
  11107. '$usecp': '*:%s %s' % (usecp, usecp)}
  11108. else:
  11109. self.dflt_config = {'$clienthost': self.server.hostname}
  11110. self.version = None
  11111. self._is_cpuset_mom = None
  11112. def isUp(self):
  11113. """
  11114. Check for PBS mom up
  11115. """
  11116. return super(MoM, self)._isUp(self)
  11117. def signal(self, sig):
  11118. """
  11119. Send signal to PBS mom
  11120. """
  11121. self.logger.info(self.logprefix + 'sent signal ' + sig)
  11122. return super(MoM, self)._signal(sig, inst=self)
  11123. def get_pid(self):
  11124. """
  11125. Get the PBS mom pid
  11126. """
  11127. return super(MoM, self)._get_pid(inst=self)
  11128. def all_instance_pids(self):
  11129. """
  11130. Get all pids of a instance
  11131. """
  11132. return super(MoM, self)._all_instance_pids(inst=self)
  11133. def start(self, args=None, launcher=None):
  11134. """
  11135. Start the PBS mom
  11136. :param args: Arguments to start the mom
  11137. :type args: str or None
  11138. :param launcher: Optional utility to invoke the launch of the service
  11139. :type launcher: str or list or None
  11140. """
  11141. if args is not None or launcher is not None:
  11142. return super(MoM, self)._start(inst=self, args=args,
  11143. cmd_map=self.conf_to_cmd_map,
  11144. launcher=launcher)
  11145. else:
  11146. try:
  11147. rv = self.pi.start_mom()
  11148. self._update_pid(self)
  11149. except PbsInitServicesError as e:
  11150. raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
  11151. return rv
  11152. def stop(self, sig=None):
  11153. """
  11154. Stop the PBS mom
  11155. :param sig: Signal to stop the PBS mom
  11156. :type sig: str
  11157. """
  11158. if sig is not None:
  11159. self.logger.info(self.logprefix + 'stopping MoM on host ' +
  11160. self.hostname)
  11161. return super(MoM, self)._stop(sig, inst=self)
  11162. else:
  11163. try:
  11164. self.pi.stop_mom()
  11165. self.pid = None
  11166. except PbsInitServicesError as e:
  11167. raise PbsServiceError(rc=e.rc, rv=e.rv, msg=e.msg)
  11168. return True
  11169. def restart(self):
  11170. """
  11171. Restart the PBS mom
  11172. """
  11173. if self.isUp():
  11174. if not self.stop():
  11175. return False
  11176. return self.start()
  11177. def log_match(self, msg=None, id=None, n=50, tail=True, allmatch=False,
  11178. regexp=False, max_attempts=None, interval=None,
  11179. starttime=None, endtime=None, level=logging.INFO,
  11180. existence=True):
  11181. """
  11182. Match given ``msg`` in given ``n`` lines of MoM log
  11183. :param msg: log message to match, can be regex also when
  11184. ``regexp`` is True
  11185. :type msg: str
  11186. :param id: The id of the object to trace. Only used for
  11187. tracejob
  11188. :type id: str
  11189. :param n: 'ALL' or the number of lines to search through,
  11190. defaults to 50
  11191. :type n: str or int
  11192. :param tail: If true (default), starts from the end of
  11193. the file
  11194. :type tail: bool
  11195. :param allmatch: If True all matching lines out of then
  11196. parsed are returned as a list. Defaults
  11197. to False
  11198. :type allmatch: bool
  11199. :param regexp: If true msg is a Python regular expression.
  11200. Defaults to False
  11201. :type regexp: bool
  11202. :param max_attempts: the number of attempts to make to find
  11203. a matching entry
  11204. :type max_attempts: int
  11205. :param interval: the interval between attempts
  11206. :type interval: int
  11207. :param starttime: If set ignore matches that occur before
  11208. specified time
  11209. :type starttime: int
  11210. :param endtime: If set ignore matches that occur after
  11211. specified time
  11212. :type endtime: int
  11213. :param level: The logging level, defaults to INFO
  11214. :type level: int
  11215. :param existence: If True (default), check for existence of
  11216. given msg, else check for non-existence of
  11217. given msg.
  11218. :type existence: bool
  11219. :return: (x,y) where x is the matching line
  11220. number and y the line itself. If allmatch is True,
  11221. a list of tuples is returned.
  11222. :rtype: tuple
  11223. :raises PtlLogMatchError:
  11224. When ``existence`` is True and given
  11225. ``msg`` is not found in ``n`` line
  11226. Or
  11227. When ``existence`` is False and given
  11228. ``msg`` found in ``n`` line.
  11229. .. note:: The matching line number is relative to the record
  11230. number, not the absolute line number in the file.
  11231. """
  11232. return self._log_match(self, msg, id, n, tail, allmatch, regexp,
  11233. max_attempts, interval, starttime, endtime,
  11234. level, existence)
  11235. def pbs_version(self):
  11236. """
  11237. Get the PBS version
  11238. """
  11239. if self.version:
  11240. return self.version
  11241. exe = os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbs_mom')
  11242. version = self.du.run_cmd(self.hostname,
  11243. [exe, '--version'], sudo=True)['out']
  11244. if version:
  11245. self.logger.debug(version)
  11246. # in some cases pbs_mom --version may return multiple lines, we
  11247. # only care about the one that carries pbs_version information
  11248. for ver in version:
  11249. if 'pbs_version' in ver:
  11250. version = ver.split('=')[1].strip()
  11251. break
  11252. else:
  11253. version = self.log_match('pbs_version', tail=False)
  11254. if version:
  11255. version = version[1].strip().split('=')[1].strip()
  11256. else:
  11257. version = "unknown"
  11258. self.version = LooseVersion(version)
  11259. return self.version
  11260. def delete_vnodes(self):
  11261. rah = ATTR_rescavail + '.host'
  11262. rav = ATTR_rescavail + '.vnode'
  11263. a = {rah: self.hostname, rav: None}
  11264. try:
  11265. _vs = self.server.status(HOST, a, id=self.hostname)
  11266. except PbsStatusError:
  11267. try:
  11268. _vs = self.server.status(HOST, a, id=self.shortname)
  11269. except PbsStatusError as e:
  11270. err_msg = e.msg[0].rstrip()
  11271. if (err_msg.endswith('Server has no node list') or
  11272. err_msg.endswith('Unknown node')):
  11273. _vs = []
  11274. else:
  11275. raise e
  11276. vs = []
  11277. for v in _vs:
  11278. if v[rav].split('.')[0] != v[rah].split('.')[0]:
  11279. vs.append(v['id'])
  11280. if len(vs) > 0:
  11281. self.server.manager(MGR_CMD_DELETE, VNODE, id=vs)
  11282. def revert_to_defaults(self, delvnodedefs=True):
  11283. """
  11284. 1. ``Revert MoM configuration to defaults.``
  11285. 2. ``Remove epilogue and prologue``
  11286. 3. ``Delete all vnode definitions
  11287. HUP MoM``
  11288. :param delvnodedefs: if True (the default) delete all vnode
  11289. definitions and restart the MoM
  11290. :type delvnodedefs: bool
  11291. :returns: True on success and False otherwise
  11292. """
  11293. self.logger.info(self.logprefix +
  11294. 'reverting configuration to defaults')
  11295. restart = False
  11296. if not self.has_diag:
  11297. self.delete_pelog()
  11298. if delvnodedefs and self.has_vnode_defs():
  11299. restart = True
  11300. if not self.delete_vnode_defs():
  11301. return False
  11302. self.delete_vnodes()
  11303. if cmp(self.config, self.dflt_config) != 0:
  11304. self.apply_config(self.dflt_config, hup=False, restart=False)
  11305. if restart:
  11306. self.restart()
  11307. else:
  11308. self.signal('-HUP')
  11309. return self.isUp()
  11310. return True
  11311. def save_configuration(self, outfile, mode='a'):
  11312. """
  11313. Save a MoM ``mom_priv/config``
  11314. :param outfile: the output file to which onfiguration is
  11315. saved
  11316. :type outfile: str
  11317. :param mode: the mode in which to open outfile to save
  11318. configuration.
  11319. :type mode: str
  11320. :returns: True on success, False on error
  11321. .. note:: first object being saved should open this file
  11322. with 'w' and subsequent calls from other objects
  11323. should save with mode 'a' or 'a+'. Defaults to a+
  11324. """
  11325. conf = {}
  11326. mconf = {MGR_OBJ_NODE: conf}
  11327. mpriv = os.path.join(self.pbs_conf['PBS_HOME'], 'mom_priv')
  11328. cf = os.path.join(mpriv, 'config')
  11329. self._save_config_file(conf, cf)
  11330. if os.path.isdir(os.path.join(mpriv, 'config.d')):
  11331. for f in os.listdir(os.path.join(mpriv, 'config.d')):
  11332. self._save_config_file(conf,
  11333. os.path.join(mpriv, 'config.d', f))
  11334. try:
  11335. f = open(outfile, mode)
  11336. cPickle.dump(mconf, f)
  11337. f.close()
  11338. except:
  11339. self.logger.error('error saving configuration to ' + outfile)
  11340. return False
  11341. return True
  11342. def load_configuration(self, infile):
  11343. """
  11344. load configuration from saved file infile
  11345. """
  11346. self._load_configuration(infile, MGR_OBJ_NODE)
  11347. def is_cray(self):
  11348. """
  11349. Returns True if the version of PBS used was built for Cray platforms
  11350. """
  11351. try:
  11352. self.log_match("alps_client", n='ALL', tail=False, max_attempts=1)
  11353. except PtlLogMatchError:
  11354. return False
  11355. else:
  11356. return True
  11357. def is_cpuset_mom(self):
  11358. """
  11359. Check for cpuset mom
  11360. """
  11361. if self._is_cpuset_mom is not None:
  11362. return self._is_cpuset_mom
  11363. raa = ATTR_rescavail + '.arch'
  11364. a = {raa: None}
  11365. try:
  11366. rv = self.server.status(NODE, a, id=self.shortname)
  11367. except PbsStatusError:
  11368. try:
  11369. rv = self.server.status(NODE, a, id=self.hostname)
  11370. except PbsStatusError as e:
  11371. if e.msg[0].endswith('Server has no node list'):
  11372. return False
  11373. else:
  11374. raise e
  11375. if rv[0][raa] == 'linux_cpuset':
  11376. self._is_cpuset_mom = True
  11377. else:
  11378. self._is_cpuset_mom = False
  11379. return self._is_cpuset_mom
  11380. def create_vnode_def(self, name, attrs={}, numnodes=1, sharednode=True,
  11381. pre='[', post=']', usenatvnode=False, attrfunc=None,
  11382. vnodes_per_host=1):
  11383. """
  11384. Create a vnode definition string representation
  11385. :param name: The prefix for name of vnode to create,
  11386. name of vnode will be prefix + pre + <num> +
  11387. post
  11388. :type name: str
  11389. :param attrs: Dictionary of attributes to set on each vnode
  11390. :type attrs: Dictionary
  11391. :param numnodes: The number of vnodes to create
  11392. :type numnodes: int
  11393. :param sharednode: If true vnodes are shared on a host
  11394. :type sharednode: bool
  11395. :param pre: The symbol preceding the numeric value of that
  11396. vnode.
  11397. :type pre: str
  11398. :param post: The symbol following the numeric value of that
  11399. vnode.
  11400. :type post: str
  11401. :param usenatvnode: use the natural vnode as the first vnode
  11402. to allocate this only makes sense
  11403. starting with PBS 11.3 when natural
  11404. vnodes are reported as a allocatable
  11405. :type usenatvnode: bool
  11406. :param attrfunc: function to customize the attributes,
  11407. signature is (name, numnodes, curnodenum,
  11408. attrs), must return a dict that contains
  11409. new or modified attrs that will be added to
  11410. the vnode def. The function is called once
  11411. per vnode being created, it does not modify
  11412. attrs itself across calls.
  11413. :param vnodes_per_host: number of vnodes per host
  11414. :type vnodes_per_host: int
  11415. :returns: A string representation of the vnode definition
  11416. file
  11417. """
  11418. sethost = False
  11419. attribs = attrs.copy()
  11420. if not sharednode and 'resources_available.host' not in attrs:
  11421. sethost = True
  11422. if attrfunc is None:
  11423. customattrs = attribs
  11424. vdef = ["$configversion 2"]
  11425. # altering the natural vnode information
  11426. if numnodes == 0:
  11427. for k, v in attribs.items():
  11428. vdef += [name + ": " + str(k) + "=" + str(v)]
  11429. else:
  11430. if usenatvnode:
  11431. if attrfunc:
  11432. customattrs = attrfunc(name, numnodes, "", attribs)
  11433. for k, v in customattrs.items():
  11434. vdef += [self.shortname + ": " + str(k) + "=" + str(v)]
  11435. # account for the use of the natural vnode
  11436. numnodes -= 1
  11437. else:
  11438. # ensure that natural vnode is not allocatable by the scheduler
  11439. vdef += [self.shortname + ": resources_available.ncpus=0"]
  11440. vdef += [self.shortname + ": resources_available.mem=0"]
  11441. for n in xrange(numnodes):
  11442. vnid = name + pre + str(n) + post
  11443. if sethost:
  11444. if vnodes_per_host > 1:
  11445. if n % vnodes_per_host == 0:
  11446. _nid = vnid
  11447. else:
  11448. _nid = name + pre + str(n - n % vnodes_per_host) + post
  11449. attribs['resources_available.host'] = _nid
  11450. else:
  11451. attribs['resources_available.host'] = vnid
  11452. if attrfunc:
  11453. customattrs = attrfunc(vnid, numnodes, n, attribs)
  11454. for k, v in customattrs.items():
  11455. vdef += [vnid + ": " + str(k) + "=" + str(v)]
  11456. if numnodes == 0:
  11457. nn = 1
  11458. else:
  11459. nn = numnodes
  11460. if numnodes > 1:
  11461. vnn_msg = ' vnodes '
  11462. else:
  11463. vnn_msg = ' vnode '
  11464. self.logger.info(self.logprefix + 'created ' + str(nn) +
  11465. vnn_msg + name + ' with attr ' +
  11466. str(attribs) + ' on host ' + self.hostname)
  11467. vdef += ["\n"]
  11468. del attribs
  11469. return "\n".join(vdef)
  11470. def parse_config(self):
  11471. """
  11472. Parse mom config file into a dictionary of configuration
  11473. options.
  11474. :returns: A dictionary of configuration options on success,
  11475. and None otherwise
  11476. """
  11477. try:
  11478. mconf = os.path.join(self.pbs_conf['PBS_HOME'], 'mom_priv',
  11479. 'config')
  11480. ret = self.du.cat(self.hostname, mconf, sudo=True)
  11481. if ret['rc'] != 0:
  11482. self.logger.error('error parsing configuration file')
  11483. return None
  11484. self.config = {}
  11485. lines = ret['out']
  11486. for line in lines:
  11487. (k, v) = line.split(' ', 1)
  11488. if k in self.config:
  11489. if isinstance(self.config[k], list):
  11490. self.config[k].append(v)
  11491. else:
  11492. self.config[k] = [self.config[k], v]
  11493. else:
  11494. self.config[k] = v
  11495. except:
  11496. self.logger.error('error in parse_config')
  11497. return None
  11498. return self.config
  11499. def add_config(self, conf={}, hup=True):
  11500. """
  11501. Add config options to mom_priv_config.
  11502. :param conf: The configurations to add to ``mom_priv/config``
  11503. :type conf: Dictionary
  11504. :param hup: If True (default) ``HUP`` the MoM
  11505. :type hup: bool
  11506. :returns: True on success and False otherwise
  11507. """
  11508. doconfig = False
  11509. if not self.config:
  11510. self.parse_config()
  11511. mc = self.config
  11512. if mc is None:
  11513. mc = {}
  11514. for k, v in conf.items():
  11515. if k in mc and (mc[k] == v or (isinstance(v, list) and
  11516. mc[k] in v)):
  11517. self.logger.debug(self.logprefix + 'config ' + k +
  11518. ' already set to ' + str(v))
  11519. continue
  11520. else:
  11521. doconfig = True
  11522. break
  11523. if not doconfig:
  11524. return True
  11525. self.logger.info(self.logprefix + "config " + str(conf))
  11526. return self.apply_config(conf, hup)
  11527. def unset_mom_config(self, name, hup=True):
  11528. """
  11529. Delete a mom_config entry
  11530. :param name: The entry to remove from ``mom_priv/config``
  11531. :type name: String
  11532. :param hup: if True (default) ``HUP`` the MoM
  11533. :type hup: bool
  11534. :returns: True on success and False otherwise
  11535. """
  11536. mc = self.parse_config()
  11537. if mc is None or name not in mc:
  11538. return True
  11539. self.logger.info(self.logprefix + "unsetting config " + name)
  11540. del mc[name]
  11541. return self.apply_config(mc, hup)
  11542. def apply_config(self, conf={}, hup=True, restart=False):
  11543. """
  11544. Apply configuration options to MoM.
  11545. :param conf: A dictionary of configuration options to apply
  11546. to MoM
  11547. :type conf: Dictionary
  11548. :param hup: If True (default) , HUP the MoM to apply the
  11549. configuration
  11550. :type hup: bool
  11551. :returns: True on success and False otherwise.
  11552. """
  11553. self.config = dict(self.config.items() + conf.items())
  11554. try:
  11555. fn = self.du.create_temp_file()
  11556. with open(fn, 'w+') as f:
  11557. for k, v in self.config.items():
  11558. if isinstance(v, list):
  11559. for eachprop in v:
  11560. f.write(str(k) + ' ' + str(eachprop) + '\n')
  11561. else:
  11562. f.write(str(k) + ' ' + str(v) + '\n')
  11563. dest = os.path.join(
  11564. self.pbs_conf['PBS_HOME'], 'mom_priv', 'config')
  11565. self.du.run_copy(self.hostname, fn, dest,
  11566. preserve_permission=False, sudo=True)
  11567. os.remove(fn)
  11568. except:
  11569. raise PbsMomConfigError(rc=1, rv=False,
  11570. msg='error processing add_config')
  11571. if restart:
  11572. return self.restart()
  11573. elif hup:
  11574. return self.signal('-HUP')
  11575. return True
  11576. def get_vnode_def(self, vnodefile=None):
  11577. """
  11578. :returns: A vnode def file as a single string
  11579. """
  11580. if vnodefile is None:
  11581. return None
  11582. f = open(vnodefile)
  11583. lines = f.readlines()
  11584. f.close()
  11585. return "".join(lines)
  11586. def insert_vnode_def(self, vdef, fname=None, additive=False, restart=True):
  11587. """
  11588. Insert and enable a vnode definition. Root privilege
  11589. is required
  11590. :param vdef: The vnode definition string as created by
  11591. create_vnode_def
  11592. :type vdef: str
  11593. :param fname: The filename to write the vnode def string to
  11594. :type fname: str or None
  11595. :param additive: If True, keep all other vnode def files
  11596. under config.d Default is False
  11597. :type additive: bool
  11598. :param delete: If True, delete all nodes known to the server.
  11599. Default is True
  11600. :type delete: bool
  11601. :param restart: If True, restart the MoM. Default is True
  11602. :type restart: bool
  11603. """
  11604. try:
  11605. fn = self.du.create_temp_file(self.hostname, body=vdef)
  11606. except:
  11607. raise PbsMomConfigError(rc=1, rv=False,
  11608. msg="Failed to insert vnode definition")
  11609. if fname is None:
  11610. fname = 'pbs_vnode_' + str(int(time.time())) + '.def'
  11611. if not additive:
  11612. self.delete_vnode_defs()
  11613. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbs_mom')]
  11614. cmd += ['-s', 'insert', fname, fn]
  11615. ret = self.du.run_cmd(self.hostname, cmd, sudo=True, logerr=False,
  11616. level=logging.INFOCLI)
  11617. self.du.rm(hostname=self.hostname, path=fn, force=True)
  11618. if ret['rc'] != 0:
  11619. raise PbsMomConfigError(rc=1, rv=False, msg="\n".join(ret['err']))
  11620. msg = self.logprefix + 'inserted vnode definition file '
  11621. msg += fname + ' on host: ' + self.hostname
  11622. self.logger.info(msg)
  11623. if restart:
  11624. self.restart()
  11625. def has_vnode_defs(self):
  11626. """
  11627. Check for vnode definition(s)
  11628. """
  11629. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbs_mom')]
  11630. cmd += ['-s', 'list']
  11631. ret = self.du.run_cmd(self.hostname, cmd, sudo=True, logerr=False,
  11632. level=logging.INFOCLI)
  11633. if ret['rc'] == 0:
  11634. files = [x for x in ret['out'] if not x.startswith('PBS')]
  11635. if len(files) > 0:
  11636. return True
  11637. else:
  11638. return False
  11639. else:
  11640. return False
  11641. def delete_vnode_defs(self, vdefname=None):
  11642. """
  11643. delete vnode definition(s) on this MoM
  11644. :param vdefname: name of a vnode definition file to delete,
  11645. if None all vnode definitions are deleted
  11646. :type vdefname: str
  11647. :returns: True if delete succeed otherwise False
  11648. """
  11649. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin', 'pbs_mom')]
  11650. cmd += ['-s', 'list']
  11651. ret = self.du.run_cmd(self.hostname, cmd, sudo=True, logerr=False,
  11652. level=logging.INFOCLI)
  11653. if ret['rc'] != 0:
  11654. return False
  11655. rv = True
  11656. if len(ret['out']) > 0:
  11657. for vnodedef in ret['out']:
  11658. vnodedef = vnodedef.strip()
  11659. if (vnodedef == vdefname) or vdefname is None:
  11660. if vnodedef.startswith('PBS'):
  11661. continue
  11662. cmd = [os.path.join(self.pbs_conf['PBS_EXEC'], 'sbin',
  11663. 'pbs_mom')]
  11664. cmd += ['-s', 'remove', vnodedef]
  11665. ret = self.du.run_cmd(self.hostname, cmd, sudo=True,
  11666. logerr=False, level=logging.INFOCLI)
  11667. if ret['rc'] != 0:
  11668. return False
  11669. else:
  11670. rv = True
  11671. return rv
  11672. def has_pelog(self, filename=None):
  11673. """
  11674. Check for prologue and epilogue
  11675. """
  11676. _has_pro = False
  11677. _has_epi = False
  11678. phome = self.pbs_conf['PBS_HOME']
  11679. prolog = os.path.join(phome, 'mom_priv', 'prologue')
  11680. epilog = os.path.join(phome, 'mom_priv', 'epilogue')
  11681. if self.du.isfile(self.hostname, path=prolog, sudo=True):
  11682. _has_pro = True
  11683. if filename == 'prologue':
  11684. return _has_pro
  11685. if self.du.isfile(self.hostname, path=epilog, sudo=True):
  11686. _has_epi = True
  11687. if filename == 'epilogue':
  11688. return _has_pro
  11689. if _has_epi or _has_pro:
  11690. return True
  11691. return False
  11692. def has_prologue(self):
  11693. """
  11694. Check for prologue
  11695. """
  11696. return self.has_pelog('prolouge')
  11697. def has_epilogue(self):
  11698. """
  11699. Check for epilogue
  11700. """
  11701. return self.has_pelog('epilogue')
  11702. def delete_pelog(self):
  11703. """
  11704. Delete any prologue and epilogue files that may have been
  11705. defined on this MoM
  11706. """
  11707. phome = self.pbs_conf['PBS_HOME']
  11708. prolog = os.path.join(phome, 'mom_priv', 'prologue')
  11709. epilog = os.path.join(phome, 'mom_priv', 'epilogue')
  11710. ret = self.du.rm(self.hostname, epilog, force=True,
  11711. sudo=True, logerr=False)
  11712. if ret:
  11713. ret = self.du.rm(self.hostname, prolog, force=True,
  11714. sudo=True, logerr=False)
  11715. if not ret:
  11716. self.logger.error('problem deleting prologue/epilogue')
  11717. # we don't bail because the problem may be that files did not
  11718. # exist. Let tester fix the issue
  11719. return ret
  11720. def create_pelog(self, body=None, src=None, filename=None):
  11721. """
  11722. create ``prologue`` and ``epilogue`` files, functionality
  11723. accepts either a body of the script or a source file.
  11724. :returns: True on success and False on error
  11725. """
  11726. if self.has_diag:
  11727. _msg = 'MoM is in loaded from diag so bypassing pelog creation'
  11728. self.logger.info(_msg)
  11729. return False
  11730. if (src is None and body is None) or (filename is None):
  11731. self.logger.error('file and body of script are required')
  11732. return False
  11733. pelog = os.path.join(self.pbs_conf['PBS_HOME'], 'mom_priv', filename)
  11734. self.logger.info(self.logprefix +
  11735. ' creating ' + filename + ' with body\n' + '---')
  11736. if body is not None:
  11737. self.logger.info(body)
  11738. src = self.du.create_temp_file(prefix='pbs-pelog', body=body)
  11739. elif src is not None:
  11740. _b = open(src)
  11741. self.logger.info("\n".join(_b.readlines()))
  11742. _b.close()
  11743. self.logger.info('---')
  11744. ret = self.du.run_copy(self.hostname, src, pelog,
  11745. preserve_permission=False, sudo=True)
  11746. if body is not None:
  11747. os.remove(src)
  11748. if ret['rc'] != 0:
  11749. self.logger.error('error creating pelog ')
  11750. return False
  11751. ret = self.du.chown(self.hostname, path=pelog, uid=0, gid=0, sudo=True,
  11752. logerr=False)
  11753. if not ret:
  11754. self.logger.error('error chowning pelog to root')
  11755. return False
  11756. ret = self.du.chmod(self.hostname, path=pelog, mode=0755, sudo=True)
  11757. return ret
  11758. def prologue(self, body=None, src=None):
  11759. """
  11760. create prologue
  11761. """
  11762. return self.create_pelog(body, src, 'prologue')
  11763. def epilogue(self, body=None, src=None):
  11764. """
  11765. Create epilogue
  11766. """
  11767. return self.create_pelog(body, src, 'epilogue')
  11768. def action(self, act, script):
  11769. """
  11770. Define action script. Not currently implemented
  11771. """
  11772. pass
class Hook(PBSObject):

    """
    PBS hook objects. Holds attributes information and pointer
    to server

    :param name: Hook name
    :type name: str or None
    :param attrs: Hook attributes
    :type attrs: Dictionary
    :param server: Pointer to server
    """

    # default attributes handed to PBSObject; a hook defines none
    dflt_attributes = {}

    def __init__(self, name=None, attrs={}, server=None):
        self.logger = logging.getLogger(__name__)
        # NOTE(review): attrs defaults to a shared dict; whether
        # PBSObject.__init__ modifies it is not visible here - confirm
        PBSObject.__init__(self, name, attrs, self.dflt_attributes)
        self.server = server
  11788. class ResourceResv(PBSObject):
  11789. """
  11790. Generic PBS resource reservation, i.e., job or
  11791. ``advance/standing`` reservation
  11792. """
  11793. def execvnode(self, attr='exec_vnode'):
  11794. """
  11795. PBS type execution vnode
  11796. """
  11797. if attr in self.attributes:
  11798. return PbsTypeExecVnode(self.attributes[attr])
  11799. else:
  11800. return None
  11801. def exechost(self):
  11802. """
  11803. PBS type execution host
  11804. """
  11805. if 'exec_host' in self.attributes:
  11806. return PbsTypeExecHost(self.attributes['exec_host'])
  11807. else:
  11808. return None
  11809. def resvnodes(self):
  11810. """
  11811. nodes assigned to a reservation
  11812. """
  11813. if 'resv_nodes' in self.attributes:
  11814. return self.attributes['resv_nodes']
  11815. else:
  11816. return None
  11817. def select(self):
  11818. if hasattr(self, '_select') and self._select is not None:
  11819. return self._select
  11820. if 'schedselect' in self.attributes:
  11821. self._select = PbsTypeSelect(self.attributes['schedselect'])
  11822. elif 'select' in self.attributes:
  11823. self._select = PbsTypeSelect(self.attributes['select'])
  11824. else:
  11825. return None
  11826. return self._select
  11827. @classmethod
  11828. def get_hosts(cls, exechost=None):
  11829. """
  11830. :returns: The hosts portion of the exec_host
  11831. """
  11832. hosts = []
  11833. exechosts = cls.utils.parse_exechost(exechost)
  11834. if exechosts:
  11835. for h in exechosts:
  11836. eh = h.keys()[0]
  11837. if eh not in hosts:
  11838. hosts.append(eh)
  11839. return hosts
  11840. def get_vnodes(self, execvnode=None):
  11841. """
  11842. :returns: The unique vnode names of an execvnode as a list
  11843. """
  11844. if execvnode is None:
  11845. if 'exec_vnode' in self.attributes:
  11846. execvnode = self.attributes['exec_vnode']
  11847. elif 'resv_nodes' in self.attributes:
  11848. execvnode = self.attributes['resv_nodes']
  11849. else:
  11850. return []
  11851. vnodes = []
  11852. execvnodes = PbsTypeExecVnode(execvnode)
  11853. if execvnodes:
  11854. for n in execvnodes:
  11855. ev = n.keys()[0]
  11856. if ev not in vnodes:
  11857. vnodes.append(ev)
  11858. return vnodes
  11859. def walltime(self, attr='Resource_List.walltime'):
  11860. if attr in self.attributes:
  11861. return self.utils.convert_duration(self.attributes[attr])
  11862. class Job(ResourceResv):
  11863. """
  11864. PBS Job. Attributes and Resources
  11865. :param username: Job username
  11866. :type username: str or None
  11867. :param attrs: Job attributes
  11868. :type attrs: Dictionary
  11869. :param jobname: Name of the PBS job
  11870. :type jobname: str or None
  11871. """
  11872. dflt_attributes = {
  11873. ATTR_N: 'STDIN',
  11874. ATTR_j: 'n',
  11875. ATTR_m: 'a',
  11876. ATTR_p: '0',
  11877. ATTR_r: 'y',
  11878. ATTR_k: 'oe',
  11879. }
  11880. runtime = 100
  11881. logger = logging.getLogger(__name__)
  11882. du = DshUtils()
  11883. def __init__(self, username=None, attrs={}, jobname=None):
  11884. self.platform = self.du.get_platform()
  11885. self.server = {}
  11886. self.script = None
  11887. self.script_body = None
  11888. if username is not None:
  11889. self.username = str(username)
  11890. else:
  11891. self.username = None
  11892. self.du = None
  11893. self.interactive_handle = None
  11894. if self.platform == 'cray' or self.platform == 'craysim':
  11895. if 'Resource_List.select' in attrs:
  11896. select = attrs['Resource_List.select']
  11897. attrs['Resource_List.select'] = self.add_cray_vntype(select)
  11898. elif 'Resource_List.vntype' not in attrs:
  11899. attrs['Resource_List.vntype'] = 'cray_compute'
  11900. PBSObject.__init__(self, None, attrs, self.dflt_attributes)
  11901. if jobname is not None:
  11902. self.custom_attrs[ATTR_N] = jobname
  11903. self.attributes[ATTR_N] = jobname
  11904. self.set_variable_list(self.username)
  11905. self.set_sleep_time(100)
  11906. def add_cray_vntype(self, select=None):
  11907. """
  11908. Cray specific function to add vntype as ``cray_compute`` to each
  11909. select chunk
  11910. :param select: PBS select statement
  11911. :type select: str or None
  11912. """
  11913. ra = []
  11914. r = select.split('+')
  11915. for i in r:
  11916. select = PbsTypeSelect(i)
  11917. novntype = 'vntype' not in select.resources
  11918. nohost = 'host' not in select.resources
  11919. novnode = 'vnode' not in select.resources
  11920. if novntype and nohost and novnode:
  11921. i = i + ":vntype=cray_compute"
  11922. ra.append(i)
  11923. select_str = ''
  11924. for l in ra:
  11925. select_str = select_str + "+" + l
  11926. select_str = select_str[1:]
  11927. return select_str
  11928. def set_attributes(self, a={}):
  11929. """
  11930. set attributes and custom attributes on this job.
  11931. custom attributes are used when converting attributes to CLI.
  11932. In case of Cray platform if 'Resource_List.vntype' is set
  11933. already then remove it and add vntype value to each chunk of a
  11934. select statement.
  11935. :param a: Attribute dictionary
  11936. :type a: Dictionary
  11937. """
  11938. if isinstance(a, list):
  11939. a = OrderedDict(a)
  11940. self.attributes = OrderedDict(self.dflt_attributes.items() +
  11941. self.attributes.items() + a.items())
  11942. if self.platform == 'cray' or self.platform == 'craysim':
  11943. s = 'Resource_List.select' in a
  11944. v = 'Resource_List.vntype' in self.custom_attrs
  11945. if s and v:
  11946. del self.custom_attrs['Resource_List.vntype']
  11947. select = a['Resource_List.select']
  11948. a['Resource_List.select'] = self.add_cray_vntype(select)
  11949. self.custom_attrs = OrderedDict(self.custom_attrs.items() +
  11950. a.items())
  11951. def set_variable_list(self, user=None, workdir=None):
  11952. """
  11953. Customize the ``Variable_List`` job attribute to ``<user>``
  11954. """
  11955. if user is None:
  11956. userinfo = pwd.getpwuid(os.getuid())
  11957. user = userinfo[0]
  11958. homedir = userinfo[5]
  11959. else:
  11960. try:
  11961. homedir = pwd.getpwnam(user)[5]
  11962. except:
  11963. homedir = ""
  11964. self.username = user
  11965. s = ['PBS_O_HOME=' + homedir]
  11966. s += ['PBS_O_LANG=en_US.UTF-8']
  11967. s += ['PBS_O_LOGNAME=' + user]
  11968. s += ['PBS_O_PATH=/usr/bin:/bin:/usr/bin:/usr/local/bin']
  11969. s += ['PBS_O_MAIL=/var/spool/mail/' + user]
  11970. s += ['PBS_O_SHELL=/bin/bash']
  11971. s += ['PBS_O_SYSTEM=Linux']
  11972. if workdir is not None:
  11973. wd = workdir
  11974. else:
  11975. wd = os.getcwd()
  11976. s += ['PBS_O_WORKDIR=' + str(wd)]
  11977. self.attributes[ATTR_v] = ",".join(s)
  11978. self.set_attributes()
  11979. def set_sleep_time(self, duration):
  11980. """
  11981. Set the sleep duration for this job.
  11982. :param duration: The duration, in seconds, to sleep
  11983. :type duration: int
  11984. """
  11985. self.set_execargs('/bin/sleep', duration)
  11986. def set_execargs(self, executable, arguments=None):
  11987. """
  11988. Set the executable and arguments to use for this job
  11989. :param executable: path to an executable. No checks are made.
  11990. :type executable: str
  11991. :param arguments: arguments to executable.
  11992. :type arguments: str or list or int
  11993. """
  11994. msg = ['job: executable set to ' + str(executable)]
  11995. if arguments is not None:
  11996. msg += [' with arguments: ' + str(arguments)]
  11997. self.logger.info("".join(msg))
  11998. self.attributes[ATTR_executable] = executable
  11999. if arguments is not None:
  12000. args = ''
  12001. xml_beginargs = '<jsdl-hpcpa:Argument>'
  12002. xml_endargs = '</jsdl-hpcpa:Argument>'
  12003. if isinstance(arguments, list):
  12004. for a in arguments:
  12005. args += xml_beginargs + str(a) + xml_endargs
  12006. elif isinstance(arguments, str):
  12007. args = xml_beginargs + arguments + xml_endargs
  12008. elif isinstance(arguments, int):
  12009. args = xml_beginargs + str(arguments) + xml_endargs
  12010. self.attributes[ATTR_Arglist] = args
  12011. else:
  12012. self.unset_attributes([ATTR_Arglist])
  12013. self.set_attributes()
  12014. def create_script(self, body=None, asuser=None, hostname=None):
  12015. """
  12016. Create a job script from a given body of text into a
  12017. temporary location
  12018. :param body: the body of the script
  12019. :type body: str or None
  12020. :param asuser: Optionally the user to own this script,
  12021. defaults ot current user
  12022. :type asuser: str or None
  12023. :param hostname: The host on which the job script is to
  12024. be created
  12025. :type hostname: str or None
  12026. """
  12027. if body is None:
  12028. return None
  12029. if isinstance(body, list):
  12030. body = '\n'.join(body)
  12031. if self.platform == 'cray' or self.platform == 'craysim':
  12032. body = body.split("\n")
  12033. for i, line in enumerate(body):
  12034. if line.startswith("#PBS") and "select=" in line:
  12035. if 'Resource_List.vntype' in self.attributes:
  12036. self.unset_attributes(['Resource_List.vntype'])
  12037. line_arr = line.split(" ")
  12038. for j, element in enumerate(line_arr):
  12039. select = element.startswith("select=")
  12040. lselect = element.startswith("-lselect=")
  12041. if select or lselect:
  12042. if lselect:
  12043. sel_str = element[9:]
  12044. else:
  12045. sel_str = element[7:]
  12046. sel_str = self.add_cray_vntype(select=sel_str)
  12047. if lselect:
  12048. line_arr[j] = "-lselect=" + sel_str
  12049. else:
  12050. line_arr[j] = "select=" + sel_str
  12051. body[i] = " ".join(line_arr)
  12052. body = '\n'.join(body)
  12053. self.script_body = body
  12054. if self.du is None:
  12055. self.du = DshUtils()
  12056. # First create the temporary file as current user and only change
  12057. # its mode once the current user has written to it
  12058. fn = self.du.create_temp_file(hostname, prefix='PtlPbsJobScript',
  12059. asuser=asuser, body=body)
  12060. self.du.chmod(hostname, fn, mode=0755)
  12061. if not self.du.is_localhost(hostname):
  12062. self.du.run_copy(hostname, fn, fn)
  12063. self.script = fn
  12064. return fn
  12065. def create_subjob_id(self, job_array_id, subjob_index):
  12066. """
  12067. insert subjob index into the square brackets of job array id
  12068. :param job_array_id: PBS parent array job id
  12069. :type job_array_id: str
  12070. :param subjob_index: index of subjob
  12071. :type subjob_index: int
  12072. :returns: subjob id string
  12073. """
  12074. idx = job_array_id.find('[]')
  12075. return job_array_id[:idx + 1] + str(subjob_index) + \
  12076. job_array_id[idx + 1:]
  12077. class Reservation(ResourceResv):
  12078. """
  12079. PBS Reservation. Attributes and Resources
  12080. :param attrs: Reservation attributes
  12081. :type attrs: Dictionary
  12082. """
  12083. dflt_attributes = {}
  12084. def __init__(self, username=None, attrs={}):
  12085. self.server = {}
  12086. self.script = None
  12087. self.attributes = attrs
  12088. if username is None:
  12089. userinfo = pwd.getpwuid(os.getuid())
  12090. self.username = userinfo[0]
  12091. else:
  12092. self.username = str(username)
  12093. # These are not in dflt_attributes because of the conversion to CLI
  12094. # options is done strictly
  12095. if ATTR_resv_start not in attrs:
  12096. attrs[ATTR_resv_start] = str(int(time.time()) + 36 * 3600)
  12097. if ATTR_resv_end not in attrs:
  12098. if ATTR_resv_duration not in attrs:
  12099. attrs[ATTR_resv_end] = str(int(time.time()) + 72 * 3600)
  12100. PBSObject.__init__(self, None, attrs, self.dflt_attributes)
  12101. self.set_attributes()
  12102. def set_variable_list(self, user, workdir=None):
  12103. pass
  12104. class InteractiveJob(threading.Thread):
  12105. """
  12106. An Interactive Job thread
  12107. Interactive Jobs are submitted as a thread that sets the jobid
  12108. as soon as it is returned by ``qsub -I``, such that the caller
  12109. can get back to monitoring the state of PBS while the interactive
  12110. session goes on in the thread.
  12111. The commands to be run within an interactive session are
  12112. specified in the job's interactive_script attribute as a list of
  12113. tuples, where the first item in each tuple is the command to run,
  12114. and the subsequent items are the expected returned data.
  12115. Implementation details:
  12116. Support for interactive jobs is currently done through the
  12117. pexpect module which must be installed separately from PTL.
  12118. Interactive jobs are submitted through ``CLI`` only, there is no
  12119. API support for this operation yet.
  12120. The submission of an interactive job requires passing in job
  12121. attributes,the command to execute ``(i.e. path to qsub -I)``
  12122. and the hostname
  12123. when not impersonating:
  12124. pexpect spawns the ``qsub -I`` command and expects a prompt
  12125. back, for each tuple in the interactive_script, it sends the
  12126. command and expects to match the return value.
  12127. when impersonating:
  12128. pexpect spawns ``sudo -u <user> qsub -I``. The rest is as
  12129. described in non- impersonating mode.
  12130. """
  12131. logger = logging.getLogger(__name__)
  12132. pexpect_timeout = 15
  12133. pexpect_sleep_time = .1
  12134. du = DshUtils()
  12135. def __init__(self, job, cmd, host):
  12136. threading.Thread.__init__(self)
  12137. self.job = job
  12138. self.cmd = cmd
  12139. self.jobid = None
  12140. self.hostname = host
  12141. def run(self):
  12142. """
  12143. Run the interactive job
  12144. """
  12145. try:
  12146. import pexpect
  12147. except:
  12148. self.logger.error('pexpect module is required for '
  12149. 'interactive jobs')
  12150. return None
  12151. job = self.job
  12152. cmd = self.cmd
  12153. self.jobid = None
  12154. self.logger.info("submit interactive job as " + job.username +
  12155. ": " + " ".join(cmd))
  12156. if not hasattr(job, 'interactive_script'):
  12157. self.logger.debug('no interactive_script attribute on job')
  12158. return None
  12159. try:
  12160. # sleep to allow server to communicate with client
  12161. # this value is set empirically so tweaking may be
  12162. # needed
  12163. _st = self.pexpect_sleep_time
  12164. _to = self.pexpect_timeout
  12165. _sc = job.interactive_script
  12166. current_user = pwd.getpwuid(os.getuid())[0]
  12167. if current_user != job.username:
  12168. if hasattr(job, 'preserve_env') and job.preserve_env is True:
  12169. cmd = ['sudo', '-E', '-u', job.username] + cmd
  12170. else:
  12171. cmd = ['sudo', '-u', job.username] + cmd
  12172. self.logger.debug(cmd)
  12173. _p = pexpect.spawn(" ".join(cmd), timeout=_to)
  12174. self.job.interactive_handle = _p
  12175. time.sleep(_st)
  12176. expstr = "qsub: waiting for job "
  12177. expstr += "(?P<jobid>\d+.[0-9A-Za-z-.]+) to start"
  12178. _p.expect(expstr)
  12179. if _p.match:
  12180. self.jobid = _p.match.group('jobid')
  12181. else:
  12182. _p.close()
  12183. self.job.interactive_handle = None
  12184. return None
  12185. self.logger.debug(_p.after.decode())
  12186. for _l in _sc:
  12187. (cmd, out) = _l
  12188. self.logger.info('sending: ' + cmd)
  12189. _p.sendline(cmd)
  12190. self.logger.info('expecting: ' + out)
  12191. _p.expect(out)
  12192. self.logger.info('sending exit')
  12193. _p.sendline("exit")
  12194. self.logger.info('waiting for the subprocess to finish')
  12195. _p.wait()
  12196. _p.close()
  12197. self.job.interactive_handle = None
  12198. self.logger.debug(_p.exitstatus)
  12199. except Exception:
  12200. self.logger.error(traceback.print_exc())
  12201. return None
  12202. return self.jobid
  12203. class Queue(PBSObject):
  12204. """
  12205. PBS Queue container, holds attributes of the queue and
  12206. pointer to server
  12207. :param name: Queue name
  12208. :type name: str or None
  12209. :param attrs: Queue attributes
  12210. :type attrs: Dictionary
  12211. """
  12212. dflt_attributes = {}
  12213. def __init__(self, name=None, attrs={}, server=None):
  12214. self.logger = logging.getLogger(__name__)
  12215. PBSObject.__init__(self, name, attrs, self.dflt_attributes)
  12216. self.server = server
  12217. m = ['queue']
  12218. if server is not None:
  12219. m += ['@' + server.shortname]
  12220. if self.name is not None:
  12221. m += [' ', self.name]
  12222. m += [': ']
  12223. self.logprefix = "".join(m)
  12224. def revert_to_defaults(self):
  12225. """
  12226. reset queue attributes to defaults
  12227. """
  12228. ignore_attrs = ['id', ATTR_count, ATTR_rescassn]
  12229. ignore_attrs += [ATTR_qtype, ATTR_enable, ATTR_start, ATTR_total]
  12230. ignore_attrs += ['THE_END']
  12231. len_attrs = len(ignore_attrs)
  12232. unsetlist = []
  12233. setdict = {}
  12234. self.logger.info(
  12235. self.logprefix +
  12236. "reverting configuration to defaults")
  12237. if self.server is not None:
  12238. self.server.status(QUEUE, id=self.name, level=logging.DEBUG)
  12239. for k in self.attributes.keys():
  12240. for i in range(len_attrs):
  12241. if k.startswith(ignore_attrs[i]):
  12242. break
  12243. if (i == (len_attrs - 1)) and k not in self.dflt_attributes:
  12244. unsetlist.append(k)
  12245. if len(unsetlist) != 0 and self.server is not None:
  12246. try:
  12247. self.server.manager(MGR_CMD_UNSET, MGR_OBJ_QUEUE, unsetlist,
  12248. self.name)
  12249. except PbsManagerError, e:
  12250. self.logger.error(e.msg)
  12251. for k in self.dflt_attributes.keys():
  12252. if (k not in self.attributes or
  12253. self.attributes[k] != self.dflt_attributes[k]):
  12254. setdict[k] = self.dflt_attributes[k]
  12255. if len(setdict.keys()) != 0 and self.server is not None:
  12256. self.server.manager(MGR_CMD_SET, MGR_OBJ_QUEUE, setdict)
  12257. class PBSInitServices(object):
  12258. """
  12259. PBS initialization services
  12260. :param hostname: Machine hostname
  12261. :type hostname: str or None
  12262. :param conf: PBS configuaration file
  12263. :type conf: str or None
  12264. """
  12265. def __init__(self, hostname=None, conf=None):
  12266. self.logger = logging.getLogger(__name__)
  12267. self.hostname = hostname
  12268. if self.hostname is None:
  12269. self.hostname = socket.gethostname()
  12270. self.dflt_conf_file = os.environ.get('PBS_CONF_FILE', '/etc/pbs.conf')
  12271. self.conf_file = conf
  12272. self.du = DshUtils()
  12273. self.is_linux = sys.platform.startswith('linux')
  12274. def initd(self, hostname=None, op='status', conf_file=None,
  12275. init_script=None, daemon='all'):
  12276. """
  12277. Run the init script for a given operation
  12278. :param hostname: hostname on which to execute the init script
  12279. :type hostname: str or None
  12280. :param op: one of status, start, stop, restart
  12281. :type op: str
  12282. :param conf_file: optional path to a configuration file
  12283. :type conf_file: str or None
  12284. :param init_script: optional path to a PBS init script
  12285. :type init_script: str or None
  12286. :param daemon: name of daemon to operate on. one of server, mom,
  12287. sched, comm or all
  12288. :type daemon: str
  12289. """
  12290. if hostname is None:
  12291. hostname = self.hostname
  12292. if conf_file is None:
  12293. conf_file = self.conf_file
  12294. return self._unix_initd(hostname, op, conf_file, init_script, daemon)
  12295. def restart(self, hostname=None, init_script=None):
  12296. """
  12297. Run the init script for a restart operation
  12298. :param hostname: hostname on which to execute the init script
  12299. :type hostname: str or None
  12300. :param init_script: optional path to a PBS init script
  12301. :type init_script: str or None
  12302. """
  12303. return self.initd(hostname, op='restart', init_script=init_script)
  12304. def restart_server(self, hostname=None, init_script=None):
  12305. """
  12306. Run the init script for a restart server
  12307. :param hostname: hostname on which to restart server
  12308. :type hostname: str or None
  12309. :param init_script: optional path to a PBS init script
  12310. :type init_script: str or None
  12311. """
  12312. return self.initd(hostname, op='restart', init_script=init_script,
  12313. daemon='server')
  12314. def restart_mom(self, hostname=None, init_script=None):
  12315. """
  12316. Run the init script for a restart mom
  12317. :param hostname: hostname on which to restart mom
  12318. :type hostname: str or None
  12319. :param init_script: optional path to a PBS init script
  12320. :type init_script: str or None
  12321. """
  12322. return self.initd(hostname, op='restart', init_script=init_script,
  12323. daemon='mom')
  12324. def restart_sched(self, hostname=None, init_script=None):
  12325. """
  12326. Run the init script for a restart sched
  12327. :param hostname: hostname on which to restart sched
  12328. :type hostname: str or None
  12329. :param init_script: optional path to a PBS init script
  12330. :type init_script: str or None
  12331. """
  12332. return self.initd(hostname, op='restart', init_script=init_script,
  12333. daemon='sched')
  12334. def restart_comm(self, hostname=None, init_script=None):
  12335. """
  12336. Run the init script for a restart comm
  12337. :param hostname: hostname on which to restart comm
  12338. :type hostname: str or None
  12339. :param init_script: optional path to a PBS init script
  12340. :type init_script: str or None
  12341. """
  12342. return self.initd(hostname, op='restart', init_script=init_script,
  12343. daemon='comm')
  12344. def start(self, hostname=None, init_script=None):
  12345. """
  12346. Run the init script for a start operation
  12347. :param hostname: hostname on which to execute the init script
  12348. :type hostname: str or None
  12349. :param init_script: optional path to a PBS init script
  12350. :type init_script: str or None
  12351. """
  12352. return self.initd(hostname, op='start', init_script=init_script)
  12353. def start_server(self, hostname=None, init_script=None):
  12354. """
  12355. Run the init script for a start server
  12356. :param hostname: hostname on which to start server
  12357. :type hostname: str or None
  12358. :param init_script: optional path to a PBS init script
  12359. :type init_script: str or None
  12360. """
  12361. return self.initd(hostname, op='start', init_script=init_script,
  12362. daemon='server')
  12363. def start_mom(self, hostname=None, init_script=None):
  12364. """
  12365. Run the init script for a start mom
  12366. :param hostname: hostname on which to start mom
  12367. :type hostname: str or None
  12368. :param init_script: optional path to a PBS init script
  12369. :type init_script: str or None
  12370. """
  12371. return self.initd(hostname, op='start', init_script=init_script,
  12372. daemon='mom')
  12373. def start_sched(self, hostname=None, init_script=None):
  12374. """
  12375. Run the init script for a start sched
  12376. :param hostname: hostname on which to start sched
  12377. :type hostname: str or None
  12378. :param init_script: optional path to a PBS init script
  12379. :type init_script: str or None
  12380. """
  12381. return self.initd(hostname, op='start', init_script=init_script,
  12382. daemon='sched')
  12383. def start_comm(self, hostname=None, init_script=None):
  12384. """
  12385. Run the init script for a start comm
  12386. :param hostname: hostname on which to start comm
  12387. :type hostname: str or None
  12388. :param init_script: optional path to a PBS init script
  12389. :type init_script: str or None
  12390. """
  12391. return self.initd(hostname, op='start', init_script=init_script,
  12392. daemon='comm')
  12393. def stop(self, hostname=None, init_script=None):
  12394. """
  12395. Run the init script for a stop operation
  12396. :param hostname: hostname on which to execute the init script
  12397. :type hostname: str or None
  12398. :param init_script: optional path to a PBS init script
  12399. :type init_script: str or None
  12400. """
  12401. return self.initd(hostname, op='stop', init_script=init_script)
  12402. def stop_server(self, hostname=None, init_script=None):
  12403. """
  12404. Run the init script for a stop server
  12405. :param hostname: hostname on which to stop server
  12406. :type hostname: str or None
  12407. :param init_script: optional path to a PBS init script
  12408. :type init_script: str or None
  12409. """
  12410. return self.initd(hostname, op='stop', init_script=init_script,
  12411. daemon='server')
  12412. def stop_mom(self, hostname=None, init_script=None):
  12413. """
  12414. Run the init script for a stop mom
  12415. :param hostname: hostname on which to stop mom
  12416. :type hostname: str or None
  12417. :param init_script: optional path to a PBS init script
  12418. :type init_script: str or None
  12419. """
  12420. return self.initd(hostname, op='stop', init_script=init_script,
  12421. daemon='mom')
  12422. def stop_sched(self, hostname=None, init_script=None):
  12423. """
  12424. Run the init script for a stop sched
  12425. :param hostname: hostname on which to stop sched
  12426. :type hostname: str or None
  12427. :param init_script: optional path to a PBS init script
  12428. :type init_script: str or None
  12429. """
  12430. return self.initd(hostname, op='stop', init_script=init_script,
  12431. daemon='sched')
  12432. def stop_comm(self, hostname=None, init_script=None):
  12433. """
  12434. Run the init script for a stop comm
  12435. :param hostname: hostname on which to stop comm
  12436. :type hostname: str or None
  12437. :param init_script: optional path to a PBS init script
  12438. :type init_script: str or None
  12439. """
  12440. return self.initd(hostname, op='stop', init_script=init_script,
  12441. daemon='comm')
  12442. def status(self, hostname=None, init_script=None):
  12443. """
  12444. Run the init script for a status operation
  12445. :param hostname: hostname on which to execute the init script
  12446. :type hostname: str or None
  12447. :param init_script: optional path to a PBS init script
  12448. :type init_script: str or None
  12449. """
  12450. return self.initd(hostname, op='status', init_script=init_script)
  12451. def status_server(self, hostname=None, init_script=None):
  12452. """
  12453. Run the init script for a status server
  12454. :param hostname: hostname on which to status server
  12455. :type hostname: str or None
  12456. :param init_script: optional path to a PBS init script
  12457. :type init_script: str or None
  12458. """
  12459. return self.initd(hostname, op='status', init_script=init_script,
  12460. daemon='server')
  12461. def status_mom(self, hostname=None, init_script=None):
  12462. """
  12463. Run the init script for a status mom
  12464. :param hostname: hostname on which to status mom
  12465. :type hostname: str or None
  12466. :param init_script: optional path to a PBS init script
  12467. :type init_script: str or None
  12468. """
  12469. return self.initd(hostname, op='status', init_script=init_script,
  12470. daemon='mom')
  12471. def status_sched(self, hostname=None, init_script=None):
  12472. """
  12473. Run the init script for a status sched
  12474. :param hostname: hostname on which to status sched
  12475. :type hostname: str or None
  12476. :param init_script: optional path to a PBS init script
  12477. :type init_script: str or None
  12478. """
  12479. return self.initd(hostname, op='status', init_script=init_script,
  12480. daemon='sched')
  12481. def status_comm(self, hostname=None, init_script=None):
  12482. """
  12483. Run the init script for a status comm
  12484. :param hostname: hostname on which to status comm
  12485. :type hostname: str or None
  12486. :param init_script: optional path to a PBS init script
  12487. :type init_script: str or None
  12488. """
  12489. return self.initd(hostname, op='status', init_script=init_script,
  12490. daemon='comm')
  12491. def _unix_initd(self, hostname, op, conf_file, init_script, daemon):
  12492. """
  12493. Helper function for initd ``(*nix version)``
  12494. :param hostname: hostname on which init script should run
  12495. :type hostname: str
  12496. :param op: Operation on daemons - start, stop, restart or status
  12497. :op type: str
  12498. :param conf_file: Optional path to the pbs configuration file
  12499. :type conf_file: str or None
  12500. :param init_script: optional path to a PBS init script
  12501. :type init_script: str or None
  12502. :param daemon: name of daemon to operate on. one of server, mom,
  12503. sched, comm or all
  12504. :type daemon: str
  12505. """
  12506. init_cmd = ['sudo']
  12507. if daemon is not None and daemon != 'all':
  12508. conf = self.du.parse_pbs_config(hostname, conf_file)
  12509. dconf = {
  12510. 'PBS_START_SERVER': 0,
  12511. 'PBS_START_MOM': 0,
  12512. 'PBS_START_SCHED': 0,
  12513. 'PBS_START_COMM': 0
  12514. }
  12515. if daemon == 'server' and conf.get('PBS_START_SERVER', 0) != 0:
  12516. dconf['PBS_START_SERVER'] = 1
  12517. elif daemon == 'mom' and conf.get('PBS_START_MOM', 0) != 0:
  12518. dconf['PBS_START_MOM'] = 1
  12519. elif daemon == 'sched' and conf.get('PBS_START_SCHED', 0) != 0:
  12520. dconf['PBS_START_SCHED'] = 1
  12521. elif daemon == 'comm' and conf.get('PBS_START_COMM', 0) != 0:
  12522. dconf['PBS_START_COMM'] = 1
  12523. for k, v in dconf.items():
  12524. init_cmd += ["%s=%s" % (k, str(v))]
  12525. _as = True
  12526. else:
  12527. fn = None
  12528. if (conf_file is not None) and (conf_file != self.dflt_conf_file):
  12529. init_cmd += ['PBS_CONF_FILE=' + conf_file]
  12530. _as = True
  12531. else:
  12532. _as = False
  12533. conf = self.du.parse_pbs_config(hostname, conf_file)
  12534. if (init_script is None) or (not init_script.startswith('/')):
  12535. if 'PBS_EXEC' not in conf:
  12536. msg = 'Missing PBS_EXEC setting in pbs config'
  12537. raise PbsInitServicesError(rc=1, rv=False, msg=msg)
  12538. if init_script is None:
  12539. init_script = os.path.join(conf['PBS_EXEC'], 'libexec',
  12540. 'pbs_init.d')
  12541. else:
  12542. init_script = os.path.join(conf['PBS_EXEC'], 'etc',
  12543. init_script)
  12544. if not self.du.isfile(hostname, path=init_script, sudo=True):
  12545. # Could be Type 3 installation where we will not have
  12546. # PBS_EXEC/libexec/pbs_init.d
  12547. return []
  12548. init_cmd += [init_script, op]
  12549. msg = 'running init script to ' + op + ' pbs'
  12550. if daemon is not None and daemon != 'all':
  12551. msg += ' ' + daemon
  12552. msg += ' on ' + hostname
  12553. if conf_file is not None:
  12554. msg += ' using ' + conf_file
  12555. msg += ' init_cmd=%s' % (str(init_cmd))
  12556. self.logger.info(msg)
  12557. ret = self.du.run_cmd(hostname, init_cmd, as_script=_as,
  12558. logerr=False)
  12559. if ret['rc'] != 0:
  12560. raise PbsInitServicesError(rc=ret['rc'], rv=False,
  12561. msg='\n'.join(ret['err']))
  12562. else:
  12563. return ret
  12564. def switch_version(self, hostname=None, version=None):
  12565. """
  12566. Switch to another version of PBS installed on the system
  12567. :param hostname: The hostname to operate on
  12568. :type hostname: str or None
  12569. :param version: version to switch
  12570. """
  12571. pbs_conf = self.du.parse_pbs_config(hostname)
  12572. if 'PBS_EXEC' in pbs_conf:
  12573. dn = os.path.dirname(pbs_conf['PBS_EXEC'])
  12574. newver = os.path.join(dn, version)
  12575. ret = self.du.isdir(hostname, path=newver)
  12576. if not ret:
  12577. msg = 'no version ' + version + ' on host ' + hostname
  12578. raise PbsInitServicesError(rc=0, rv=False, msg=msg)
  12579. self.stop(hostname)
  12580. dflt = os.path.join(dn, 'default')
  12581. ret = self.du.isfile(hostname, path=dflt)
  12582. if ret:
  12583. self.logger.info('removing symbolic link ' + dflt)
  12584. self.du.rm(hostname, dflt, sudo=True, logerr=False)
  12585. self.du.set_pbs_config(hostname, confs={'PBS_EXEC': dflt})
  12586. else:
  12587. self.du.set_pbs_config(hostname, confs={'PBS_EXEC': newver})
  12588. self.logger.info('linking ' + newver + ' to ' + dflt)
  12589. self.du.run_cmd(hostname, ['ln', '-s', newver, dflt],
  12590. sudo=True, logerr=False)
  12591. self.start(hostname)