pbs_procutils.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. import sys
  37. import time
  38. import re
  39. import threading
  40. import logging
  41. import socket
  42. from ptl.utils.pbs_dshutils import DshUtils
  43. class ProcUtils(object):
  44. """
  45. Utilities to query process information
  46. """
  47. logger = logging.getLogger(__name__)
  48. du = DshUtils()
  49. platform = sys.platform
  50. def __init__(self):
  51. self.processes = {}
  52. self.__h2ps = {}
  53. def get_ps_cmd(self, hostname=None):
  54. """
  55. Get the ps command
  56. :param hostname: hostname of the machine
  57. :type hostname: str or None
  58. """
  59. if hostname is None:
  60. hostname = socket.gethostname()
  61. if hostname in self.__h2ps:
  62. return self.__h2ps[hostname]
  63. if not self.du.is_localhost(hostname):
  64. platform = self.du.get_platform(hostname)
  65. else:
  66. platform = self.platform
  67. # set some platform-specific arguments to ps
  68. ps_arg = '-C'
  69. ps_cmd = ['ps', '-o', 'pid,rss,vsz,pcpu,command']
  70. self.__h2ps[hostname] = (ps_cmd, ps_arg)
  71. return (ps_cmd, ps_arg)
  72. def _init_processes(self):
  73. self.processes = {}
  74. def _get_proc_info_unix(self, hostname=None, name=None,
  75. pid=None, regexp=False):
  76. """
  77. Helper function to ``get_proc_info`` for Unix only system
  78. """
  79. (ps_cmd, ps_arg) = self.get_ps_cmd(hostname)
  80. if name is not None:
  81. if not regexp:
  82. cr = self.du.run_cmd(hostname, (ps_cmd + [ps_arg, name]),
  83. level=logging.DEBUG2)
  84. else:
  85. cr = self.du.run_cmd(hostname, ps_cmd + ['-e'],
  86. level=logging.DEBUG2)
  87. elif pid is not None:
  88. cr = self.du.run_cmd(hostname, ps_cmd + ['-p', pid],
  89. level=logging.DEBUG2)
  90. else:
  91. return
  92. if cr['rc'] == 0 and cr['out']:
  93. for proc in cr['out']:
  94. _pi = None
  95. try:
  96. _s = proc.split()
  97. p = _s[0]
  98. rss = _s[1]
  99. vsz = _s[2]
  100. pcpu = _s[3]
  101. command = " ".join(_s[4:])
  102. except:
  103. continue
  104. if ((pid is not None and p == str(pid)) or
  105. (name is not None and (
  106. (regexp and re.search(name, command) is not None) or
  107. (not regexp and name in command)))):
  108. _pi = ProcInfo(name=command)
  109. _pi.pid = p
  110. _pi.rss = rss
  111. _pi.vsz = vsz
  112. _pi.pcpu = pcpu
  113. _pi.command = command
  114. if _pi is not None:
  115. if command in self.processes:
  116. self.processes[command].append(_pi)
  117. else:
  118. self.processes[command] = [_pi]
  119. return self.processes
  120. def get_proc_info(self, hostname=None, name=None, pid=None, regexp=False):
  121. """
  122. Return process information from a process name, or pid,
  123. on a given host
  124. :param hostname: The hostname on which to query the process
  125. info. On Windows,only localhost is queried.
  126. :type hostname: str or none
  127. :param name: The name of the process to query.
  128. :type name: str or None
  129. :param pid: The pid of the process to query
  130. :type pid: int or None
  131. :param regexp: Match processes by regular expression. Defaults
  132. to True. Does not apply to matching by PID.
  133. :type regexp: bool
  134. :returns: A list of ProcInfo objects, one for each matching
  135. process.
  136. .. note:: If both, name and pid, are specified, name is used.
  137. """
  138. self._init_processes()
  139. return self._get_proc_info_unix(hostname, name, pid, regexp)
  140. def get_proc_state(self, hostname=None, pid=None):
  141. """
  142. :returns: PID's process state on host hostname
  143. On error the empty string is returned.
  144. """
  145. if not self.du.is_localhost(hostname):
  146. platform = self.du.get_platform(hostname)
  147. else:
  148. platform = sys.platform
  149. try:
  150. if platform.startswith('linux'):
  151. cmd = ['ps', '-o', 'stat', '-p', str(pid), '--no-heading']
  152. rv = self.du.run_cmd(hostname, cmd, level=logging.DEBUG2)
  153. return rv['out'][0][0]
  154. except:
  155. self.logger.error('Error getting process state for pid ' + pid)
  156. return ''
  157. def get_proc_children(self, hostname=None, ppid=None):
  158. """
  159. :returns: A list of children PIDs associated to ``PPID`` on
  160. host hostname.
  161. On error, an empty list is returned.
  162. """
  163. try:
  164. if not isinstance(ppid, str):
  165. ppid = str(ppid)
  166. if int(ppid) <= 0:
  167. raise
  168. if not self.du.is_localhost(hostname):
  169. platform = self.du.get_platform(hostname)
  170. else:
  171. platform = sys.platform
  172. childlist = []
  173. if platform.startswith('linux'):
  174. cmd = ['ps', '-o', 'pid', '--ppid:%s' % ppid, '--no-heading']
  175. rv = self.du.run_cmd(hostname, cmd)
  176. children = rv['out'][:-1]
  177. else:
  178. children = []
  179. for child in children:
  180. child = child.strip()
  181. if child != '':
  182. childlist.append(child)
  183. childlist.extend(self.get_proc_children(hostname, child))
  184. return childlist
  185. except:
  186. self.logger.error('Error getting children processes of parent ' +
  187. ppid)
  188. return []
  189. class ProcInfo(object):
  190. """
  191. Process information reports ``PID``, ``RSS``, ``VSZ``, Command
  192. and Time at which process information is collected
  193. """
  194. def __init__(self, name=None, pid=None):
  195. self.name = name
  196. self.pid = pid
  197. self.rss = None
  198. self.vsz = None
  199. self.pcpu = None
  200. self.time = time.time()
  201. self.command = None
  202. def __str__(self):
  203. return "%s pid: %s rss: %s vsz: %s pcpu: %s command: %s" % \
  204. (self.name, str(self.pid), str(self.rss), str(self.vsz),
  205. str(self.pcpu), self.command)
  206. class ProcMonitor(threading.Thread):
  207. """
  208. A background process monitoring tool
  209. """
  210. def __init__(self, name=None, regexp=False, frequency=60):
  211. threading.Thread.__init__(self)
  212. self.name = name
  213. self.frequency = frequency
  214. self.regexp = regexp
  215. self._pu = ProcUtils()
  216. self._go = True
  217. self.db_proc_info = []
  218. def set_frequency(self, value=60):
  219. """
  220. Set the frequency
  221. :param value: Frequency value
  222. :type value: int
  223. """
  224. self.logger.debug('procmonitor: set frequency to ' + str(value))
  225. self.frequency = value
  226. def run(self):
  227. """
  228. Run the process monitoring
  229. """
  230. while self._go:
  231. self._pu.get_proc_info(name=self.name, regexp=self.regexp)
  232. for _p in self._pu.processes.values():
  233. for _per_proc in _p:
  234. _to_db = {}
  235. _to_db['time'] = time.ctime(int(_per_proc.time))
  236. _to_db['rss'] = _per_proc.rss
  237. _to_db['vsz'] = _per_proc.vsz
  238. _to_db['pcpu'] = _per_proc.pcpu
  239. _to_db['name'] = _per_proc.name
  240. self.db_proc_info.append(_to_db)
  241. time.sleep(self.frequency)
  242. def stop(self):
  243. """
  244. Stop the process monitoring
  245. """
  246. self._go = False
  247. self._Thread__stop()
  248. if __name__ == '__main__':
  249. pm = ProcMonitor(name='.*pbs_server.*|.*pbs_sched.*', regexp=True,
  250. frequency=1)
  251. pm.start()
  252. time.sleep(4)
  253. pm.stop()