pbs_cgroups_hook.py

  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. from tests.functional import *
  37. import glob
  38. def have_swap():
  39. """
  40. Return 1 if the system has nonzero free swap space, otherwise return 0
  41. """
  42. tt = 0
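# /proc/meminfo lines look like 'SwapFree:     2097148 kB'; field 0 is the key
# and field 1 the value in kB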
  43. with open(os.path.join(os.sep, 'proc', 'meminfo'), 'r') as fd:
  44. for line in fd:
  45. entry = line.split()
  46. if ((entry[0] == 'SwapFree:') and (entry[1] != '0')):
  47. tt = 1
  48. return tt
  49. def is_memsw_enabled(mem_path):
  50. """
  51. Return 'true' if the system has swap accounting (memsw) enabled,
  52. otherwise return 'false'
  53. """
  54. # List the subsystem directory and check whether any memory.memsw files exist
  55. for files in os.listdir(mem_path):
  56. if 'memory.memsw' in files:
  57. return 'true'
  58. return 'false'
  59. def systemd_escape(buf):
  60. """
  61. Escape a string for use in systemd unit names.
  62. Needed because some distros don't provide the systemd-escape command.
  63. """
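# Escaping mirrors systemd-escape(1): '/' becomes '-', other non-alphanumeric
# bytes become \xNN; e.g. for a hypothetical job id,
# systemd_escape('123.mars-2') returns '123.mars\x2d2'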
  64. if not isinstance(buf, basestring):
  65. raise ValueError('Not a string')
  66. ret = ''
  67. for i, char in enumerate(buf):
  68. if i < 1 and char == '.':
  69. ret += '\\x' + char.encode('hex')
  70. continue
  71. if char.isalnum() or char in '_.':
  72. ret += char
  73. elif char == '/':
  74. ret += '-'
  75. else:
  76. hexval = char.encode('hex')
  77. for j in range(0, len(hexval), 2):
  78. ret += '\\x' + hexval[j:j + 2]
  79. return ret
  80. @tags('mom', 'multi_node')
  81. class TestCgroupsHook(TestFunctional):
  82. """
  83. This test suite targets Linux Cgroups hook functionality.
  84. """
  85. def setUp(self):
  86. TestFunctional.setUp(self)
  87. # Some of the tests require 2 nodes.
  88. # Hence set the values to defaults when no mom is specified
  89. self.vntypenameA = 'no_cgroups'
  90. self.vntypenameB = self.vntypenameA
  91. self.iscray = False
  92. self.noprefix = False
  93. self.tempfile = []
  94. if self.moms:
  95. if len(self.moms) == 1:
  96. self.momA = self.moms.values()[0]
  97. self.momB = self.momA
  98. if self.momA.is_cray():
  99. self.iscray = True
  100. self.hostA = self.momA.shortname
  101. self.hostB = self.hostA
  102. if self.iscray:
  103. self.nodeA = self.get_hostname(self.momA.shortname)
  104. self.nodeB = self.hostA
  105. else:
  106. self.nodeA = self.momA.shortname
  107. self.nodeB = self.hostA
  108. self.vntypenameA = self.get_vntype(self.hostA)
  109. self.vntypenameB = self.vntypenameA
  110. self.momA.delete_vnode_defs()
  111. self.server.manager(MGR_CMD_DELETE, NODE, None, "")
  112. self.server.manager(MGR_CMD_CREATE, NODE, id=self.hostA)
  113. elif len(self.moms) == 2:
  114. self.momA = self.moms.values()[0]
  115. self.momB = self.moms.values()[1]
  116. if self.momA.is_cray() or self.momB.is_cray():
  117. self.iscray = True
  118. self.hostA = self.momA.shortname
  119. self.hostB = self.momB.shortname
  120. if self.iscray:
  121. self.nodeA = self.get_hostname(self.momA.shortname)
  122. self.nodeB = self.get_hostname(self.momB.shortname)
  123. else:
  124. self.nodeA = self.momA.shortname
  125. self.nodeB = self.momB.shortname
  126. self.vntypenameA = self.get_vntype(self.hostA)
  127. self.vntypenameB = self.get_vntype(self.hostB)
  128. if self.momA.is_cray() or self.momB.is_cray():
  129. self.iscray = True
  130. self.momA.delete_vnode_defs()
  131. self.momB.delete_vnode_defs()
  132. self.server.manager(MGR_CMD_DELETE, NODE, None, "")
  133. self.server.manager(MGR_CMD_CREATE, NODE, id=self.nodeA)
  134. self.server.manager(MGR_CMD_CREATE, NODE, id=self.nodeB)
  135. else:
  136. self.skipTest('Tests require one or two MoMs, '
  137. 'use -p moms=<mom1>:<mom2>')
  138. self.serverA = self.servers.values()[0].name
  139. self.paths = self.get_paths()
  140. if not (self.paths['cpuset'] and self.paths['memory']):
  141. self.skipTest('cpuset or memory cgroup subsystem not mounted')
  142. self.swapctl = is_memsw_enabled(self.paths['memsw'])
  143. self.server.set_op_mode(PTL_CLI)
  144. self.server.cleanup_jobs(extend='force')
  145. if not self.iscray:
  146. self.remove_vntype()
  147. self.eatmem_script = """
  148. import sys
  149. import time
  150. MB = 2 ** 20
  151. iterations = 1
  152. chunkSizeMb = 1
  153. sleeptime = 0
  154. if (len(sys.argv) > 1):
  155. iterations = int(sys.argv[1])
  156. if (len(sys.argv) > 2):
  157. chunkSizeMb = int(sys.argv[2])
  158. if (len(sys.argv) > 3):
  159. sleeptime = int(sys.argv[3])
  160. if (iterations < 1):
  161. print('Iteration count must be greater than zero.')
  162. exit(1)
  163. if (chunkSizeMb < 1):
  164. print('Chunk size must be greater than zero.')
  165. exit(1)
  166. totalSizeMb = chunkSizeMb * iterations
  167. print('Allocating %d chunk(s) of size %dMB. (%dMB total)' %
  168. (iterations, chunkSizeMb, totalSizeMb))
  169. buf = ''
  170. for i in range(iterations):
  171. print('allocating %dMB' % ((i + 1) * chunkSizeMb))
  172. buf += ('#' * MB * chunkSizeMb)
  173. if sleeptime > 0:
  174. time.sleep(sleeptime)
  175. """
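# eatmem_script arguments: <iterations> <chunk size in MB> <sleep seconds after
# each chunk>, so 'python - 80 10 10' below allocates 80 chunks of 10MB,
# sleeping 10 seconds after each one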
  176. self.eatmem_job1 = \
  177. '#PBS -joe\n' \
  178. '#PBS -S /bin/bash\n' \
  179. 'sleep 4\n' \
  180. 'python - 80 10 10 <<EOF\n' \
  181. '%s\nEOF\n' % self.eatmem_script
  182. self.eatmem_job2 = \
  183. '#PBS -joe\n' \
  184. '#PBS -S /bin/bash\n' \
  185. 'let i=0; while [ $i -lt 400000 ]; do let i+=1 ; done\n' \
  186. 'python - 200 2 10 <<EOF\n' \
  187. '%s EOF\n' \
  188. 'let i=0; while [ $i -lt 400000 ]; do let i+=1 ; done\n' \
  189. 'python - 100 4 10 <<EOF\n' \
  190. '%sEOF\n' \
  191. 'let i=0; while [ $i -lt 400000 ]; do let i+=1 ; done\n' \
  192. 'sleep 25\n' % (self.eatmem_script, self.eatmem_script)
  193. self.eatmem_job3 = \
  194. '#PBS -joe\n' \
  195. '#PBS -S /bin/bash\n' \
  196. 'sleep 2\n' \
  197. 'let i=0; while [ $i -lt 500000 ]; do let i+=1 ; done\n' \
  198. 'python - 90 5 30 <<EOF\n' \
  199. '%s\nEOF\n' % self.eatmem_script
  200. self.cpuset_mem_script = """#!/bin/bash
  201. #PBS -joe
  202. echo $PBS_JOBID
  203. cpuset_base=`grep cgroup /proc/mounts | grep cpuset | cut -d' ' -f2 | \
  204. tr " " "\n" | sed -n '1p'`
  205. if [ -z "$cpuset_base" ]; then
  206. echo "Cpuset subsystem not mounted."
  207. exit 1
  208. fi
  209. memory_base=`grep cgroup /proc/mounts | grep memory | cut -d' ' -f2 | \
  210. tr " " "\n" | sed -n '1p'`
  211. if [ -z "$memory_base" ]; then
  212. echo "Memory subsystem not mounted."
  213. exit 1
  214. fi
  215. echo "cpuset_base is $cpuset_base"
  216. if [ -d "$cpuset_base/pbspro" ]; then
  217. base="$cpuset_base/pbspro/$PBS_JOBID"
  218. else
  219. jobnum=${PBS_JOBID%%.*}
  220. base="$cpuset_base/pbspro.slice/pbspro-${jobnum}.*.slice"
  221. fi
  222. echo "cgroups base path for cpuset is $base"
  223. if [ -d $base ]; then
  224. cpupath1=$base/cpuset.cpus
  225. cpupath2=$base/cpus
  226. if [ -f $cpupath1 ]; then
  227. cpus=`cat $cpupath1`
  228. elif [ -f $cpupath2 ]; then
  229. cpus=`cat $cpupath2`
  230. fi
  231. echo "CpuIDs=${cpus}"
  232. mempath1="$base/cpuset.mems"
  233. mempath2="$base/mems"
  234. if [ -f $mempath1 ]; then
  235. mems=`cat $mempath1`
  236. elif [ -f $mempath2 ]; then
  237. mems=`cat $mempath2`
  238. fi
  239. echo "MemorySocket=${mems}"
  240. else
  241. echo "Cpuset subsystem job directory not created."
  242. fi
  243. if [ -d "$memory_base/pbspro" ]; then
  244. base="$memory_base/pbspro/$PBS_JOBID"
  245. else
  246. jobnum=${PBS_JOBID%%.*}
  247. base="$memory_base/pbspro.slice/pbspro-${jobnum}.*.slice"
  248. fi
  249. echo "cgroups base path for memory is $base"
  250. if [ -d $base ]; then
  251. mem_limit=`cat $base/memory.limit_in_bytes`
  252. echo "MemoryLimit=${mem_limit}"
  253. memsw_limit=`cat $base/memory.memsw.limit_in_bytes`
  254. echo "MemswLimit=${memsw_limit}"
  255. else
  256. echo "Memory subsystem job directory not created."
  257. fi
  258. sleep 10
  259. """
  260. self.check_dirs_script = """#!/bin/bash
  261. #PBS -joe
  262. check_file_diff() {
  263. for filename in $1/*.*; do
  264. filename=$(basename $filename)
  265. [ $filename = memory.kmem.slabinfo ] && continue
  266. [ ! -r $1/$filename ] && continue
  267. [ ! -r $2/$filename ] && continue
  268. if ! diff $1/$filename $2/$filename >/dev/null ; then
  269. echo "Disabled cgroup subsystems are populated with the job id"
  270. fi
  271. done
  272. }
  273. jobnum=${PBS_JOBID%%.*}
  274. cpuset_base=`grep cgroup /proc/mounts | grep cpuset | cut -d' ' -f2`
  275. if [ -d "$cpuset_base/propbs" ]; then
  276. cpuset_job="$cpuset_base/propbs/$PBS_JOBID"
  277. else
  278. cpuset_job="$cpuset_base/propbs.slice/propbs-${jobnum}.*.slice"
  279. fi
  280. cpuacct_base=`grep cgroup /proc/mounts | grep cpuacct | cut -d' ' -f2`
  281. if [ -d "$cpuacct_base/propbs" ]; then
  282. cpuacct_job="$cpuacct_base/propbs/$PBS_JOBID"
  283. else
  284. cpuacct_job="$cpuacct_base/propbs.slice/propbs-${jobnum}.*.slice"
  285. fi
  286. memory_base=`grep cgroup /proc/mounts | grep memory | cut -d' ' -f2`
  287. if [ -d "$memory_base/propbs" ]; then
  288. memory_job="$memory_base/propbs/$PBS_JOBID"
  289. else
  290. memory_job="$memory_base/propbs.slice/propbs-${jobnum}.*.slice"
  291. fi
  292. devices_base=`grep cgroup /proc/mounts | grep devices | cut -d' ' -f2`
  293. if [ -d "$devices_base/propbs" ]; then
  294. devices_job="$devices_base/propbs/$PBS_JOBID"
  295. else
  296. devices_job="$devices_base/propbs.slice/propbs-${jobnum}.*.slice"
  297. fi
  298. echo ====
  299. ls -l $devices_base
  300. echo ====
  301. ls -l $devices_job
  302. echo ====
  303. if [ -d $devices_job ]; then
  304. device_list=`cat $devices_job/devices.list`
  305. echo "${device_list}"
  306. sysd=`systemctl --version | grep systemd | awk '{print $2}'`
  307. if [ "$sysd" -ge 205 ]; then
  308. if [ -d $cpuacct_job ]; then
  309. check_file_diff $cpuacct_base/propbs.slice/ $cpuacct_job
  310. fi
  311. if [ -d $cpuset_job ]; then
  312. check_file_diff $cpuset_base/propbs.slice/ $cpuset_job
  313. fi
  314. if [ -d $memory_job ] ; then
  315. check_file_diff $memory_base/propbs.slice/ $memory_job
  316. fi
  317. else
  318. if [ -d $cpuacct_job -o -d $cpuset_job -o -d $memory_job ]; then
  319. echo "Disabled cgroup subsystems are populated with the job id"
  320. fi
  321. fi
  322. else
  323. echo "Devices directory should be populated"
  324. fi
  325. sleep 10
  326. """
  327. self.check_gpu_script = """#!/bin/bash
  328. #PBS -joe
  329. jobnum=${PBS_JOBID%%.*}
  330. devices_base=`grep cgroup /proc/mounts | grep devices | cut -d' ' -f2`
  331. if [ -d "$devices_base/propbs" ]; then
  332. devices_job="$devices_base/propbs/$PBS_JOBID"
  333. else
  334. devices_job="$devices_base/propbs.slice/propbs-${jobnum}.*.slice"
  335. fi
  336. device_list=`cat $devices_job/devices.list`
  337. grep "195" $devices_job/devices.list
  338. echo "There are `nvidia-smi -q -x | grep "GPU" | wc -l` GPUs"
  339. sleep 10
  340. """
  341. self.sleep15_job = """#!/bin/bash
  342. #PBS -joe
  343. sleep 15
  344. """
  345. self.sleep5_job = """#!/bin/bash
  346. #PBS -joe
  347. sleep 5
  348. """
  349. self.eat_cpu_script = """#!/bin/bash
  350. #PBS -joe
  351. for i in 1 2 3 4; do while : ; do : ; done & done
  352. """
  353. self.job_scr2 = """#!/bin/bash
  354. #PBS -l select=host=%s:ncpus=1+ncpus=4:mem=2gb
  355. #PBS -l place=vscatter
  356. #PBS -W umask=022
  357. #PBS -koe
  358. echo "$PBS_NODEFILE"
  359. cat $PBS_NODEFILE
  360. sleep 300
  361. """ % self.hostB
  362. self.job_scr3 = """#!/bin/bash
  363. #PBS -l select=2:ncpus=4:mem=2gb
  364. #PBS -l place=pack
  365. #PBS -W umask=022
  366. #PBS -W tolerate_node_failures=job_start
  367. #PBS -koe
  368. echo "$PBS_NODEFILE"
  369. cat $PBS_NODEFILE
  370. sleep 300
  371. """
  372. self.cfg0 = """{
  373. "cgroup_prefix" : "pbspro",
  374. "exclude_hosts" : [],
  375. "exclude_vntypes" : [],
  376. "run_only_on_hosts" : [],
  377. "periodic_resc_update" : false,
  378. "vnode_per_numa_node" : false,
  379. "online_offlined_nodes" : false,
  380. "use_hyperthreads" : false,
  381. "cgroup" : {
  382. "cpuacct" : {
  383. "enabled" : false
  384. },
  385. "cpuset" : {
  386. "enabled" : false
  387. },
  388. "devices" : {
  389. "enabled" : false
  390. },
  391. "hugetlb" : {
  392. "enabled" : false
  393. },
  394. "memory" : {
  395. "enabled" : false
  396. },
  397. "memsw" : {
  398. "enabled" : false
  399. }
  400. }
  401. }
  402. """
  403. self.cfg1 = """{
  404. "cgroup_prefix" : "pbspro",
  405. "exclude_hosts" : [%s],
  406. "exclude_vntypes" : [%s],
  407. "run_only_on_hosts" : [%s],
  408. "periodic_resc_update" : true,
  409. "vnode_per_numa_node" : false,
  410. "online_offlined_nodes" : true,
  411. "use_hyperthreads" : false,
  412. "cgroup":
  413. {
  414. "cpuacct":
  415. {
  416. "enabled" : true,
  417. "exclude_hosts" : [],
  418. "exclude_vntypes" : []
  419. },
  420. "cpuset":
  421. {
  422. "enabled" : true,
  423. "exclude_hosts" : [%s],
  424. "exclude_vntypes" : []
  425. },
  426. "devices":
  427. {
  428. "enabled" : false
  429. },
  430. "hugetlb":
  431. {
  432. "enabled" : false
  433. },
  434. "memory":
  435. {
  436. "enabled" : true,
  437. "exclude_hosts" : [],
  438. "exclude_vntypes" : [],
  439. "soft_limit" : false,
  440. "default" : "96MB",
  441. "reserve_percent" : "0",
  442. "reserve_amount" : "0MB"
  443. },
  444. "memsw":
  445. {
  446. "enabled" : %s,
  447. "exclude_hosts" : [],
  448. "exclude_vntypes" : [],
  449. "default" : "96MB",
  450. "reserve_percent" : "0",
  451. "reserve_amount" : "128MB"
  452. }
  453. }
  454. }
  455. """
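# cfg1 takes five substitutions, in order: global exclude_hosts, global
# exclude_vntypes, run_only_on_hosts, cpuset exclude_hosts, and the memsw
# enabled flag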
  456. self.cfg2 = """{
  457. "cgroup_prefix" : "propbs",
  458. "exclude_hosts" : [],
  459. "exclude_vntypes" : [],
  460. "run_only_on_hosts" : [],
  461. "periodic_resc_update" : false,
  462. "vnode_per_numa_node" : false,
  463. "online_offlined_nodes" : false,
  464. "use_hyperthreads" : false,
  465. "cgroup":
  466. {
  467. "cpuacct":
  468. {
  469. "enabled" : false
  470. },
  471. "cpuset":
  472. {
  473. "enabled" : false
  474. },
  475. "devices":
  476. {
  477. "enabled" : true,
  478. "exclude_hosts" : [],
  479. "exclude_vntypes" : [],
  480. "allow" : [
  481. "b *:* rwm",
  482. ["console","rwm"],
  483. ["tty0","rwm", "*"],
  484. "c 1:* rwm",
  485. "c 10:* rwm"
  486. ]
  487. },
  488. "hugetlb":
  489. {
  490. "enabled" : false
  491. },
  492. "memory":
  493. {
  494. "enabled" : false
  495. },
  496. "memsw":
  497. {
  498. "enabled" : false
  499. }
  500. }
  501. }
  502. """
  503. self.cfg3 = """{
  504. "cgroup_prefix" : "pbspro",
  505. "exclude_hosts" : [],
  506. "exclude_vntypes" : [%s],
  507. "run_only_on_hosts" : [],
  508. "periodic_resc_update" : true,
  509. "vnode_per_numa_node" : false,
  510. "online_offlined_nodes" : true,
  511. "use_hyperthreads" : true,
  512. "cgroup":
  513. {
  514. "cpuacct":
  515. {
  516. "enabled" : true,
  517. "exclude_hosts" : [],
  518. "exclude_vntypes" : []
  519. },
  520. "cpuset":
  521. {
  522. "enabled" : true,
  523. "exclude_hosts" : [],
  524. "exclude_vntypes" : [%s]
  525. },
  526. "devices":
  527. {
  528. "enabled" : false
  529. },
  530. "hugetlb":
  531. {
  532. "enabled" : false
  533. },
  534. "memory":
  535. {
  536. "enabled" : true,
  537. "default" : "96MB",
  538. "reserve_amount" : "50MB",
  539. "exclude_hosts" : [],
  540. "exclude_vntypes" : [%s]
  541. },
  542. "memsw":
  543. {
  544. "enabled" : %s,
  545. "default" : "96MB",
  546. "reserve_amount" : "45MB",
  547. "exclude_hosts" : [],
  548. "exclude_vntypes" : [%s]
  549. }
  550. }
  551. }
  552. """
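# cfg3 takes five substitutions, in order: global exclude_vntypes, cpuset
# exclude_vntypes, memory exclude_vntypes, the memsw enabled flag, and
# memsw exclude_vntypes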
  553. self.cfg4 = """{
  554. "cgroup_prefix" : "pbspro",
  555. "exclude_hosts" : [],
  556. "exclude_vntypes" : ["no_cgroups"],
  557. "run_only_on_hosts" : [],
  558. "periodic_resc_update" : true,
  559. "vnode_per_numa_node" : false,
  560. "online_offlined_nodes" : true,
  561. "use_hyperthreads" : false,
  562. "cgroup":
  563. {
  564. "cpuacct":
  565. {
  566. "enabled" : true,
  567. "exclude_hosts" : [],
  568. "exclude_vntypes" : []
  569. },
  570. "cpuset":
  571. {
  572. "enabled" : true,
  573. "exclude_hosts" : [],
  574. "exclude_vntypes" : ["no_cgroups_cpus"]
  575. },
  576. "devices":
  577. {
  578. "enabled" : false
  579. },
  580. "hugetlb":
  581. {
  582. "enabled" : false
  583. },
  584. "memory":
  585. {
  586. "enabled" : true,
  587. "default" : "96MB",
  588. "reserve_amount" : "100MB",
  589. "exclude_hosts" : [],
  590. "exclude_vntypes" : ["no_cgroups_mem"]
  591. },
  592. "memsw":
  593. {
  594. "enabled" : %s,
  595. "default" : "96MB",
  596. "reserve_amount" : "90MB",
  597. "exclude_hosts" : [],
  598. "exclude_vntypes" : []
  599. }
  600. }
  601. }
  602. """
  603. self.cfg5 = """{
  604. "vnode_per_numa_node" : %s,
  605. "cgroup" : {
  606. "cpuset" : {
  607. "enabled" : true,
  608. "exclude_cpus" : [%s],
  609. "mem_fences" : %s,
  610. "mem_hardwall" : %s,
  611. "memory_spread_page" : %s
  612. },
  613. "memory" : {
  614. "enabled" : true
  615. },
  616. "memsw" : {
  617. "enabled" : %s
  618. }
  619. }
  620. }
  621. """
  622. self.cfg6 = """{
  623. "vnode_per_numa_node" : false,
  624. "cgroup" : {
  625. "memory":
  626. {
  627. "enabled" : true,
  628. "default" : "64MB",
  629. "reserve_percent" : "0",
  630. "reserve_amount" : "0MB"
  631. },
  632. "memsw":
  633. {
  634. "enabled" : %s,
  635. "default" : "64MB",
  636. "reserve_percent" : "0",
  637. "reserve_amount" : "0MB"
  638. }
  639. }
  640. }
  641. """
  642. self.cfg7 = """{
  643. "cgroup_prefix" : "pbspro",
  644. "exclude_hosts" : [],
  645. "exclude_vntypes" : [],
  646. "run_only_on_hosts" : [],
  647. "periodic_resc_update" : true,
  648. "vnode_per_numa_node" : true,
  649. "online_offlined_nodes" : true,
  650. "use_hyperthreads" : false,
  651. "cgroup" : {
  652. "cpuacct" : {
  653. "enabled" : true,
  654. "exclude_hosts" : [],
  655. "exclude_vntypes" : []
  656. },
  657. "cpuset" : {
  658. "enabled" : true,
  659. "exclude_cpus" : [],
  660. "exclude_hosts" : [],
  661. "exclude_vntypes" : []
  662. },
  663. "devices" : {
  664. "enabled" : false
  665. },
  666. "hugetlb" : {
  667. "enabled" : false
  668. },
  669. "memory" : {
  670. "enabled" : true,
  671. "exclude_hosts" : [],
  672. "exclude_vntypes" : [],
  673. "default" : "256MB",
  674. "reserve_amount" : "64MB"
  675. },
  676. "memsw" : {
  677. "enabled" : true,
  678. "exclude_hosts" : [],
  679. "exclude_vntypes" : [],
  680. "default" : "256MB",
  681. "reserve_amount" : "64MB"
  682. }
  683. }
  684. }
  685. """
  686. Job.dflt_attributes[ATTR_k] = 'oe'
  687. # Increase the log level
  688. a = {'log_events': '4095'}
  689. self.server.manager(MGR_CMD_SET, SERVER, a, expect=True)
  690. # Configure the scheduler to schedule using vmem
  691. a = {'resources': 'ncpus,mem,vmem,host,vnode,ngpus,nmics'}
  692. self.scheduler.set_sched_config(a)
  693. # Configure the mom
  694. c = {'$logevent': '0xffffffff', '$clienthost': self.server.name,
  695. '$min_check_poll': 8, '$max_check_poll': 12}
  696. self.momA.add_config(c)
  697. if self.hostA != self.hostB:
  698. self.momB.add_config(c)
  699. # Create resource as root
  700. attr = {'type': 'long', 'flag': 'nh'}
  701. self.server.manager(MGR_CMD_CREATE, RSC, attr, id='nmics',
  702. runas=ROOT_USER, logerr=False)
  703. self.server.manager(MGR_CMD_CREATE, RSC, attr, id='ngpus',
  704. runas=ROOT_USER, logerr=False)
  705. # Import the hook
  706. self.hook_name = 'pbs_cgroups'
  707. self.hook_file = os.path.join(self.server.pbs_conf['PBS_EXEC'],
  708. 'lib',
  709. 'python',
  710. 'altair',
  711. 'pbs_hooks',
  712. 'pbs_cgroups.PY')
  713. self.load_hook(self.hook_file)
  714. events = '"execjob_begin,execjob_launch,execjob_attach,'
  715. events += 'execjob_epilogue,execjob_end,exechost_startup,'
  716. events += 'exechost_periodic,execjob_resize"'
  717. # Enable the cgroups hook
  718. conf = {'enabled': 'True', 'freq': 10, 'alarm': 30, 'event': events}
  719. self.server.manager(MGR_CMD_SET, HOOK, conf, self.hook_name)
  720. # Restart mom so exechost_startup hook is run
  721. self.momA.signal('-HUP')
  722. if self.hostA != self.hostB:
  723. self.momB.signal('-HUP')
  724. self.logger.info('vntype set for %s is %s' %
  725. (self.momA, self.vntypenameA))
  726. self.logger.info('vntype set for %s is %s' %
  727. (self.momB, self.vntypenameB))
  728. # queuejob hook
  729. self.qjob_hook_body = """
  730. import pbs
  731. e=pbs.event()
  732. pbs.logmsg(pbs.LOG_DEBUG, "queuejob hook executed")
  733. # Save current select spec in resource 'site'
  734. e.job.Resource_List["site"] = str(e.job.Resource_List["select"])
  735. # Add 1 chunk to each chunk (except the first chunk) in the job's select spec
  736. new_select = e.job.Resource_List["select"].increment_chunks(1)
  737. e.job.Resource_List["select"] = new_select
  738. # Make job tolerate node failures that occur only during start.
  739. e.job.tolerate_node_failures = "job_start"
  740. """
  741. # launch hook
  742. self.launch_hook_body = """
  743. import pbs
  744. import time
  745. e=pbs.event()
  746. pbs.logmsg(pbs.LOG_DEBUG, "Executing launch")
  747. # print out the vnode_list[] values
  748. for vn in e.vnode_list:
  749. v = e.vnode_list[vn]
  750. pbs.logjobmsg(e.job.id, "launch: found vnode_list[" + v.name + "]")
  751. # print out the vnode_list_fail[] values:
  752. for vn in e.vnode_list_fail:
  753. v = e.vnode_list_fail[vn]
  754. pbs.logjobmsg(e.job.id, "launch: found vnode_list_fail[" + v.name + "]")
  755. if e.job.in_ms_mom():
  756. pj = e.job.release_nodes(keep_select=%s)
  757. if pj is None:
  758. e.job.Hold_Types = pbs.hold_types("s")
  759. e.job.rerun()
  760. e.reject("unsuccessful at LAUNCH")
  761. """
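# The %s placeholder above is substituted with the keep_select value handed to
# release_nodes() when this hook body is formatted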
  762. # resize hook
  763. self.resize_hook_body = """
  764. import pbs
  765. e=pbs.event()
  766. if %s e.job.in_ms_mom():
  767. e.reject("Cannot resize the job")
  768. """
  769. def get_paths(self):
  770. """
  771. Returns a dictionary containing the location where each cgroup
  772. is mounted.
  773. """
  774. paths = {'pids': None,
  775. 'blkio': None,
  776. 'systemd': None,
  777. 'cpuset': None,
  778. 'memory': None,
  779. 'memsw': None,
  780. 'cpuacct': None,
  781. 'devices': None}
  782. # Loop through the mounts and collect the ones for cgroups
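# A cgroup entry in /proc/mounts looks like:
#   cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory 0 0
# fields: device, mount point, fstype, mount options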
  783. with open(os.path.join(os.sep, 'proc', 'mounts'), 'r') as fd:
  784. for line in fd:
  785. entries = line.split()
  786. if entries[2] != 'cgroup':
  787. continue
  788. flags = entries[3].split(',')
  789. if 'noprefix' in flags:
  790. self.noprefix = True
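# With the 'noprefix' mount option, cpuset control files drop the subsystem
# prefix (e.g. 'cpus' rather than 'cpuset.cpus'); the job scripts above check
# both forms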
  791. subsys = os.path.basename(entries[1])
  792. paths[subsys] = entries[1]
  793. if 'memory' in flags:
  794. paths['memsw'] = paths[subsys]
  795. paths['memory'] = paths[subsys]
  796. if 'cpuacct' in flags:
  797. paths['cpuacct'] = paths[subsys]
  798. if 'devices' in flags:
  799. paths['devices'] = paths[subsys]
  800. return paths
  801. def is_dir(self, cpath, host):
  802. """
  803. Return True if the directory exists on the host, otherwise False
  804. """
  805. for _ in range(5):
  806. rv = self.du.isdir(hostname=host, path=cpath, sudo=True)
  807. if rv:
  808. return True
  809. time.sleep(0.1)
  810. return False
  811. def is_file(self, cpath, host):
  812. """
  813. Return True if the file exists on the host, otherwise False
  814. """
  815. for _ in range(5):
  816. rv = self.du.isfile(hostname=host, path=cpath, sudo=True)
  817. if rv:
  818. return True
  819. time.sleep(0.1)
  820. return False
  821. def get_cgroup_job_dir(self, subsys, jobid, host):
  822. """
  823. Return the cgroup directory for a job under the given subsystem
  824. """
  825. basedir = self.paths[subsys]
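# Two layouts are possible: <mount>/pbspro/<jobid> when the hook manages the
# directories directly, or <mount>/pbspro.slice/pbspro-<escaped jobid>.slice
# when systemd manages them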
  826. if self.du.isdir(hostname=host, path=os.path.join(basedir, 'pbspro')):
  827. return os.path.join(basedir, 'pbspro', jobid)
  828. else:
  829. return os.path.join(basedir, 'pbspro.slice',
  830. 'pbspro-%s.slice' % systemd_escape(jobid))
  831. def load_hook(self, filename):
  832. """
  833. Import and enable the hook contained in the specified file.
  834. """
  835. try:
  836. with open(filename, 'r') as fd:
  837. script = fd.read()
  838. except IOError:
  839. self.assertTrue(False, 'Failed to open hook file %s' % filename)
  840. events = '"execjob_begin,execjob_launch,execjob_attach,'
  841. events += 'execjob_epilogue,execjob_end,exechost_startup,'
  842. events += 'exechost_periodic"'
  843. a = {'enabled': 'True',
  844. 'freq': '10',
  845. 'event': events}
  846. # Sometimes the deletion of the old hook is still pending
  847. failed = True
  848. for _ in range(5):
  849. try:
  850. self.server.create_import_hook(self.hook_name, a, script,
  851. overwrite=True)
  852. except Exception:
  853. time.sleep(2)
  854. else:
  855. failed = False
  856. break
  857. if failed:
  858. self.skipTest('pbs_cgroups_hook: failed to load hook')
  859. # Add the configuration
  860. self.load_config(self.cfg0)
  861. def load_config(self, cfg):
  862. """
  863. Create a hook configuration file with the provided contents.
  864. """
  865. fn = self.du.create_temp_file(hostname=self.serverA, body=cfg)
  866. self.tempfile.append(fn)
  867. a = {'content-type': 'application/x-config',
  868. 'content-encoding': 'default',
  869. 'input-file': fn}
  870. self.server.manager(MGR_CMD_IMPORT, HOOK, a, self.hook_name)
  871. self.momA.log_match('pbs_cgroups.CF;copy hook-related '
  872. 'file request received',
  873. starttime=self.server.ctime)
  874. self.logger.info('Current config: %s' % cfg)
  875. # Restart MoM to work around PP-993
  876. self.momA.restart()
  877. if self.hostA != self.hostB:
  878. self.momB.restart()
  879. def set_vntype(self, host, typestring='myvntype'):
  880. """
  881. Set the vnode type on the given mom host.
  882. """
  883. pbs_home = self.server.pbs_conf['PBS_HOME']
  884. vntype_file = os.path.join(pbs_home, 'mom_priv', 'vntype')
  885. self.logger.info('Setting vntype to %s in %s on mom %s' %
  886. (typestring, vntype_file, host))
  887. localhost = socket.gethostname()
  888. fn = self.du.create_temp_file(hostname=localhost, body=typestring)
  889. self.tempfile.append(fn)
  890. ret = self.du.run_copy(hosts=host, src=fn,
  891. dest=vntype_file, sudo=True, uid='root',
  892. gid='root', mode=0644)
  893. if ret['rc'] != 0:
  894. self.skipTest('pbs_cgroups_hook: failed to set vntype')
  895. def remove_vntype(self):
  896. """
  897. Remove the vnode type file from the moms.
  898. """
  899. pbs_home = self.server.pbs_conf['PBS_HOME']
  900. vntype_file = os.path.join(pbs_home, 'mom_priv', 'vntype')
  901. self.logger.info('Deleting vntype files from moms')
  902. ret = self.du.rm(hostname=self.hostA, path=vntype_file,
  903. force=True, sudo=True, logerr=False)
  904. if not ret:
  905. self.skipTest('pbs_cgroups_hook: failed to remove vntype')
  906. if self.hostA != self.hostB:
  907. ret = self.du.rm(hostname=self.hostB, path=vntype_file,
  908. force=True, sudo=True, logerr=False)
  909. if not ret:
  910. self.skipTest('pbs_cgroups_hook: failed to remove vntype')
  911. def get_vntype(self, host):
  912. """
  913. Get the vntype if it exists (for example, on Cray systems)
  914. """
  915. vntype = 'no_cgroups'
  916. pbs_home = self.server.pbs_conf['PBS_HOME']
  917. vntype_f = os.path.join(pbs_home, 'mom_priv', 'vntype')
  918. self.logger.info('Reading the vntype value for mom %s' % host)
  919. if self.du.isfile(hostname=host, path=vntype_f):
  920. output = self.du.cat(hostname=host, filename=vntype_f, sudo=True)
  921. vntype = output['out'][0]
  922. return vntype
  923. def wait_and_read_file(self, host, filename=''):
  924. """
  925. Make several attempts to read a file and return its contents
  926. """
  927. self.logger.info('Reading file: %s on host: %s' % (filename, host))
  928. if not filename:
  929. raise ValueError('Invalid filename')
  930. for _ in range(30):
  931. if self.du.isfile(hostname=host, path=filename):
  932. break
  933. time.sleep(0.5)
  934. self.assertTrue(self.du.isfile(hostname=host, path=filename),
  935. 'File %s not found on host %s' % (filename, host))
  936. # Wait for output to flush
  937. time.sleep(2)
  938. output = self.du.cat(hostname=host, filename=filename, sudo=True)
  939. return output['out']
  940. def get_hostname(self, host):
  941. """
  942. Get the short hostname of the mom.
  943. This is needed since the cgroups hook logs the hostname, not the mom name
  944. """
  945. cmd = 'hostname'
  946. rv = self.du.run_cmd(hosts=host, cmd=cmd)
  947. ret = rv['out'][0].split('.')[0]
  948. return ret
  949. def get_host_names(self, host):
  950. """
  951. Get the shortname and hostname of the mom. This is needed
  952. for systems where the hostname and shortname differ.
  953. """
  954. cmd1 = 'hostname -s'
  955. rv1 = self.du.run_cmd(hosts=host, cmd=cmd1)
  956. host2 = self.get_hostname(host)
  957. hostlist = '"' + host2 + '"'
  958. moms = [hostlist]
  959. mlog = ["'" + host2 + "'"]
  960. # if the shortname and hostname are not the same, construct a
  961. # list including both to be passed to the cgroups hook
  962. if (str(rv1['out'][0]) != host2):
  963. moms.append('"' + str(rv1['out'][0]) + '"')
  964. mlog.append("'" + str(rv1['out'][0]) + "'")
  965. if len(moms) > 1:
  966. mom1 = ','.join(moms)
  967. log1 = ', '.join(mlog)
  968. else:
  969. mom1 = '"' + host2 + '"'
  970. log1 = "'" + host2 + "'"
  971. return mom1, log1
  972. def test_cgroup_vntype_excluded(self):
  973. """
  974. Test to verify that cgroups are not enforced on nodes
  975. that have an exclude vntype file set
  976. """
  977. # Test requires 2 nodes
  978. if len(self.moms) != 2:
  979. self.skipTest('Test requires two Moms as input, '
  980. 'use -p moms=<mom1:mom2>')
  981. name = 'CGROUP8'
  982. if self.vntypenameA == 'no_cgroups':
  983. self.logger.info('Adding vntype %s to mom %s ' %
  984. (self.vntypenameA, self.momA))
  985. self.set_vntype(typestring=self.vntypenameA, host=self.hostA)
  986. self.load_config(self.cfg1 %
  987. ('', '"' + self.vntypenameA + '"',
  988. '', '', self.swapctl))
  989. a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
  990. self.hostA, ATTR_N: name}
  991. j = Job(TEST_USER, attrs=a)
  992. j.create_script(self.sleep15_job)
  993. jid = self.server.submit(j)
  994. a = {'job_state': 'R'}
  995. self.server.expect(JOB, a, jid)
  996. self.server.status(JOB, ATTR_o, jid)
  997. o = j.attributes[ATTR_o]
  998. self.tempfile.append(o)
  999. self.logger.info('memory subsystem is at location %s' %
  1000. self.paths['memory'])
  1001. cpath = self.get_cgroup_job_dir('memory', jid, self.hostA)
  1002. self.assertFalse(self.is_dir(cpath, self.hostA))
  1003. self.momA.log_match("%s is in the excluded vnode type list: ['%s']"
  1004. % (self.vntypenameA, self.vntypenameA),
  1005. starttime=self.server.ctime)
  1006. self.logger.info('vntypes on both hosts are: %s and %s'
  1007. % (self.vntypenameA, self.vntypenameB))
  1008. if self.vntypenameB == self.vntypenameA:
  1009. self.logger.info('Skipping the second part of this test '
  1010. 'since hostB also has the same vntype value')
  1011. return
  1012. a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
  1013. self.hostB, ATTR_N: name}
  1014. j1 = Job(TEST_USER, attrs=a)
  1015. j1.create_script(self.sleep15_job)
  1016. jid2 = self.server.submit(j1)
  1017. a = {'job_state': 'R'}
  1018. self.server.expect(JOB, a, jid2)
  1019. self.server.status(JOB, ATTR_o, jid2)
  1020. o = j1.attributes[ATTR_o]
  1021. self.tempfile.append(o)
  1022. cpath = self.get_cgroup_job_dir('memory', jid2, self.hostB)
  1023. self.assertTrue(self.is_dir(cpath, self.hostB))
  1024. def test_cgroup_host_excluded(self):
  1025. """
  1026. Test to verify that cgroups are not enforced on nodes
  1027. that are listed in exclude_hosts
  1028. """
  1029. # Test requires 2 nodes
  1030. if len(self.moms) != 2:
  1031. self.skipTest('Test requires two Moms as input, '
  1032. 'use -p moms=<mom1:mom2>')
  1033. name = 'CGROUP9'
  1034. mom, log = self.get_host_names(self.hostA)
  1035. self.load_config(self.cfg1 % ('%s' % mom, '', '', '', self.swapctl))
  1036. a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
  1037. self.hostA, ATTR_N: name}
  1038. j = Job(TEST_USER, attrs=a)
  1039. j.create_script(self.sleep15_job)
  1040. jid = self.server.submit(j)
  1041. a = {'job_state': 'R'}
  1042. self.server.expect(JOB, a, jid)
  1043. self.server.status(JOB, ATTR_o, jid)
  1044. o = j.attributes[ATTR_o]
  1045. self.tempfile.append(o)
  1046. cpath = self.get_cgroup_job_dir('memory', jid, self.hostA)
  1047. self.assertFalse(self.is_dir(cpath, self.hostA))
  1048. host = self.get_hostname(self.hostA)
  1049. self.momA.log_match('%s is in the excluded host list: [%s]' %
  1050. (host, log), starttime=self.server.ctime)
  1051. a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
  1052. self.hostB, ATTR_N: name}
  1053. j = Job(TEST_USER, attrs=a)
  1054. j.create_script(self.sleep15_job)
  1055. jid2 = self.server.submit(j)
  1056. a = {'job_state': 'R'}
  1057. self.server.expect(JOB, a, jid2)
  1058. self.server.status(JOB, ATTR_o, jid2)
  1059. o = j.attributes[ATTR_o]
  1060. self.tempfile.append(o)
  1061. cpath = self.get_cgroup_job_dir('memory', jid2, self.hostB)
  1062. self.assertTrue(self.is_dir(cpath, self.hostB))
  1063. def test_cgroup_exclude_vntype_mem(self):
  1064. """
  1065. Test to verify that cgroups are not enforced on nodes
  1066. that have an exclude vntype file set
  1067. """
  1068. # Test requires 2 nodes
  1069. if len(self.moms) != 2:
  1070. self.skipTest('Test requires two Moms as input, '
  1071. 'use -p moms=<mom1:mom2>')
  1072. name = 'CGROUP12'
  1073. if self.vntypenameA == 'no_cgroups':
  1074. self.logger.info('Adding vntype %s to mom %s' %
  1075. (self.vntypenameA, self.momA))
  1076. self.set_vntype(typestring='no_cgroups', host=self.hostA)
  1077. self.load_config(self.cfg3 % ('', '', '"' + self.vntypenameA + '"',
  1078. self.swapctl,
  1079. '"' + self.vntypenameA + '"'))
  1080. a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s'
  1081. % self.hostA, ATTR_N: name}
  1082. j = Job(TEST_USER, attrs=a)
  1083. j.create_script(self.sleep15_job)
  1084. jid = self.server.submit(j)
  1085. a = {'job_state': 'R'}
  1086. self.server.expect(JOB, a, jid)
  1087. self.server.status(JOB, ATTR_o, jid)
  1088. o = j.attributes[ATTR_o]
  1089. self.tempfile.append(o)
  1090. self.momA.log_match('cgroup excluded for subsystem memory '
  1091. 'on vnode type %s' % self.vntypenameA,
  1092. starttime=self.server.ctime)
  1093. self.logger.info('vntype values for the hosts are: %s and %s'
  1094. % (self.vntypenameA, self.vntypenameB))
  1095. if self.vntypenameB == self.vntypenameA:
  1096. self.logger.info('Skipping the second part of this test '
  1097. 'since hostB also has the same vntype value')
  1098. return
  1099. a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
  1100. self.hostB, ATTR_N: name}
  1101. j1 = Job(TEST_USER, attrs=a)
  1102. j1.create_script(self.sleep15_job)
  1103. jid2 = self.server.submit(j1)
  1104. a = {'job_state': 'R'}
  1105. self.server.expect(JOB, a, jid2)
  1106. self.server.status(JOB, ATTR_o, jid2)
  1107. o = j1.attributes[ATTR_o]
  1108. self.tempfile.append(o)
  1109. cpath = self.get_cgroup_job_dir('memory', jid2, self.hostB)
  1110. self.assertTrue(self.is_dir(cpath, self.hostB))
  1111. @timeout(300)
  1112. def test_cgroup_periodic_update_check_values(self):
  1113. """
  1114. Test to verify that cgroups are reporting usage for cput and mem
  1115. """
  1116. name = 'CGROUP13'
  1117. conf = {'freq': 2}
  1118. self.server.manager(MGR_CMD_SET, HOOK, conf, self.hook_name)
  1119. self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
  1120. a = {'Resource_List.select': '1:ncpus=1:mem=500mb:host=%s' %
  1121. self.hostA, ATTR_N: name}
  1122. j = Job(TEST_USER, attrs=a)
  1123. j.create_script(self.eatmem_job3)
  1124. jid = self.server.submit(j)
  1125. a = {'job_state': 'R'}
  1126. self.server.expect(JOB, a, jid)
  1127. self.server.status(JOB, ATTR_o, jid)
  1128. o = j.attributes[ATTR_o]
  1129. self.tempfile.append(o)
  1130. # Scouring the logs for initial values takes too long
  1131. resc_list = ['resources_used.cput', 'resources_used.mem']
  1132. if self.swapctl == 'true':
  1133. resc_list.append('resources_used.vmem')
  1134. qstat = self.server.status(JOB, resc_list, id=jid)
  1135. cput = qstat[0]['resources_used.cput']
  1136. self.assertEqual(cput, '00:00:00')
  1137. mem = qstat[0]['resources_used.mem']
  1138. match = re.match(r'(\d+)kb', mem)
  1139. self.assertFalse(match is None)
  1140. usage = int(match.groups()[0])
  1141. self.assertGreater(30000, usage)
  1142. if self.swapctl == 'true':
  1143. vmem = qstat[0]['resources_used.vmem']
  1144. match = re.match(r'(\d+)kb', vmem)
  1145. self.assertFalse(match is None)
  1146. usage = int(match.groups()[0])
  1147. self.assertGreater(30000, usage)
  1148. err_msg = "Unexpected error in pbs_cgroups " + \
  1149. "handling exechost_periodic event: TypeError"
  1150. self.mom.log_match(err_msg, max_attempts=3,
  1151. interval=1, n=100, existence=False)
  1152. # Allow some time to pass for values to be updated
  1153. begin = int(time.time())
  1154. self.logger.info('Waiting for periodic hook to update usage data.')
  1155. time.sleep(15)
  1156. if self.paths['cpuacct']:
  1157. lines = self.momA.log_match(
  1158. '%s;update_job_usage: CPU usage:' %
  1159. jid, allmatch=True, starttime=begin)
  1160. usage = 0.0
  1161. for line in lines:
  1162. match = re.search(r'CPU usage: ([0-9.]+) secs', line[1])
  1163. if not match:
  1164. continue
  1165. usage = float(match.groups()[0])
  1166. if usage > 1.0:
  1167. break
  1168. self.assertGreater(usage, 1.0)
  1169. if self.paths['memory']:
  1170. lines = self.momA.log_match(
  1171. '%s;update_job_usage: Memory usage: mem=' % jid,
  1172. allmatch=True, max_attempts=5, starttime=begin)
  1173. usage = 0
  1174. for line in lines:
  1175. match = re.search(r'mem=(\d+)kb', line[1])
  1176. if not match:
  1177. continue
  1178. usage = int(match.groups()[0])
  1179. if usage > 400000:
  1180. break
  1181. self.assertGreater(usage, 400000, 'Max memory usage: %dkb' % usage)
  1182. if self.swapctl == 'true':
  1183. lines = self.momA.log_match(
  1184. '%s;update_job_usage: Memory usage: vmem=' % jid,
  1185. allmatch=True, max_attempts=5, starttime=begin)
  1186. usage = 0
  1187. for line in lines:
  1188. match = re.search(r'vmem=(\d+)kb', line[1])
  1189. if not match:
  1190. continue
  1191. usage = int(match.groups()[0])
  1192. if usage > 400000:
  1193. break
  1194. self.assertGreater(usage, 400000)
  1195. def test_cgroup_cpuset_and_memory(self):
  1196. """
  1197. Test to verify that the job cgroup is created correctly
  1198. Check to see that cpuset.cpus=0, cpuset.mems=0 and that
  1199. memory.limit_in_bytes = 314572800
  1200. """
  1201. name = 'CGROUP1'
  1202. self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
  1203. a = {'Resource_List.select': '1:ncpus=1:mem=300mb',
  1204. ATTR_N: name, ATTR_k: 'oe'}
  1205. j = Job(TEST_USER, attrs=a)
  1206. j.create_script(self.cpuset_mem_script)
  1207. jid = self.server.submit(j)
  1208. a = {'job_state': 'R'}
  1209. self.server.expect(JOB, a, jid)
  1210. self.server.status(JOB, [ATTR_o, 'exec_host'], jid)
  1211. filename = j.attributes[ATTR_o]
  1212. self.tempfile.append(filename)
  1213. ehost = j.attributes['exec_host']
  1214. tmp_file = filename.split(':')[1]
  1215. tmp_host = ehost.split('/')[0]
  1216. tmp_out = self.wait_and_read_file(filename=tmp_file, host=tmp_host)
  1217. self.logger.info("Job output is %s\n" % tmp_out)
  1218. self.assertTrue(jid in tmp_out)
  1219. self.logger.info('job dir check passed')
  1220. if self.paths['cpuacct']:
  1221. self.assertTrue('CpuIDs=0' in tmp_out)
  1222. self.logger.info('CpuIDs check passed')
  1223. if self.paths['memory']:
  1224. self.assertTrue('MemorySocket=0' in tmp_out)
  1225. self.logger.info('MemorySocket check passed')
  1226. if self.swapctl == 'true':
  1227. self.assertTrue('MemoryLimit=314572800' in tmp_out)
  1228. self.logger.info('MemoryLimit check passed')
  1229. def test_cgroup_cpuset_and_memsw(self):
  1230. """
  1231. Test to verify that the job cgroup is created correctly
  1232. using the default memory and vmem
  1233. Check to see that cpuset.cpus=0, cpuset.mems=0 and that
  1234. memory.limit_in_bytes = 100663296
  1235. memory.memsw.limit_in_bytes = 100663296
  1236. """
  1237. name = 'CGROUP2'
  1238. self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
  1239. a = {'Resource_List.select': '1:ncpus=1:host=%s' %
  1240. self.hostA, ATTR_N: name}
  1241. j = Job(TEST_USER, attrs=a)
  1242. j.create_script(self.cpuset_mem_script)
  1243. jid = self.server.submit(j)
  1244. a = {'job_state': 'R'}
  1245. self.server.expect(JOB, a, jid)
  1246. self.server.status(JOB, [ATTR_o, 'exec_host'], jid)
  1247. filename = j.attributes[ATTR_o]
  1248. self.tempfile.append(filename)
  1249. ehost = j.attributes['exec_host']
  1250. tmp_file = filename.split(':')[1]
  1251. tmp_host = ehost.split('/')[0]
  1252. tmp_out = self.wait_and_read_file(filename=tmp_file, host=tmp_host)
  1253. self.logger.info("Job output is %s\n" % tmp_out)
  1254. self.assertTrue(jid in tmp_out)
  1255. self.logger.info('job dir check passed')
  1256. if self.paths['cpuacct']:
  1257. self.assertTrue('CpuIDs=0' in tmp_out)
  1258. self.logger.info('CpuIDs check passed')
  1259. if self.paths['memory']:
  1260. self.assertTrue('MemorySocket=0' in tmp_out)
  1261. self.logger.info('MemorySocket check passed')
  1262. if self.swapctl == 'true':
  1263. self.assertTrue('MemoryLimit=100663296' in tmp_out)
  1264. self.assertTrue('MemswLimit=100663296' in tmp_out)
  1265. self.logger.info('MemoryLimit check passed')
  1266. def test_cgroup_prefix_and_devices(self):
  1267. """
  1268. Test to verify that the cgroup prefix is set to propbs and that
  1269. only the devices subsystem is enabled with the correct devices
  1270. allowed
  1271. """
  1272. if not self.paths['devices']:
  1273. self.skipTest('Skipping test since no devices subsystem defined')
  1274. name = 'CGROUP3'
  1275. self.load_config(self.cfg2)
  1276. a = {'Resource_List.select': '1:ncpus=1:mem=300mb', ATTR_N: name}
  1277. j = Job(TEST_USER, attrs=a)
  1278. j.create_script(self.check_dirs_script)
  1279. jid = self.server.submit(j)
  1280. a = {'job_state': 'R'}
  1281. self.server.expect(JOB, a, jid)
  1282. self.server.status(JOB, [ATTR_o, 'exec_host'], jid)
  1283. filename = j.attributes[ATTR_o]
  1284. self.tempfile.append(filename)
  1285. ehost = j.attributes['exec_host']
  1286. tmp_file = filename.split(':')[1]
  1287. tmp_host = ehost.split('/')[0]
  1288. tmp_out = self.wait_and_read_file(filename=tmp_file, host=tmp_host)
  1289. check_devices = ['b *:* rwm',
  1290. 'c 5:1 rwm',
  1291. 'c 4:* rwm',
  1292. 'c 1:* rwm',
  1293. 'c 10:* rwm']
  1294. for device in check_devices:
  1295. self.assertTrue(device in tmp_out,
  1296. '"%s" not found in: %s' % (device, tmp_out))
  1297. self.logger.info('device_list check passed')
  1298. self.assertFalse('Disabled cgroup subsystems are populated '
  1299. 'with the job id' in tmp_out,
  1300. 'Found disabled cgroup subsystems populated')
  1301. self.logger.info('Disabled subsystems check passed')

    def test_cgroup_cpuset(self):
        """
        Test to verify that 2 jobs are not assigned the same cpus
        """
        pcpus = 0
        with open('/proc/cpuinfo', 'r') as desc:
            for line in desc:
                if re.match('^processor', line):
                    pcpus += 1
        if pcpus < 2:
            self.skipTest('Test requires at least two physical CPUs')
        name = 'CGROUP4'
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        # Submit two jobs
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, ATTR_N: name + 'a'}
        j1 = Job(TEST_USER, attrs=a)
        j1.create_script(self.cpuset_mem_script)
        jid1 = self.server.submit(j1)
        b = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, ATTR_N: name + 'b'}
        j2 = Job(TEST_USER, attrs=b)
        j2.create_script(self.cpuset_mem_script)
        jid2 = self.server.submit(j2)
        a = {'job_state': 'R'}
        # Make sure they are both running
        self.server.expect(JOB, a, jid1)
        self.server.expect(JOB, a, jid2)
        # Status the jobs for their output files
        attrib = [ATTR_o]
        self.server.status(JOB, attrib, jid1)
        filename1 = j1.attributes[ATTR_o]
        self.logger.info('Job1 .o file: %s' % filename1)
        self.tempfile.append(filename1)
        self.server.status(JOB, attrib, jid2)
        filename2 = j2.attributes[ATTR_o]
        self.logger.info('Job2 .o file: %s' % filename2)
        self.tempfile.append(filename2)
        # Read the output files
        tmp_file1 = filename1.split(':')[1]
        tmp_out1 = self.wait_and_read_file(filename=tmp_file1,
                                           host=self.hostA)
        self.logger.info("test output for job1: %s" % (tmp_out1))
        self.assertTrue(
            jid1 in tmp_out1, '%s not found in output on host %s'
            % (jid1, self.hostA))
        tmp_file2 = filename2.split(':')[1]
        tmp_out2 = self.wait_and_read_file(filename=tmp_file2,
                                           host=self.hostA)
        self.logger.info("test output for job2: %s" % (tmp_out2))
        self.assertTrue(
            jid2 in tmp_out2, '%s not found in output on host %s'
            % (jid2, self.hostA))
        self.logger.info('job dir check passed')
        # Ensure the CPU ID for each job differs
        cpuid1 = None
        for kv in tmp_out1:
            if 'CpuIDs=' in kv:
                cpuid1 = kv
                break
        self.assertNotEqual(cpuid1, None, 'Could not read first CPU ID.')
        cpuid2 = None
        for kv in tmp_out2:
            if 'CpuIDs=' in kv:
                cpuid2 = kv
                break
        self.assertNotEqual(cpuid2, None, 'Could not read second CPU ID.')
        self.logger.info("cpuid1 = %s and cpuid2 = %s" % (cpuid1, cpuid2))
        self.assertNotEqual(cpuid1, cpuid2,
                            'Processes should be assigned to different CPUs')
        self.logger.info('CpuIDs check passed')

    def test_cgroup_enforce_memory(self):
        """
        Test to verify that the job is killed when it tries to
        use more memory than it requested
        """
        name = 'CGROUP5'
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.eatmem_job1)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        # mem and vmem limit will both be set, and either could be detected
        self.momA.log_match('%s;Cgroup mem(ory|sw) limit exceeded' % jid,
                            regexp=True,
                            max_attempts=20)

    def test_cgroup_enforce_memsw(self):
        """
        Test to verify that the job is killed when it tries to
        use more vmem than it requested
        """
        # run the test only if swap space is available
        if have_swap() == 0:
            self.skipTest('no swap space available on the local host')
        fn = self.get_cgroup_job_dir('memory', '123.foo', self.hostA)
        # Get the grandparent directory
        fn = os.path.dirname(fn)
        fn = os.path.dirname(fn)
        fn = os.path.join(fn, 'memory.memsw.limit_in_bytes')
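        # memory.memsw.* files only exist when the kernel was built and
        # booted with swap accounting enabled, so their absence means a
        # vmem (mem+swap) limit cannot be enforced on this node.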
        if not self.is_file(fn, self.hostA):
            self.skipTest('vmem resource not present on node')
        name = 'CGROUP6'
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        a = {
            'Resource_List.select':
            '1:ncpus=1:mem=300mb:vmem=320mb:host=%s' % self.hostA,
            ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.eatmem_job1)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, [ATTR_o, 'exec_host'], jid)
        filename = j.attributes[ATTR_o]
        self.tempfile.append(filename)
        ehost = j.attributes['exec_host']
        tmp_file = filename.split(':')[1]
        tmp_host = ehost.split('/')[0]
        tmp_out = self.wait_and_read_file(filename=tmp_file, host=tmp_host)
        self.assertTrue('MemoryError' in tmp_out,
                        'MemoryError not present in output')

    @timeout(300)
    def test_cgroup_offline_node(self):
        """
        Test to verify that the node is offlined when it can't clean up
        the cgroup and brought back online once the cgroup is cleaned up
        """
        name = 'CGROUP7'
        if 'freezer' not in self.paths:
            self.skipTest('Freezer cgroup is not mounted')
        fdir = self.get_cgroup_job_dir('freezer', '123.foo', self.hostA)
        # Get the grandparent directory
        fdir = os.path.dirname(fdir)
        fdir = os.path.dirname(fdir)
        if not self.is_dir(fdir, self.hostA):
            self.skipTest('Freezer cgroup is not found')
        # Configure the hook
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, 'Resource_List.walltime': 3, ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        filename = j.attributes[ATTR_o]
        self.tempfile.append(filename)
        tmp_file = filename.split(':')[1]
        # Query the pids in the cgroup
        jdir = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        tasks_file = os.path.join(jdir, 'tasks')
        time.sleep(2)
        ret = self.du.cat(self.hostA, tasks_file, sudo=True)
        tasks = ret['out']
        if len(tasks) < 2:
            self.skipTest('pbs_cgroups_hook: only one task in cgroup')
        self.logger.info('Tasks: %s' % tasks)
        self.assertTrue(tasks, 'No tasks in cpuset cgroup for job')
        # Make dir in freezer subsystem
        fdir = os.path.join(fdir, 'PtlPbs')
        if not self.du.isdir(fdir):
            self.du.mkdir(hostname=self.hostA, path=fdir,
                          mode=0o755, sudo=True)
        self.tempfile.append(fdir)
        # Write a PID into the tasks file for the freezer cgroup
        task_file = os.path.join(fdir, 'tasks')
        success = False
        for pid in reversed(tasks[1:]):
            fn = self.du.create_temp_file(hostname=self.hostA, body=pid)
            self.tempfile.append(fn)
            ret = self.du.run_copy(hosts=self.hostA, src=fn,
                                   dest=task_file, sudo=True,
                                   uid='root', gid='root',
                                   mode=0o644)
            if ret['rc'] == 0:
                success = True
                break
            self.logger.info('Failed to copy %s to %s on %s' %
                             (fn, task_file, self.hostA))
            self.logger.info('rc = %d', ret['rc'])
            self.logger.info('stdout = %s', ret['out'])
            self.logger.info('stderr = %s', ret['err'])
        if not success:
            self.skipTest('pbs_cgroups_hook: Failed to copy freezer tasks')
        # Freeze the cgroup
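        # While a job process sits in a FROZEN freezer cgroup it cannot be
        # signalled or exit, so the hook cannot remove the job's cgroup at
        # job end; that is the condition expected to drive the node
        # offline below.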
        freezer_file = os.path.join(fdir, 'freezer.state')
        state = 'FROZEN'
        fn = self.du.create_temp_file(hostname=self.hostA, body=state)
        self.tempfile.append(fn)
        ret = self.du.run_copy(self.hostA, src=fn,
                               dest=freezer_file, sudo=True,
                               uid='root', gid='root',
                               mode=0o644)
        if ret['rc'] != 0:
            self.skipTest('pbs_cgroups_hook: Failed to copy '
                          'freezer state FROZEN')
        self.server.expect(NODE, {'state': (MATCH_RE, 'offline')},
                           id=self.nodeA, interval=3)
        # Thaw the cgroup
        state = 'THAWED'
        fn = self.du.create_temp_file(hostname=self.hostA, body=state)
        self.tempfile.append(fn)
        ret = self.du.run_copy(self.hostA, src=fn,
                               dest=freezer_file, sudo=True,
                               uid='root', gid='root',
                               mode=0o644)
        if ret['rc'] != 0:
            self.skipTest('pbs_cgroups_hook: Failed to copy '
                          'freezer state THAWED')
        time.sleep(1)
        self.du.rm(hostname=self.hostA, path=os.path.dirname(fdir),
                   force=True, recursive=True, sudo=True)
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3)

    def test_cgroup_cpuset_host_excluded(self):
        """
        Test to verify that cgroups subsystems are not enforced on nodes
        that have the exclude_hosts set but are enforced on other systems
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        name = 'CGROUP10'
        mom, _ = self.get_host_names(self.hostA)
        self.load_config(self.cfg1 % ('', '', '', '%s' % mom, self.swapctl))
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        hostn = self.get_hostname(self.hostA)
        self.momA.log_match('cgroup excluded for subsystem cpuset '
                            'on host %s' % hostn,
                            starttime=self.server.ctime)
        cpath = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        self.assertFalse(self.is_dir(cpath, self.hostA))
        # Now try a job on momB
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostB, ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid2 = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid2)
        cpath = self.get_cgroup_job_dir('cpuset', jid2, self.hostB)
        self.logger.info('Checking for %s on %s' % (cpath, self.momB))
        self.assertTrue(self.is_dir(cpath, self.hostB))

    def test_cgroup_run_on_host(self):
        """
        Test to verify that the cgroup hook only runs on nodes
        in the run_only_on_hosts
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        name = 'CGROUP11'
        mom, log = self.get_host_names(self.hostA)
        self.load_config(self.cfg1 % ('', '', '%s' % mom, '', self.swapctl))
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostB, ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        time.sleep(1)
        hostn = self.get_hostname(self.hostB)
        self.momB.log_match('%s is not in the approved host list: [%s]' %
                            (hostn, log), starttime=self.server.ctime)
        cpath = self.get_cgroup_job_dir('memory', jid, self.hostB)
        self.assertFalse(self.is_dir(cpath, self.hostB))
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid2 = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid2)
        self.server.status(JOB, ATTR_o, jid2)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        cpath = self.get_cgroup_job_dir('memory', jid2, self.hostA)
        self.assertTrue(self.is_dir(cpath, self.hostA))

    def test_cgroup_qstat_resources(self):
        """
        Test to verify that cgroups are reporting usage for
        cput, mem, and vmem in qstat
        """
        name = 'CGROUP14'
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        a = {'Resource_List.select': '1:ncpus=1:mem=500mb', ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.eatmem_job2)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, [ATTR_o, 'exec_host'], jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        host = j.attributes['exec_host']
        self.logger.info('OUTPUT: %s' % o)
        resc_list = ['resources_used.cput']
        resc_list += ['resources_used.mem']
        resc_list += ['resources_used.vmem']
        qstat1 = self.server.status(JOB, resc_list, id=jid)
        for q in qstat1:
            self.logger.info('Q1: %s' % q)
        cput1 = qstat1[0]['resources_used.cput']
        mem1 = qstat1[0]['resources_used.mem']
        vmem1 = qstat1[0]['resources_used.vmem']
        self.logger.info('Waiting 25 seconds for CPU time to accumulate')
        time.sleep(25)
        qstat2 = self.server.status(JOB, resc_list, id=jid)
        for q in qstat2:
            self.logger.info('Q2: %s' % q)
        cput2 = qstat2[0]['resources_used.cput']
        mem2 = qstat2[0]['resources_used.mem']
        vmem2 = qstat2[0]['resources_used.vmem']
        self.assertNotEqual(cput1, cput2)
        self.assertNotEqual(mem1, mem2)
        self.assertNotEqual(vmem1, vmem2)

    @timeout(500)
    def test_cgroup_reserve_mem(self):
        """
        Test to verify that the mom reserves memory for the OS
        when there is a reserve mem request in the config.
        Install cfg3 and then cfg4 and measure the difference
        between the amount of available memory and memsw.
        For example, consider a system with 1GB of physical memory
        and 1GB of active swap: with cfg3 in place, we should
        see 1GB - 50MB = 950MB of available memory and
        2GB - (50MB + 45MB) = 1905MB of available vmem.
        With cfg4 in place, we should see 1GB - 100MB = 900MB
        of available memory and 2GB - (100MB + 90MB) = 1810MB
        of available vmem. When we calculate the differences
        we get:
        mem: 950MB - 900MB = 50MB = 51200KB
        vmem: 1905MB - 1810MB = 95MB = 97280KB
        """
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        if self.swapctl == 'true':
            vmem = self.server.status(NODE, 'resources_available.vmem',
                                      id=self.nodeA)
            self.logger.info('vmem: %s' % str(vmem))
            vmem1 = PbsTypeSize(vmem[0]['resources_available.vmem'])
            self.logger.info('Vmem-1: %s' % vmem1.value)
        mem = self.server.status(NODE, 'resources_available.mem',
                                 id=self.nodeA)
        mem1 = PbsTypeSize(mem[0]['resources_available.mem'])
        self.logger.info('Mem-1: %s' % mem1.value)
        self.load_config(self.cfg4 % (self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        if self.swapctl == 'true':
            vmem = self.server.status(NODE, 'resources_available.vmem',
                                      id=self.nodeA)
            vmem2 = PbsTypeSize(vmem[0]['resources_available.vmem'])
            self.logger.info('Vmem-2: %s' % vmem2.value)
            vmem_resv = vmem1 - vmem2
            self.logger.info('Vmem resv: %s' % vmem_resv.value)
            self.assertEqual(vmem_resv.value, 97280)
            self.assertEqual(vmem_resv.unit, 'kb')
        mem = self.server.status(NODE, 'resources_available.mem',
                                 id=self.nodeA)
        mem2 = PbsTypeSize(mem[0]['resources_available.mem'])
        self.logger.info('Mem-2: %s' % mem2.value)
        mem_resv = mem1 - mem2
        self.logger.info('Mem resv: %s' % mem_resv.value)
        self.assertEqual(mem_resv.value, 51200)
        self.assertEqual(mem_resv.unit, 'kb')

    def test_cgroup_multi_node(self):
        """
        Test multi-node jobs with cgroups
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        name = 'CGROUP16'
        self.load_config(self.cfg6 % (self.swapctl))
        a = {'Resource_List.select': '2:ncpus=1:mem=100mb',
             'Resource_List.place': 'scatter', ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, 'exec_host', jid)
        ehost = j.attributes['exec_host']
        tmp_host = ehost.split('+')
        ehost1 = tmp_host[0].split('/')[0]
        ehjd1 = self.get_cgroup_job_dir('memory', jid, ehost1)
        self.assertTrue(self.is_dir(ehjd1, ehost1),
                        'Missing memory subdirectory: %s' % ehjd1)
        ehost2 = tmp_host[1].split('/')[0]
        ehjd2 = self.get_cgroup_job_dir('memory', jid, ehost2)
        self.assertTrue(self.is_dir(ehjd2, ehost2))
        # Wait for the job to finish and make sure that the cgroup
        # directories have been cleaned up by the hook
        self.server.expect(JOB, 'queue', op=UNSET, offset=15, interval=1,
                           id=jid)
        self.assertFalse(self.is_dir(ehjd1, ehost1),
                         'Directory still present: %s' % ehjd1)
        self.assertFalse(self.is_dir(ehjd2, ehost2),
                         'Directory still present: %s' % ehjd2)

    def test_cgroup_job_array(self):
        """
        Test that cgroups are created for subjobs like a regular job
        """
        name = 'CGROUP17'
        self.load_config(self.cfg1 % ('', '', '', '', self.swapctl))
        a = {'Resource_List.select': '1:ncpus=1:mem=300mb:host=%s' %
             self.hostA, ATTR_N: name, ATTR_J: '1-4',
             'Resource_List.place': 'pack:excl'}
        j = Job(TEST_USER, attrs=a)
        j.set_sleep_time(60)
        jid = self.server.submit(j)
        a = {'job_state': 'B'}
        self.server.expect(JOB, a, jid)
        # Get subjob ID
        subj1 = jid.replace('[]', '[1]')
        self.server.expect(JOB, {'job_state': 'R'}, subj1)
        rv = self.server.status(JOB, ['exec_host'], subj1)
        ehost = rv[0].get('exec_host')
        ehost1 = ehost.split('/')[0]
        # Verify that cgroup files are created for subjobs
        # but not for the parent job array
        cpath = self.get_cgroup_job_dir('memory', subj1, ehost1)
        self.assertTrue(self.is_dir(cpath, ehost1))
        cpath = self.get_cgroup_job_dir('memory', jid, ehost1)
        self.assertFalse(self.is_dir(cpath, ehost1))
        # Verify that subjob4 is queued and that no cgroup
        # files are created for the queued subjob
        subj4 = jid.replace('[]', '[4]')
        self.server.expect(JOB, {'job_state': 'Q'}, id=subj4)
        cpath = self.get_cgroup_job_dir('memory', subj4, ehost1)
        self.assertFalse(self.is_dir(cpath, self.hostA))
        # Delete subjob1 and verify that its cgroup files are cleaned up
        self.server.delete(id=subj1)
        self.server.expect(JOB, {'job_state': 'X'}, subj1)
        cpath = self.get_cgroup_job_dir('memory', subj1, ehost1)
        self.assertFalse(self.is_dir(cpath, ehost1))
        # Verify that subjob2 is running
        subj2 = jid.replace('[]', '[2]')
        self.server.expect(JOB, {'job_state': 'R'}, id=subj2)
        # Force delete the subjob and verify its cgroup
        # files are cleaned up
        self.server.delete(id=subj2, extend='force')
        self.server.expect(JOB, {'job_state': 'X'}, subj2)
        # Allow extra time for the files to be cleaned up
        time.sleep(2)
        cpath = self.get_cgroup_job_dir('memory', subj2, ehost1)
        self.assertFalse(self.is_dir(cpath, ehost1))

    def test_cgroup_cleanup(self):
        """
        Test that cgroup files are cleaned up after qdel
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        self.load_config(self.cfg1 % ('', '', '', '', self.swapctl))
        a = {'Resource_List.select': '2:ncpus=1:mem=100mb',
             'Resource_List.place': 'scatter'}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ['exec_host'], jid)
        ehost = j.attributes['exec_host']
        tmp_host = ehost.split('+')
        ehost1 = tmp_host[0].split('/')[0]
        ehost2 = tmp_host[1].split('/')[0]
        ehjd1 = self.get_cgroup_job_dir('cpuset', jid, ehost1)
        self.assertTrue(self.is_dir(ehjd1, ehost1))
        ehjd2 = self.get_cgroup_job_dir('cpuset', jid, ehost2)
        self.assertTrue(self.is_dir(ehjd2, ehost2))
        self.server.delete(id=jid, wait=True)
        self.assertFalse(self.is_dir(ehjd1, ehost1))
        self.assertFalse(self.is_dir(ehjd2, ehost2))

    def test_cgroup_execjob_end_should_delete_cgroup(self):
        """
        Test to verify that if the execjob_epilogue hook failed to run or
        to clean up the cgroup files for a job, the execjob_end hook
        should clean them up
        """
        self.load_config(self.cfg4 % (self.swapctl))
        # remove epilogue and periodic from the list of events
        attr = {'enabled': 'True',
                'event': '"execjob_begin,execjob_launch,'
                'execjob_attach,execjob_end,exechost_startup"'}
        self.server.manager(MGR_CMD_SET, HOOK, attr, self.hook_name)
        self.server.expect(NODE, {'state': 'free'}, id=self.nodeA)
        j = Job(TEST_USER)
        j.set_sleep_time(1)
        jid = self.server.submit(j)
        # wait for job to finish
        self.server.expect(JOB, 'queue', id=jid, op=UNSET, max_attempts=20,
                           interval=1, offset=1)
        # verify that the cgroup files for this job are gone even though
        # the epilogue and periodic events are disabled
        for subsys, path in self.paths.items():
            # only check under subsystems that are enabled
            enabled_subsys = ['cpuacct', 'cpuset', 'memory', 'memsw']
            if not any([x in subsys for x in enabled_subsys]):
                continue
            if path:
                filename = os.path.join(path, 'pbspro', str(jid))
                self.logger.info('Checking that file %s should not exist'
                                 % filename)
                self.assertFalse(os.path.isfile(filename))

    @skipOnCray
    def test_cgroup_assign_resources_mem_only_vnode(self):
        """
        Test to verify that a job requesting more mem than any single
        vnode has works properly
        """
        vn_attrs = {ATTR_rescavail + '.ncpus': 1,
                    ATTR_rescavail + '.mem': '500mb'}
        self.load_config(self.cfg4 % (self.swapctl))
        self.server.expect(NODE, {ATTR_NODE_state: 'free'}, id=self.nodeA)
        self.server.create_vnodes('vnode', vn_attrs, 2,
                                  list(self.moms.values())[0])
        self.server.expect(NODE, {ATTR_NODE_state: 'free'}, id=self.nodeA)
        a = {'Resource_List.select': '1:ncpus=1:mem=500mb'}
        j1 = Job(TEST_USER, attrs=a)
        j1.create_script('date')
        jid1 = self.server.submit(j1)
        self.server.expect(JOB, 'queue', id=jid1, op=UNSET, max_attempts=20,
                           interval=1, offset=1)
        a = {'Resource_List.select': '1:ncpus=1:mem=1000mb'}
        j2 = Job(TEST_USER, attrs=a)
        j2.create_script('date')
        jid2 = self.server.submit(j2)
        self.server.expect(JOB, 'queue', id=jid2, op=UNSET, max_attempts=30,
                           interval=1, offset=1)
        a = {'Resource_List.select': '1:ncpus=1:mem=40gb'}
        j3 = Job(TEST_USER, attrs=a)
        j3.create_script('date')
        jid3 = self.server.submit(j3)
        a = {'job_state': 'Q',
             'comment':
             (MATCH_RE,
              '.*Can Never Run: Insufficient amount of resource: mem.*')}
        self.server.expect(JOB, a, attrop=PTL_AND, id=jid3, offset=10,
                           interval=1, max_attempts=30)

    @timeout(500)
    def test_cgroup_cpuset_exclude_cpu(self):
        """
        Confirm that exclude_cpus reduces resources_available.ncpus
        """
        # Fetch the unmodified value of resources_available.ncpus
        self.load_config(self.cfg5 % ('false', '', 'false', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        result = self.server.status(NODE, 'resources_available.ncpus',
                                    id=self.nodeA)
        orig_ncpus = int(result[0]['resources_available.ncpus'])
        self.assertGreater(orig_ncpus, 0)
        self.logger.info('Original value of ncpus: %d' % orig_ncpus)
        if orig_ncpus < 2:
            self.skipTest('Node must have at least two CPUs')
        # Now exclude CPU zero
        self.load_config(self.cfg5 % ('false', '0', 'false', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        result = self.server.status(NODE, 'resources_available.ncpus',
                                    id=self.nodeA)
        new_ncpus = int(result[0]['resources_available.ncpus'])
        self.assertGreater(new_ncpus, 0)
        self.logger.info('New value with one CPU excluded: %d' % new_ncpus)
        self.assertEqual((new_ncpus + 1), orig_ncpus)
        # Repeat the process with vnode_per_numa_node set to true
        vnode = '%s[0]' % self.nodeA
        self.load_config(self.cfg5 % ('true', '', 'false', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=vnode, interval=3, offset=10)
        result = self.server.status(NODE, 'resources_available.ncpus',
                                    id=vnode)
        orig_ncpus = int(result[0]['resources_available.ncpus'])
        self.assertGreater(orig_ncpus, 0)
        self.logger.info('Original value of vnode ncpus: %d' % orig_ncpus)
        # Exclude CPU zero again
        self.load_config(self.cfg5 % ('true', '0', 'false', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=vnode, interval=3, offset=10)
        result = self.server.status(NODE, 'resources_available.ncpus',
                                    id=vnode)
        new_ncpus = int(result[0]['resources_available.ncpus'])
        self.assertEqual((new_ncpus + 1), orig_ncpus)

    def test_cgroup_cpuset_mem_fences(self):
        """
        Confirm that mem_fences affects setting of cpuset.mems
        """
        cpuset_base = self.get_cgroup_job_dir('cpuset', '123.foo',
                                              self.hostA)
        # Get the grandparent directory
        cpuset_base = os.path.dirname(cpuset_base)
        cpuset_base = os.path.dirname(cpuset_base)
        cpuset_mems = os.path.join(cpuset_base, 'cpuset.mems')
        result = self.du.cat(hostname=self.hostA, filename=cpuset_mems,
                             sudo=True)
        if result['rc'] != 0 or result['out'][0] == '0':
            self.skipTest('Test requires two NUMA nodes')
        # First try with mem_fences set to true (the default)
        self.load_config(self.cfg5 % ('false', '', 'true', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        fn = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        fn = os.path.join(fn, 'cpuset.mems')
        result = self.du.cat(hostname=self.hostA, filename=fn, sudo=True)
        self.assertEqual(result['rc'], 0)
        self.assertEqual(result['out'][0], '0')
        # Now try with mem_fences set to false
        self.load_config(self.cfg5 % ('false', '', 'false', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        fn = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        fn = os.path.join(fn, 'cpuset.mems')
        result = self.du.cat(hostname=self.hostA, filename=fn, sudo=True)
        self.assertEqual(result['rc'], 0)
        self.assertNotEqual(result['out'][0], '0')

    def test_cgroup_cpuset_mem_hardwall(self):
        """
        Confirm that mem_hardwall affects setting of cpuset.mem_hardwall
        """
        self.load_config(self.cfg5 % ('false', '', 'true', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        memh_path = 'cpuset.mem_hardwall'
        fn = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        if self.noprefix:
            memh_path = 'mem_hardwall'
        fn = os.path.join(fn, memh_path)
        result = self.du.cat(hostname=self.hostA, filename=fn, sudo=True)
        self.assertEqual(result['rc'], 0)
        self.assertEqual(result['out'][0], '0')
        self.load_config(self.cfg5 % ('false', '', 'true', 'true',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        fn = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        fn = os.path.join(fn, memh_path)
        result = self.du.cat(hostname=self.hostA, filename=fn, sudo=True)
        self.assertEqual(result['rc'], 0)
        self.assertEqual(result['out'][0], '1')

    def test_cgroup_find_gpus(self):
        """
        Confirm that the hook finds the correct number
        of GPUs.
        """
        if not self.paths['devices']:
            self.skipTest('Skipping test since no devices subsystem defined')
        name = 'CGROUP3'
        self.load_config(self.cfg2)
        cmd = ['nvidia-smi', '-L']
        try:
            rv = self.du.run_cmd(cmd=cmd)
        except OSError:
            rv = {'err': True}
        if rv['err'] or 'GPU' not in rv['out'][0]:
            self.skipTest('Skipping test since nvidia-smi not found')
        gpus = int(len(rv['out']))
        if gpus < 1:
            self.skipTest('Skipping test since no gpus found')
        self.server.expect(NODE, {'state': 'free'}, id=self.nodeA)
        ngpus = self.server.status(NODE, 'resources_available.ngpus',
                                   id=self.nodeA)[0]
        ngpus = int(ngpus['resources_available.ngpus'])
        self.assertEqual(gpus, ngpus, 'ngpus is incorrect')
        a = {'Resource_List.select': '1:ngpus=1', ATTR_N: name}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.check_gpu_script)
        jid = self.server.submit(j)
        self.server.expect(JOB, {'job_state': 'R'}, jid)
        self.server.status(JOB, [ATTR_o, 'exec_host'], jid)
        filename = j.attributes[ATTR_o]
        self.tempfile.append(filename)
        ehost = j.attributes['exec_host']
        tmp_file = filename.split(':')[1]
        tmp_host = ehost.split('/')[0]
        tmp_out = self.wait_and_read_file(filename=tmp_file, host=tmp_host)
        self.logger.info(tmp_out)
        self.assertIn('There are 1 GPUs', tmp_out, 'No gpus were assigned')
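        # NVIDIA character devices use major number 195; minor 255 is the
        # nvidiactl control device, while each GPU gets its own minor, so
        # a '195:<minor other than 255>' entry indicates an assigned GPU.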
        self.assertIn('c 195:255 rwm', tmp_out, 'Nvidia controller not found')
        m = re.search(r'195:(?!255)', '\n'.join(tmp_out))
        self.assertIsNotNone(m, 'No gpu assigned in cgroups')

    def test_cgroup_cpuset_memory_spread_page(self):
        """
        Confirm that mem_spread_page affects setting of
        cpuset.memory_spread_page
        """
        self.load_config(self.cfg5 % ('false', '', 'true', 'false',
                                      'false', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        spread_path = 'cpuset.memory_spread_page'
        fn = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        if self.noprefix:
            spread_path = 'memory_spread_page'
        fn = os.path.join(fn, spread_path)
        result = self.du.cat(hostname=self.hostA, filename=fn, sudo=True)
        self.assertEqual(result['rc'], 0)
        self.assertEqual(result['out'][0], '0')
        self.load_config(self.cfg5 % ('false', '', 'true', 'false',
                                      'true', self.swapctl))
        self.server.expect(NODE, {'state': 'free'},
                           id=self.nodeA, interval=3, offset=10)
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid)
        self.server.status(JOB, ATTR_o, jid)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        fn = self.get_cgroup_job_dir('cpuset', jid, self.hostA)
        fn = os.path.join(fn, spread_path)
        result = self.du.cat(hostname=self.hostA, filename=fn, sudo=True)
        self.assertEqual(result['rc'], 0)
        self.assertEqual(result['out'][0], '1')

    def test_cgroup_use_hierarchy(self):
        """
        Test that memory.use_hierarchy is enabled by default
        when the PBS cgroups hook is instantiated
        """
        now = int(time.time())
        # Remove PBS directories from the memory subsystem
        if 'memory' in self.paths and self.paths['memory']:
            cdir = self.paths['memory']
            if os.path.isdir(cdir):
                cpath = os.path.join(cdir, 'pbspro')
                if not os.path.isdir(cpath):
                    cpath = os.path.join(cdir, 'pbspro.slice')
        else:
            self.skipTest(
                "memory subsystem is not enabled for cgroups")
        cmd = ["rmdir", cpath]
        self.logger.info("Removing %s" % cpath)
        self.du.run_cmd(cmd=cmd, sudo=True)
        self.load_config(self.cfg6 % (self.swapctl))
        self.momA.restart()
        # Wait for exechost_startup hook to run
        self.momA.log_match("Hook handler returned success for"
                            " exechost_startup event",
                            starttime=now)
        # Verify that memory.use_hierarchy is enabled
        fpath = os.path.join(cpath, "memory.use_hierarchy")
        self.logger.info("looking for file %s" % fpath)
        if os.path.isfile(fpath):
            with open(fpath, 'r') as fd:
                val = fd.read()
            self.assertEqual(
                val.rstrip(), "1", "%s is not equal to 1"
                % val.rstrip())
            self.logger.info("memory.use_hierarchy is enabled")
        else:
            self.fail("File %s not present" % fpath)

    def test_cgroup_periodic_update_known_jobs(self):
        """
        Verify that jobs known to mom are updated, not orphans
        """
        conf = {'freq': 5, 'order': 100}
        self.server.manager(MGR_CMD_SET, HOOK, conf, self.hook_name)
        self.load_config(self.cfg3 % ('', '', '', self.swapctl, ''))
        # Submit a short job and let it run to completion
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep5_job)
        jid1 = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid1)
        self.server.status(JOB, ATTR_o, jid1)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        err_msg = "Unexpected error in pbs_cgroups " + \
            "handling exechost_periodic event: TypeError"
        self.mom.log_match(err_msg, max_attempts=3,
                           interval=1, n=100, existence=False)
        self.server.log_match(jid1 + ';Exit_status=0')
        # Create a periodic hook that runs more frequently than the
        # cgroup hook to prepend jid1 to mom_priv/hooks/hook_data/cgroup_jobs
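        # cgroup_jobs holds the list of job ids the cgroup hook treats as
        # local to this mom; re-adding the already finished jid1 makes it
        # look like an orphan entry that the periodic handler is expected
        # to ignore rather than try to update.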
        hookname = 'prependjob'
        hookbody = """
import pbs
import os
import re
import traceback
event = pbs.event()
jid_to_prepend = '%s'
pbs_home = ''
pbs_mom_home = ''
if 'PBS_HOME' in os.environ:
    pbs_home = os.environ['PBS_HOME']
if 'PBS_MOM_HOME' in os.environ:
    pbs_mom_home = os.environ['PBS_MOM_HOME']
pbs_conf = pbs.get_pbs_conf()
if pbs_conf:
    if not pbs_home and 'PBS_HOME' in pbs_conf:
        pbs_home = pbs_conf['PBS_HOME']
    if not pbs_mom_home and 'PBS_MOM_HOME' in pbs_conf:
        pbs_mom_home = pbs_conf['PBS_MOM_HOME']
if not pbs_home or not pbs_mom_home:
    if 'PBS_CONF_FILE' in os.environ:
        pbs_conf_file = os.environ['PBS_CONF_FILE']
    else:
        pbs_conf_file = os.path.join(os.sep, 'etc', 'pbs.conf')
    regex = re.compile(r'\\s*([^\\s]+)\\s*=\\s*([^\\s]+)\\s*')
    try:
        with open(pbs_conf_file, 'r') as desc:
            for line in desc:
                match = regex.match(line)
                if match:
                    if not pbs_home and match.group(1) == 'PBS_HOME':
                        pbs_home = match.group(2)
                    if not pbs_mom_home and (match.group(1) ==
                                             'PBS_MOM_HOME'):
                        pbs_mom_home = match.group(2)
    except Exception:
        pass
if not pbs_home:
    pbs.logmsg(pbs.EVENT_DEBUG, 'Failed to locate PBS_HOME')
    event.reject()
if not pbs_mom_home:
    pbs_mom_home = pbs_home
jobsfile = os.path.join(pbs_mom_home, 'mom_priv', 'hooks',
                        'hook_data', 'cgroup_jobs')
try:
    with open(jobsfile, 'r+') as desc:
        joblist = desc.readline().split()
        jobset = set(joblist)
        if jid_to_prepend not in jobset:
            jobset.add(jid_to_prepend)
        desc.seek(0)
        desc.write(' '.join(jobset))
        desc.truncate()
except Exception as exc:
    pbs.logmsg(pbs.EVENT_DEBUG, 'Failed to modify ' + jobsfile)
    pbs.logmsg(pbs.EVENT_DEBUG,
               str(traceback.format_exc().strip().splitlines()))
    event.reject()
event.accept()
""" % jid1
        events = '"execjob_begin,exechost_periodic"'
        hookconf = {'enabled': 'True', 'freq': 2, 'alarm': 30,
                    'event': events}
        self.server.create_import_hook(hookname, hookconf, hookbody,
                                       overwrite=True)
        # Submit a second job and verify that the following message
        # does NOT appear in the mom log:
        # _exechost_periodic_handler: Failed to update jid1
        presubmit = int(time.time())
        a = {'Resource_List.select': '1:ncpus=1:mem=100mb:host=%s' %
             self.hostA}
        j = Job(TEST_USER, attrs=a)
        j.create_script(self.sleep15_job)
        jid2 = self.server.submit(j)
        a = {'job_state': 'R'}
        self.server.expect(JOB, a, jid2)
        self.server.status(JOB, ATTR_o, jid2)
        o = j.attributes[ATTR_o]
        self.tempfile.append(o)
        err_msg = "Unexpected error in pbs_cgroups " + \
            "handling exechost_periodic event: TypeError"
        self.mom.log_match(err_msg, max_attempts=3,
                           interval=1, n=100, existence=False)
        self.server.log_match(jid2 + ';Exit_status=0')
        self.server.manager(MGR_CMD_DELETE, HOOK, None, hookname)
        command = ['truncate', '-s0',
                   os.path.join(self.momA.pbs_conf['PBS_HOME'], 'mom_priv',
                                'hooks', 'hook_data', 'cgroup_jobs')]
        self.du.run_cmd(cmd=command, hosts=self.hostA, sudo=True)
        logmsg = '_exechost_periodic_handler: Failed to update %s' % jid1
        self.momA.log_match(msg=logmsg, starttime=presubmit,
                            max_attempts=1, existence=False)

    def check_req_rjs(self):
        """
        Check the requirements for the reliable job startup tests.
        MomA must have two free vnodes and MomB one free vnode.
        Return 1 if the requirements are not satisfied.
        """
        # Check that momA has two free vnodes
        attr = {'state': 'free'}
        rv1 = True
        try:
            self.server.expect(VNODE, attr, id='%s[0]' % self.hostA,
                               max_attempts=3, interval=2)
        except PtlExpectError as exc:
            rv1 = exc.rv
        rv2 = True
        try:
            self.server.expect(VNODE, attr, id='%s[1]' % self.hostA,
                               max_attempts=3, interval=2)
        except PtlExpectError as exc:
            rv2 = exc.rv
        # Check that momB has one free vnode
        rv3 = True
        try:
            self.server.expect(VNODE, attr, id='%s[0]' % self.hostB,
                               max_attempts=3, interval=2)
        except PtlExpectError as exc:
            rv3 = exc.rv
        if not rv1 or not rv2 or not rv3:
            return 1
        return 0

    def test_cgroup_release_nodes(self):
        """
        Verify that exec_vnode values are trimmed
        when execjob_launch hook prunes job via release_nodes(),
        tolerate_node_failures=job_start
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        self.load_config(self.cfg7)
        # Check that MomA has two free vnodes and MomB has a free vnode
        if self.check_req_rjs() == 1:
            self.skipTest(
                'MomA must have two free vnodes and MomB one free vnode')
        # instantiate queuejob hook
        hook_event = 'queuejob'
        hook_name = 'qjob'
        a = {'event': hook_event, 'enabled': 'true'}
        self.server.create_import_hook(hook_name, a, self.qjob_hook_body)
        # instantiate execjob_launch hook
        hook_event = 'execjob_launch'
        hook_name = 'launch'
        a = {'event': hook_event, 'enabled': 'true'}
        self.keep_select = 'e.job.Resource_List["site"]'
        self.server.create_import_hook(
            hook_name, a, self.launch_hook_body % (self.keep_select))
        # Submit a job that requires 2 nodes
        j = Job(TEST_USER)
        j.create_script(self.job_scr2)
        jid = self.server.submit(j)
        # Check the exec_vnode while in substate 41
        self.server.expect(JOB, {ATTR_substate: '41'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode1 = job_stat[0]['exec_vnode']
        self.logger.info("initial exec_vnode: %s" % execvnode1)
        initial_vnodes = execvnode1.split('+')
        # Check the exec_vnode after job is in substate 42
        self.server.expect(JOB, {ATTR_substate: '42'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode2 = job_stat[0]['exec_vnode']
        self.logger.info("pruned exec_vnode: %s" % execvnode2)
        pruned_vnodes = execvnode2.split('+')
        # Check that the pruned exec_vnode has one less than initial value
        self.assertEqual(len(pruned_vnodes) + 1, len(initial_vnodes))
        # Find the released vnode
        for vn in initial_vnodes:
            if vn not in pruned_vnodes:
                rel_vn = vn
        vnodeB = rel_vn.split(':')[0].split('(')[1]
        self.logger.info("released vnode: %s" % vnodeB)
        # Submit a second job requesting the released vnode, job runs
        j2 = Job(TEST_USER,
                 {ATTR_l + '.select': '1:ncpus=1:mem=2gb:vnode=%s' % vnodeB})
        jid2 = self.server.submit(j2)
        self.server.expect(JOB, {ATTR_state: 'R'}, id=jid2)

    def test_cgroup_sismom_resize_fail(self):
        """
        Verify that exec_vnode values are trimmed
        when execjob_launch hook prunes job via release_nodes(),
        exec_job_resize failure in sister mom,
        tolerate_node_failures=job_start
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        self.load_config(self.cfg7)
        # Check that MomA has two free vnodes and MomB has a free vnode
        if self.check_req_rjs() == 1:
            self.skipTest(
                'MomA must have two free vnodes and MomB one free vnode')
        # instantiate queuejob hook
        hook_event = 'queuejob'
        hook_name = 'qjob'
        a = {'event': hook_event, 'enabled': 'true'}
        self.server.create_import_hook(hook_name, a, self.qjob_hook_body)
        # instantiate execjob_launch hook
        hook_event = 'execjob_launch'
        hook_name = 'launch'
        a = {'event': hook_event, 'enabled': 'true'}
        self.keep_select = 'e.job.Resource_List["site"]'
        self.server.create_import_hook(
            hook_name, a, self.launch_hook_body % (self.keep_select))
        # instantiate execjob_resize hook
        hook_event = 'execjob_resize'
        hook_name = 'resize'
        a = {'event': hook_event, 'enabled': 'true'}
        self.server.create_import_hook(
            hook_name, a, self.resize_hook_body % ('not'))
        # Submit a job that requires 2 nodes
        j = Job(TEST_USER)
        j.create_script(self.job_scr2)
        stime = int(time.time())
        jid = self.server.submit(j)
        # Check the exec_vnode while in substate 41
        self.server.expect(JOB, {ATTR_substate: '41'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode1 = job_stat[0]['exec_vnode']
        self.logger.info("initial exec_vnode: %s" % execvnode1)
        initial_vnodes = execvnode1.split('+')
        # Check the exec_resize hook reject message in sister mom logs
        self.momA.log_match(
            "Job;%s;Cannot resize the job" % (jid),
            starttime=stime, interval=2)
        # Check the exec_vnode after job is in substate 42
        self.server.expect(JOB, {ATTR_substate: '42'}, id=jid)
        # Check for the pruned exec_vnode due to release_nodes() in launch
        # hook
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode2 = job_stat[0]['exec_vnode']
        self.logger.info("pruned exec_vnode: %s" % execvnode2)
        pruned_vnodes = execvnode2.split('+')
        # Check that the pruned exec_vnode has one less than initial value
        self.assertEqual(len(pruned_vnodes) + 1, len(initial_vnodes))
        # Check that the exec_vnode got pruned
        self.momB.log_match("Job;%s;pruned from exec_vnode=%s" % (
            jid, execvnode1), starttime=stime)
        self.momB.log_match("Job;%s;pruned to exec_vnode=%s" % (
            jid, execvnode2), starttime=stime)
        # Check that the sister mom failed to update the job
        self.momB.log_match(
            "Job;%s;sister node %s.* failed to update job" %
            (jid, self.hostA),
            starttime=stime, interval=2, regexp=True)
        # Because the resize hook rejected the event, Mom failed to update
        # the job. Check that the job got requeued.
        self.server.log_match("Job;%s;Job requeued" % (jid), starttime=stime)

    def test_cgroup_msmom_resize_fail(self):
        """
        Verify that exec_vnode values are trimmed
        when execjob_launch hook prunes job via release_nodes(),
        exec_job_resize failure in mom superior,
        tolerate_node_failures=job_start
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        self.load_config(self.cfg7)
        # Check that MomA has two free vnodes and MomB has a free vnode
        if self.check_req_rjs() == 1:
            self.skipTest(
                'MomA must have two free vnodes and MomB one free vnode')
        # instantiate queuejob hook
        hook_event = 'queuejob'
        hook_name = 'qjob'
        a = {'event': hook_event, 'enabled': 'true'}
        self.server.create_import_hook(hook_name, a, self.qjob_hook_body)
        # instantiate execjob_launch hook
        hook_event = 'execjob_launch'
        hook_name = 'launch'
        a = {'event': hook_event, 'enabled': 'true'}
        self.keep_select = 'e.job.Resource_List["site"]'
        self.server.create_import_hook(
            hook_name, a, self.launch_hook_body % (self.keep_select))
        # instantiate execjob_resize hook
        hook_event = 'execjob_resize'
        hook_name = 'resize'
        a = {'event': hook_event, 'enabled': 'true'}
        self.server.create_import_hook(
            hook_name, a, self.resize_hook_body % (''))
        # Submit a job that requires 2 nodes
        j = Job(TEST_USER)
        j.create_script(self.job_scr2)
        stime = int(time.time())
        jid = self.server.submit(j)
        # Check the exec_vnode while in substate 41
        self.server.expect(JOB, {ATTR_substate: '41'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode1 = job_stat[0]['exec_vnode']
        self.logger.info("initial exec_vnode: %s" % execvnode1)
        initial_vnodes = execvnode1.split('+')
        # Check the exec_resize hook reject message in mom superior logs
        self.momB.log_match(
            "Job;%s;Cannot resize the job" % (jid),
            starttime=stime, interval=2)
        # Check the exec_vnode after job is in substate 42
        self.server.expect(JOB, {ATTR_substate: '42'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode2 = job_stat[0]['exec_vnode']
        self.logger.info("pruned exec_vnode: %s" % execvnode2)
        pruned_vnodes = execvnode2.split('+')
        # Check that the pruned exec_vnode has one less than initial value
        self.assertEqual(len(pruned_vnodes) + 1, len(initial_vnodes))
        # Check that the exec_vnode got pruned
        self.momB.log_match("Job;%s;pruned from exec_vnode=%s" % (
            jid, execvnode1), starttime=stime)
        self.momB.log_match("Job;%s;pruned to exec_vnode=%s" % (
            jid, execvnode2), starttime=stime)
        # Because the resize hook rejected the event, Mom failed to update
        # the job. Check that the job got requeued.
        self.server.log_match("Job;%s;Job requeued" % (jid), starttime=stime)

    def test_cgroup_msmom_nodes_only(self):
        """
        Verify that exec_vnode values are trimmed
        when execjob_launch hook prunes job via release_nodes(),
        job is using only vnodes from mom superior host,
        tolerate_node_failures=job_start
        """
        # Test requires 2 nodes
        if len(self.moms) != 2:
            self.skipTest('Test requires two Moms as input, '
                          'use -p moms=<mom1:mom2>')
        self.load_config(self.cfg7)
        # Check that MomA has two free vnodes and MomB has a free vnode
        if self.check_req_rjs() == 1:
            self.skipTest(
                'MomA must have two free vnodes and MomB one free vnode')
        # disable queuejob hook
        hook_event = 'queuejob'
        hook_name = 'qjob'
        a = {'event': hook_event, 'enabled': 'false'}
        self.server.create_import_hook(hook_name, a, self.qjob_hook_body)
        # instantiate execjob_launch hook
        hook_event = 'execjob_launch'
        hook_name = 'launch'
        a = {'event': hook_event, 'enabled': 'true'}
        self.keep_select = '"ncpus=4:mem=2gb"'
        self.server.create_import_hook(
            hook_name, a, self.launch_hook_body % (self.keep_select))
        # disable execjob_resize hook
        hook_event = 'execjob_resize'
        hook_name = 'resize'
        a = {'event': hook_event, 'enabled': 'false'}
        self.server.create_import_hook(
            hook_name, a, self.resize_hook_body % (''))
        # Submit a job that requires two vnodes on one host
        j = Job(TEST_USER)
        j.create_script(self.job_scr3)
        stime = int(time.time())
        jid = self.server.submit(j)
        # Check the exec_vnode while in substate 41
        self.server.expect(JOB, {ATTR_substate: '41'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode1 = job_stat[0]['exec_vnode']
        self.logger.info("initial exec_vnode: %s" % execvnode1)
        initial_vnodes = execvnode1.split('+')
        # Check the exec_vnode after job is in substate 42
        self.server.expect(JOB, {ATTR_substate: '42'}, id=jid)
        self.server.expect(JOB, 'exec_vnode', id=jid, op=SET)
        job_stat = self.server.status(JOB, id=jid)
        execvnode2 = job_stat[0]['exec_vnode']
        self.logger.info("pruned exec_vnode: %s" % execvnode2)
        pruned_vnodes = execvnode2.split('+')
        # Check that the pruned exec_vnode has one less than initial value
        self.assertEqual(len(pruned_vnodes) + 1, len(initial_vnodes))
        # Check that the exec_vnode got pruned
        self.momA.log_match("Job;%s;pruned from exec_vnode=%s" % (
            jid, execvnode1), starttime=stime)
        self.momA.log_match("Job;%s;pruned to exec_vnode=%s" % (
            jid, execvnode2), starttime=stime)
        # Find out the released vnode
        if initial_vnodes[0] == execvnode2:
            execvnodeB = initial_vnodes[1]
        else:
            execvnodeB = initial_vnodes[0]
        vnodeB = execvnodeB.split(':')[0].split('(')[1]
        self.logger.info("released vnode: %s" % vnodeB)
        # Submit job2 requesting the released vnode, job runs
        j2 = Job(TEST_USER, {
            ATTR_l + '.select': '1:ncpus=1:mem=2gb:vnode=%s' % vnodeB})
        jid2 = self.server.submit(j2)
        self.server.expect(JOB, {ATTR_state: 'R'}, id=jid2)

    def tearDown(self):
        TestFunctional.tearDown(self)
        self.load_config(self.cfg0)
        if not self.iscray:
            self.remove_vntype()
        self.momA.delete_vnode_defs()
        if self.hostA != self.hostB:
            self.momB.delete_vnode_defs()
        events = '"execjob_begin,execjob_launch,execjob_attach,'
        events += 'execjob_epilogue,execjob_end,exechost_startup,'
        events += 'exechost_periodic,execjob_resize"'
        # Disable the cgroups hook
        conf = {'enabled': 'False', 'freq': 30, 'event': events}
        self.server.manager(MGR_CMD_SET, HOOK, conf, self.hook_name)
        # Clean up any temp files created
        self.logger.info('Deleting temporary files %s' % self.tempfile)
        self.du.rm(hostname=self.serverA, path=self.tempfile, force=True,
                   recursive=True, sudo=True)
        # Clean up frozen jobs
        if 'freezer' in self.paths:
            self.logger.info('Cleaning up frozen jobs ****')
            fdir = self.paths['freezer']
            if os.path.isdir(fdir):
                self.logger.info('freezer directory present')
                fpath = os.path.join(fdir, 'PtlPbs')
                if os.path.isdir(fpath):
                    jid = glob.glob(os.path.join(fpath, '*', ''))
                    self.logger.info('found jobs %s' % jid)
                    if jid:
                        for files in jid:
                            self.logger.info('*** found jobdir %s' % files)
                            jpath = os.path.join(fpath, files)
                            freezer_file = os.path.join(jpath,
                                                        'freezer.state')
                            # Thaw the cgroup
                            state = 'THAWED'
                            fn = self.du.create_temp_file(
                                hostname=self.hostA, body=state)
                            self.du.run_copy(hosts=self.hostA, src=fn,
                                             dest=freezer_file, sudo=True,
                                             uid='root', gid='root',
                                             mode=0o644)
                            self.du.rm(hostname=self.hostA, path=fn)
                            cmd = ['rmdir', jpath]
                            self.logger.info('deleting jobdir %s' % cmd)
                            self.du.run_cmd(cmd=cmd, sudo=True)
                        self.du.rm(hostname=self.hostA, path=fpath)
        # Remove the jobdir if any under other cgroups
        cgroup_subsys = ('cpuset', 'memory', 'blkio', 'devices', 'cpuacct',
                         'pids', 'systemd')
        for subsys in cgroup_subsys:
            if subsys in self.paths and self.paths[subsys]:
                self.logger.info('Looking for orphaned jobdir in %s' % subsys)
                cdir = self.paths[subsys]
                if os.path.isdir(cdir):
                    cpath = os.path.join(cdir, 'pbspro')
                    if not os.path.isdir(cpath):
                        cpath = os.path.join(cdir, 'pbspro.slice')
                    if os.path.isdir(cpath):
                        for jdir in glob.glob(os.path.join(cpath, '*', '')):
                            if not os.path.isdir(jdir):
                                continue
                            self.logger.info('deleting jobdir %s' % jdir)
                            cmd2 = ['rmdir', jdir]
                            self.du.run_cmd(cmd=cmd2, sudo=True)