tm.3 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. .\" Copyright (C) 1994-2018 Altair Engineering, Inc.
  2. .\" For more information, contact Altair at www.altair.com.
  3. .\"
  4. .\" This file is part of the PBS Professional ("PBS Pro") software.
  5. .\"
  6. .\" Open Source License Information:
  7. .\"
  8. .\" PBS Pro is free software. You can redistribute it and/or modify it under the
  9. .\" terms of the GNU Affero General Public License as published by the Free
  10. .\" Software Foundation, either version 3 of the License, or (at your option) any
  11. .\" later version.
  12. .\"
  13. .\" PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  14. .\" WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  15. .\" FOR A PARTICULAR PURPOSE.
  16. .\" See the GNU Affero General Public License for more details.
  17. .\"
  18. .\" You should have received a copy of the GNU Affero General Public License
  19. .\" along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. .\"
  21. .\" Commercial License Information:
  22. .\"
  23. .\" For a copy of the commercial license terms and conditions,
  24. .\" go to: (http://www.pbspro.com/UserArea/agreement.html)
  25. .\" or contact the Altair Legal Department.
  26. .\"
  27. .\" Altair’s dual-license business model allows companies, individuals, and
  28. .\" organizations to create proprietary derivative works of PBS Pro and
  29. .\" distribute them - whether embedded or bundled with other software -
  30. .\" under a commercial license agreement.
  31. .\"
  32. .\" Use of Altair’s trademarks, including but not limited to "PBS™",
  33. .\" "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  34. .\" trademark licensing policies.
  35. .\"
  36. .TH TM 3 "24 February 2015" Local "PBS Professional"
  37. .SH NAME
  38. tm_init, tm_nodeinfo, tm_poll, tm_notify, tm_spawn, tm_kill, tm_obit, tm_taskinfo, tm_atnode, tm_rescinfo, tm_publish, tm_subscribe, tm_finalize, tm_attach \- task management API
  39. .SH SYNOPSIS
  40. .B
  41. #include <tm.h>
  42. .LP
  43. .B
  44. int tm_init(info, roots)
  45. .RS 6
  46. void \(**info;
  47. .br
  48. struct tm_roots \(**roots;
  49. .RE
  50. .LP
  51. .B
  52. int tm_nodeinfo(list, nnodes)
  53. .RS 6
  54. tm_node_id \(**\(**list;
  55. .br
  56. int \(**nnodes;
  57. .RE
  58. .LP
  59. .B
  60. int tm_poll(poll_event, result_event, wait, tm_errno)
  61. .RS 6
  62. tm_event_t poll_event;
  63. .br
  64. tm_event_t \(**result_event;
  65. .br
  66. int wait;
  67. .br
  68. int \(**tm_errno;
  69. .RE
  70. .LP
  71. .B
  72. int tm_notify(tm_signal)
  73. .RS 6
  74. int tm_signal;
  75. .RE
  76. .LP
  77. .B
  78. int tm_spawn(argc, argv, envp, where, tid, event)
  79. .RS 6
  80. int argc;
  81. .br
  82. char \(**\(**argv;
  83. .br
  84. char \(**\(**envp;
  85. .br
  86. tm_node_id where;
  87. .br
  88. tm_task_id \(**tid;
  89. .br
  90. tm_event_t \(**event;
  91. .RE
  92. .LP
  93. .B
  94. int tm_kill(tid, sig, event)
  95. .RS 6
  96. tm_task_id tid;
  97. .br
  98. int sig;
  99. .br
  100. tm_event_t \(**event;
  101. .RE
  102. .LP
  103. .B
  104. int tm_obit(tid, obitval, event)
  105. .RS 6
  106. tm_task_id tid;
  107. .br
  108. int \(**obitval;
  109. .br
  110. tm_event_t \(**event;
  111. .RE
  112. .LP
  113. .B
  114. int tm_taskinfo(node, tid_list, list_size, ntasks, event)
  115. .RS 6
  116. tm_node_id node;
  117. .br
  118. tm_task_id \(**tid_list;
  119. .br
  120. int list_size;
  121. .br
  122. int \(**ntasks;
  123. .br
  124. tm_event_t \(**event;
  125. .RE
  126. .LP
  127. .B
  128. int tm_atnode(tid, node)
  129. .RS 6
  130. tm_task_id tid;
  131. .br
  132. tm_node_id \(**node;
  133. .RE
  134. .LP
  135. .B
  136. int tm_rescinfo(node, resource, len, event)
  137. .RS 6
  138. tm_node_id node;
  139. .br
  140. char \(**resource;
  141. .br
  142. int len;
  143. .br
  144. tm_event_t \(**event;
  145. .RE
  146. .LP
  147. .B
  148. int tm_publish(name, info, len, event)
  149. .RS 6
  150. char \(**name;
  151. .br
  152. void \(**info;
  153. .br
  154. int len;
  155. .br
  156. tm_event_t \(**event;
  157. .RE
  158. .LP
  159. .B
  160. int tm_subscribe(tid, name, info, len, info_len, event)
  161. .RS 6
  162. tm_task_id tid;
  163. .br
  164. char \(**name;
  165. .br
  166. void \(**info;
  167. .br
  168. int len;
  169. .br
  170. int \(**info_len;
  171. .br
  172. tm_event_t \(**event;
  173. .RE
  174. .LP
  175. .B
  176. int tm_attach(jobid, cookie, pid, tid, host, port)
  177. .RS 6
  178. char \(**jobid;
  179. .br
  180. char \(**cookie;
  181. .br
  182. pid_t pid;
  183. .br
  184. tm_task_id \(**tid;
  185. .br
  186. char \(**host;
  187. .br
  188. int port;
  189. .RE
  190. .LP
  191. .B
  192. int tm_finalize()
  193. .SH DESCRIPTION
  194. .LP
  195. These functions provide a partial implementation of the task
  196. management interface part of the PSCHED API. In PBS, MOM
  197. provides the task manager functions. This library opens a
  198. tcp socket to the MOM running on the local host and sends
  199. and receives messages using the DIS protocol (described in
  200. the PBS IDS). The
  201. .B tm
  202. interface can only be used by a process within a PBS job.
  203. .LP
  204. The PSCHED Task Management API description used to create this
  205. library was committed to paper on November 15, 1996 and was
  206. given the version number 0.1. Changes may have taken place since
  207. that time which are not reflected in this library.
  208. .LP
  209. The API description uses several data types that it purposefully
  210. does not define. This was done so an implementation would not be
  211. confined in the way it was written. For this specific work,
  212. the definitions follow:
  213. .sp
  214. .nf
  215. typedef int tm_node_id; /* job-relative node id */
  216. #define TM_ERROR_NODE ((tm_node_id)-1)
  217. typedef int tm_event_t; /* > 0 for real events */
  218. #define TM_NULL_EVENT ((tm_event_t)0)
  219. #define TM_ERROR_EVENT ((tm_event_t)-1)
  220. typedef unsigned long tm_task_id;
  221. #define TM_NULL_TASK (tm_task_id)0
  222. .fi
  223. .LP
  224. There are a number of error values defined as well:
  225. .na
  226. TM_SUCCESS, TM_ESYSTEM, TM_ENOEVENT, TM_ENOTCONNECTED, TM_EUNKNOWNCMD,
  227. TM_ENOTIMPLEMENTED, TM_EBADENVIRONMENT, TM_ENOTFOUND.
  228. .ad
  229. .LP
  230. .B tm_init(\|)
  231. initializes the library by opening a socket to the MOM on the local
  232. host and sending a TM_INIT message, then waiting for the reply.
  233. The
  234. .IR info
  235. parameter has no use and is included to conform with the PSCHED
  236. document. The
  237. .IR roots
  238. pointer will contain valid data after the function returns and
  239. has the following structure:
  240. .sp
  241. .nf
  242. struct tm_roots {
  243. tm_task_id tm_me;
  244. tm_task_id tm_parent;
  245. int tm_nnodes;
  246. int tm_ntasks;
  247. int tm_taskpoolid;
  248. tm_task_id *tm_tasklist;
  249. };
  250. .fi
  251. .sp
  252. .IP tm_me 20
  253. The task id of this calling task.
  254. .IP tm_parent 20
  255. The task id of the task which spawned this task or TM_NULL_TASK if
  256. the calling task is the initial task started by PBS.
  257. .IP tm_nnodes 20
  258. The number of nodes allocated to the job.
  259. .IP tm_ntasks 20
  260. This will always be 0 for PBS.
  261. .IP tm_taskpoolid 20
  262. PBS does not support task pools so this will always be -1.
  263. .IP tm_tasklist 20
  264. This will be NULL for PBS.
  265. .LP
  266. The
  267. .IR tm_ntasks ,
  268. .IR tm_taskpoolid
  269. and
  270. .IR tm_tasklist
  271. fields are not filled with data specified by the PSCHED document. PBS does
  272. not support task pools and, at this time, does not return information
  273. about current running tasks from
  274. .BR tm_init .
  275. There is a separate call to get information for current running tasks called
  276. .B tm_taskinfo
  277. which is described below. The return value from
  278. .B tm_init
  279. is TM_SUCCESS if the library initialization was successful, or an error
  280. is returned otherwise.
  281. .LP
  282. .B tm_nodeinfo(\|)
  283. places a pointer to a malloc'ed
  284. array of tm_node_id's in the pointer pointed at by
  285. .IR list .
  286. The order of the tm_node_id's in
  287. .IR list
  288. is the same as that specified to MOM in the "exec_host" attribute. The
  289. int pointed to by
  290. .IR nnodes
  291. contains the number of nodes allocated to the job.
  292. This is information that is returned during initialization and does
  293. not require communication with MOM. If
  294. .B tm_init
  295. has not been called, TM_ESYSTEM is returned, otherwise TM_SUCCESS is
  296. returned.
  297. .LP
  298. .B tm_poll(\|)
  299. is the function that retrieves information from the task management
  300. system and places it in the locations specified when other routines requested
  301. that an action take place. The bookkeeping for this is done by generating an
  302. .IR event
  303. for each action. When the task manager (MOM) sends a message that an
  304. action is complete, the event is reported by
  305. .B tm_poll
  306. and information is placed where the caller requested it.
  307. The argument
  308. .IR poll_event
  309. is meant to be used to request a specific event. This implementation
  310. does not use it and it must be set to TM_NULL_EVENT or an error
  311. is returned. Upon return, the argument
  312. .IR result_event
  313. will contain a valid event number or TM_ERROR_EVENT on error. If
  314. .IR wait
  315. is zero and there are no events to report,
  316. .IR result_event
  317. is set to TM_NULL_EVENT. If
  318. .IR wait
  319. is non-zero and there are no events to report, the function will block
  320. waiting for an event. If no local error takes place, TM_SUCCESS is
  321. returned. If an error is reported by MOM for an event, then the argument
  322. .IR tm_errno
  323. will be set to an error code.
  324. .LP
  325. .B tm_notify(\|)
  326. is described in the PSCHED documentation, but is not implemented for
  327. PBS yet. It will return TM_ENOTIMPLEMENTED.
  328. .LP
  329. .B tm_spawn(\|)
  330. sends a message to MOM to start a new task. The node id of the
  331. host to run the task is given by
  332. .IR where .
  333. The parameters
  334. .IR argc ,
  335. .IR argv
  336. and
  337. .IR envp
  338. specify the program to run and its arguments and environment very
  339. much like
  340. .BR exec(\|) .
  341. The full path of the program executable must be given by
  342. .IR argv[0]
  343. and the number of elements in the argv array is given by
  344. .IR argc .
  345. The array
  346. .IR envp
  347. is NULL terminated. The argument
  348. .IR event
  349. points to a tm_event_t variable which is filled in with an event
  350. number. When this event is returned by
  351. .BR tm_poll ,
  352. the tm_task_id pointed to by
  353. .IR tid
  354. will contain the task id of the newly created task.
  355. .LP
  356. .B tm_kill(\|)
  357. sends a signal specified by
  358. .IR sig
  359. to the task
  360. .IR tid
  361. and puts an event number in the tm_event_t pointed to by
  362. .IR event .
  363. .LP
  364. .B tm_obit(\|)
  365. creates an event which will be reported when the task
  366. .IR tid
  367. exits. The int pointed to by
  368. .IR obitval
  369. will contain the exit value of the task when the event is reported.
  370. .LP
  371. .B tm_taskinfo(\|)
  372. returns the list of tasks running on the node specified by
  373. .IR node .
  374. The PSCHED documentation mentions a special ability to retrieve
  375. all tasks running in the job. This is not supported by PBS.
  376. The argument
  377. .IR tid_list
  378. points to an array of tm_task_id's which contains
  379. .IR list_size
  380. elements. Upon return,
  381. .IR event
  382. will contain an event number. When this event is polled, the int
  383. pointed to by
  384. .IR ntasks
  385. will contain the number of tasks running on the node and the array
  386. will be filled in with tm_task_id's. If
  387. .IR ntasks
  388. is greater than
  389. .IR list_size ,
  390. only
  391. .IR list_size
  392. tasks will be returned.
  393. .LP
  394. .B tm_atnode(\|)
  395. will place the node id where the task
  396. .IR tid
  397. exists in the tm_node_id pointed to by
  398. .IR node .
  399. .LP
  400. .B tm_rescinfo(\|)
  401. makes a request for a string specifying the resources available on
  402. a node given by the argument
  403. .IR node .
  404. The string is returned in the buffer pointed to by
  405. .IR resource
  406. and is terminated by a NUL character unless the number of characters
  407. of information is greater than specified by
  408. .IR len .
  409. The resource string PBS returns is formatted as follows:
  410. .sp
  411. A space separated set of strings from the
  412. .B uname
  413. system call. The order of the strings is
  414. .B sysname,
  415. .B nodename,
  416. .B release,
  417. .B version,
  418. .B machine.
  419. .sp
  420. A comma separated set of strings giving the components of the
  421. "Resource_List" attribute of the job, preceded by a colon (:).
  422. Each component has the
  423. resource name, an equal sign, and the limit value.
  424. .LP
  425. .B tm_publish(\|)
  426. causes
  427. .IR len
  428. bytes of information pointed at by
  429. .IR info
  430. to be sent to the local MOM to be saved under the name given by
  431. .IR name .
  432. .LP
  433. .B tm_subscribe(\|)
  434. returns a copy of the information named by
  435. .IR name
  436. for the task given by
  437. .IR tid .
  438. The argument
  439. .IR info
  440. points to a buffer of size
  441. .IR len
  442. where the information will be returned. The argument
  443. .IR info_len
  444. will be set to the size of the published data. If this is larger
  445. than the supplied buffer, the data will have been truncated.
  446. .LP
  447. .B tm_attach(\|)
  448. commands MOM to create a new PBS "attached task" out of a session running on MOM's host.
  449. The
  450. .IR jobid
  451. parameter specifies the job which is to have a new task attached. If it is NULL, the system
  452. will try to determine the correct
  453. .IR jobid .
  454. The
  455. .IR cookie
  456. parameter must be NULL. The
  457. .IR pid
  458. parameter must be a non-zero process id for the process which is to be
  459. added to the job specified by
  460. .IR jobid .
  461. If
  462. .IR tid
  463. is non-NULL, it will be used to store the task id of the new task. The
  464. .IR host
  465. and
  466. .IR port
  467. parameters specify where to contact MOM.
  468. .IR host
  469. should be NULL. The return value will be 0 if a new
  470. task has been successfully
  471. created and non-zero on error. The return value will be one of the
  472. TM error numbers defined in
  473. .B tm.h
  474. as follows:
  475. TM_ESYSTEM MOM cannot be contacted
  476. TM_ENOTFOUND No matching job was found
  477. TM_ENOTIMPLEMENTED The call is not implemented/supported
  478. TM_ESESSION The session specified is already attached
  479. TM_EUSER The calling user is not permitted to attach
  480. TM_EOWNER The process owner does not match the job
  481. TM_ENOPROC The process does not exist
  482. .LP
  483. .B tm_finalize(\|)
  484. may be called to free any memory in use by the library and close
  485. the connection to MOM.
  486. .SH SEE ALSO
  487. pbs_mom(8B),
  488. pbs_sched(8B)