123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494 |
- .\" Copyright (C) 1994-2018 Altair Engineering, Inc.
- .\" For more information, contact Altair at www.altair.com.
- .\"
- .\" This file is part of the PBS Professional ("PBS Pro") software.
- .\"
- .\" Open Source License Information:
- .\"
- .\" PBS Pro is free software. You can redistribute it and/or modify it under the
- .\" terms of the GNU Affero General Public License as published by the Free
- .\" Software Foundation, either version 3 of the License, or (at your option) any
- .\" later version.
- .\"
- .\" PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
- .\" WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- .\" FOR A PARTICULAR PURPOSE.
- .\" See the GNU Affero General Public License for more details.
- .\"
- .\" You should have received a copy of the GNU Affero General Public License
- .\" along with this program. If not, see <http://www.gnu.org/licenses/>.
- .\"
- .\" Commercial License Information:
- .\"
- .\" For a copy of the commercial license terms and conditions,
- .\" go to: (http://www.pbspro.com/UserArea/agreement.html)
- .\" or contact the Altair Legal Department.
- .\"
- .\" Altair’s dual-license business model allows companies, individuals, and
- .\" organizations to create proprietary derivative works of PBS Pro and
- .\" distribute them - whether embedded or bundled with other software -
- .\" under a commercial license agreement.
- .\"
- .\" Use of Altair’s trademarks, including but not limited to "PBS™",
- .\" "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
- .\" trademark licensing policies.
- .\"
- .TH TM 3 "24 February 2015" Local "PBS Professional"
- .SH NAME
- tm_init, tm_nodeinfo, tm_poll, tm_notify, tm_spawn, tm_kill, tm_obit, tm_taskinfo, tm_atnode, tm_rescinfo, tm_publish, tm_subscribe, tm_finalize, tm_attach \- task management API
- .SH SYNOPSIS
- .B
- #include <tm.h>
- .LP
- .B
- int tm_init(info, roots)
- .RS 6
- void \(**info;
- .br
- struct tm_roots \(**roots;
- .RE
- .LP
- .B
- int tm_nodeinfo(list, nnodes)
- .RS 6
- tm_node_id \(**\(**list;
- .br
- int \(**nnodes;
- .RE
- .LP
- .B
- int tm_poll(poll_event, result_event, wait, tm_errno)
- .RS 6
- tm_event_t poll_event;
- .br
- tm_event_t \(**result_event;
- .br
- int wait;
- .br
- int \(**tm_errno;
- .RE
- .LP
- .B
- int tm_notify(tm_signal)
- .RS 6
- int tm_signal;
- .RE
- .LP
- .B
- int tm_spawn(argc, argv, envp, where, tid, event)
- .RS 6
- int argc;
- .br
- char \(**\(**argv;
- .br
- char \(**\(**envp;
- .br
- tm_node_id where;
- .br
- tm_task_id \(**tid;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_kill(tid, sig, event)
- .RS 6
- tm_task_id tid;
- .br
- int sig;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_obit(tid, obitval, event)
- .RS 6
- tm_task_id tid;
- .br
- int \(**obitval;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_taskinfo(node, tid_list, list_size, ntasks, event)
- .RS 6
- tm_node_id node;
- .br
- tm_task_id \(**tid_list;
- .br
- int list_size;
- .br
- int \(**ntasks;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_atnode(tid, node)
- .RS 6
- tm_task_id tid;
- .br
- tm_node_id \(**node;
- .RE
- .LP
- .B
- int tm_rescinfo(node, resource, len, event)
- .RS 6
- tm_node_id node;
- .br
- char \(**resource;
- .br
- int len;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_publish(name, info, len, event)
- .RS 6
- char \(**name;
- .br
- void \(**info;
- .br
- int len;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_subscribe(tid, name, info, len, info_len, event)
- .RS 6
- tm_task_id tid;
- .br
- char \(**name;
- .br
- void \(**info;
- .br
- int len;
- .br
- int \(**info_len;
- .br
- tm_event_t \(**event;
- .RE
- .LP
- .B
- int tm_attach(jobid, cookie, pid, tid, host, port)
- .RS 6
- char \(**jobid;
- .br
- char \(**cookie;
- .br
- pid_t pid;
- .br
- tm_task_id \(**tid;
- .br
- char \(**host;
- .br
- int port;
- .RE
- .LP
- .B
- int tm_finalize()
- .SH DESCRIPTION
- .LP
- These functions provide a partial implementation of the task
- management interface part of the PSCHED API. In PBS, MOM
- provides the task manager functions. This library opens a
- tcp socket to the MOM running on the local host and sends
- and receives messages using the DIS protocol (described in
- the PBS IDS). The
- .B tm
- interface can only be used by a process within a PBS job.
- .LP
- The PSCHED Task Management API description used to create this
- library was committed to paper on November 15, 1996 and was
- given the version number 0.1. Changes may have taken place since
- that time which are not reflected in this library.
- .LP
- The API description uses several data types that it purposefully
- does not define. This was done so an implementation would not be
- confined in the way it was written. For this specific work,
- the definitions follow:
- .sp
- .nf
- typedef int tm_node_id; /* job-relative node id */
- #define TM_ERROR_NODE ((tm_node_id)-1)
- typedef int tm_event_t; /* > 0 for real events */
- #define TM_NULL_EVENT ((tm_event_t)0)
- #define TM_ERROR_EVENT ((tm_event_t)-1)
- typedef unsigned long tm_task_id;
- #define TM_NULL_TASK (tm_task_id)0
- .fi
- .LP
- There are a number of error values defined as well:
- .na
- TM_SUCCESS, TM_ESYSTEM, TM_ENOEVENT, TM_ENOTCONNECTED, TM_EUNKNOWNCMD,
- TM_ENOTIMPLEMENTED, TM_EBADENVIRONMENT, TM_ENOTFOUND.
- .ad
- .LP
- .B tm_init(\|)
- initializes the library by opening a socket to the MOM on the local
- host and sending a TM_INIT message, then waiting for the reply.
- The
- .IR info
- parameter has no use and is included to conform with the PSCHED
- document. The
- .IR roots
- pointer will contain valid data after the function returns and
- has the following structure:
- .sp
- .nf
- struct tm_roots {
- tm_task_id tm_me;
- tm_task_id tm_parent;
- int tm_nnodes;
- int tm_ntasks;
- int tm_taskpoolid;
- tm_task_id *tm_tasklist;
- };
- .fi
- .sp
- .IP tm_me 20
- The task id of this calling task.
- .IP tm_parent 20
- The task id of the task which spawned this task or TM_NULL_TASK if
- the calling task is the initial task started by PBS.
- .IP tm_nnodes 20
- The number of nodes allocated to the job.
- .IP tm_ntasks 20
- This will always be 0 for PBS.
- .IP tm_taskpoolid 20
- PBS does not support task pools so this will always be -1.
- .IP tm_tasklist 20
- This will be NULL for PBS.
- .LP
- The
- .IR tm_ntasks ,
- .IR tm_taskpoolid
- and
- .IR tm_tasklist
- fields are not filled with data specified by the PSCHED document. PBS does
- not support task pools and, at this time, does not return information
- about current running tasks from
- .B tm_init.
- There is a separate call to get information for current running tasks called
- .B tm_taskinfo
- which is described below. The return value from
- .B tm_init
- is TM_SUCCESS if the library initialization was successful, or an error
- is returned otherwise.
- .LP
- .B tm_nodeinfo(\|)
- places a pointer to a malloc'ed
- array of tm_node_id's in the pointer pointed at by
- .IR list .
- The order of the tm_node_id's in
- .IR list
- is the same as that specified to MOM in the "exec_host" attribute. The
- int pointed to by
- .IR nnodes
- contains the number of nodes allocated to the job.
- This is information that is returned during initialization and does
- not require communication with MOM. If
- .B tm_init
- has not been called, TM_ESYSTEM is returned, otherwise TM_SUCCESS is
- returned.
- .LP
- .B tm_poll(\|)
- retrieves information from the task management system and stores it
- in the locations that were specified when other routines requested
- that an action take place. The bookkeeping for this is done by generating an
- .IR event
- for each action. When the task manager (MOM) sends a message that an
- action is complete, the event is reported by
- .B tm_poll
- and information is placed where the caller requested it.
- The argument
- .IR poll_event
- is meant to be used to request a specific event. This implementation
- does not use it and it must be set to TM_NULL_EVENT or an error
- is returned. Upon return, the argument
- .IR result_event
- will contain a valid event number or TM_ERROR_EVENT on error. If
- .IR wait
- is zero and there are no events to report,
- .IR result_event
- is set to TM_NULL_EVENT. If
- .IR wait
- is non-zero and there are no events to report, the function will block
- waiting for an event. If no local error takes place, TM_SUCCESS is
- returned. If an error is reported by MOM for an event, then the argument
- .IR tm_errno
- will be set to an error code.
- .LP
- .B tm_notify(\|)
- is described in the PSCHED documentation, but is not implemented for
- PBS yet. It will return TM_ENOTIMPLEMENTED.
- .LP
- .B tm_spawn(\|)
- sends a message to MOM to start a new task. The node id of the
- host to run the task is given by
- .IR where .
- The parameters
- .IR argc ,
- .IR argv
- and
- .IR envp
- specify the program to run and its arguments and environment very
- much like
- .B exec(\|).
- The full path of the program executable must be given by
- .IR argv[0]
- and the number of elements in the argv array is given by
- .IR argc .
- The array
- .IR envp
- is NULL terminated. The argument
- .IR event
- points to a tm_event_t variable which is filled in with an event
- number. When this event is returned by
- .BR tm_poll ,
- the tm_task_id pointed to by
- .IR tid
- will contain the task id of the newly created task.
- .LP
- .B tm_kill(\|)
- sends a signal specified by
- .IR sig
- to the task
- .IR tid
- and puts an event number in the tm_event_t pointed to by
- .IR event .
- .LP
- .B tm_obit(\|)
- creates an event which will be reported when the task
- .IR tid
- exits. The int pointed to by
- .IR obitval
- will contain the exit value of the task when the event is reported.
- .LP
- .B tm_taskinfo(\|)
- returns the list of tasks running on the node specified by
- .IR node .
- The PSCHED documentation mentions a special ability to retrieve
- all tasks running in the job. This is not supported by PBS.
- The argument
- .IR tid_list
- points to an array of tm_task_id's which contains
- .IR list_size
- elements. Upon return,
- .IR event
- will contain an event number. When this event is polled, the int
- pointed to by
- .IR ntasks
- will contain the number of tasks running on the node and the array
- will be filled in with tm_task_id's. If
- .IR ntasks
- is greater than
- .IR list_size ,
- only
- .IR list_size
- tasks will be returned.
- .LP
- .B tm_atnode(\|)
- will place the node id where the task
- .IR tid
- exists in the tm_node_id pointed to by
- .IR node .
- .LP
- .B tm_rescinfo(\|)
- makes a request for a string specifying the resources available on
- a node given by the argument
- .IR node .
- The string is returned in the buffer pointed to by
- .IR resource
- and is terminated by a NUL character unless the number of characters
- of information is greater than specified by
- .IR len .
- The resource string PBS returns is formatted as follows:
- .sp
- A space separated set of strings from the
- .B uname
- system call. The order of the strings is
- .B sysname,
- .B nodename,
- .B release,
- .B version,
- .B machine.
- .sp
- A comma separated set of strings giving the components of the
- "Resource_List" attribute of the job, preceded by a colon (:).
- Each component has the
- resource name, an equal sign, and the limit value.
- .LP
- .B tm_publish(\|)
- causes
- .IR len
- bytes of information pointed at by
- .IR info
- to be sent to the local MOM to be saved under the name given by
- .IR name .
- .LP
- .B tm_subscribe(\|)
- returns a copy of the information named by
- .IR name
- for the task given by
- .IR tid .
- The argument
- .IR info
- points to a buffer of size
- .IR len
- where the information will be returned. The argument
- .IR info_len
- will be set to the size of the published data. If this is larger
- than the supplied buffer, the data will have been truncated.
- .LP
- .B tm_attach(\|)
- commands MOM to create a new PBS "attached task" out of a session running on MOM's host.
- The
- .IR jobid
- parameter specifies the job which is to have a new task attached. If it is NULL, the system
- will try to determine the correct
- .IR jobid .
- The
- .IR cookie
- parameter must be NULL. The
- .IR pid
- parameter must be a non-zero process id for the process which is to be
- added to the job specified by
- .IR jobid .
- If
- .IR tid
- is non-NULL, it will be used to store the task id of the new task. The
- .IR host
- and
- .IR port
- parameters specify where to contact MOM.
- .IR host
- should be NULL. The return value will be 0 if a new
- task has been successfully
- created and non-zero on error. The return value will be one of the
- TM error numbers defined in
- .B tm.h
- as follows:
- .sp
- .nf
- TM_ESYSTEM          MOM cannot be contacted
- TM_ENOTFOUND        No matching job was found
- TM_ENOTIMPLEMENTED  The call is not implemented/supported
- TM_ESESSION         The session specified is already attached
- TM_EUSER            The calling user is not permitted to attach
- TM_EOWNER           The process owner does not match the job
- TM_ENOPROC          The process does not exist
- .fi
- .LP
- .B tm_finalize(\|)
- may be called to free any memory in use by the library and close
- the connection to MOM.
- .SH SEE ALSO
- pbs_mom(8B),
- pbs_sched(8B)
|