Skip to content

Commit d072f27

Browse files
committed
Extend timeout to child jobs
When a timeout is specified and the primary job times out, we need to ensure that we also report on and kill any child jobs it started. This includes reporting any requested stack traces. Also allow inheritance of output directives like tag and timestamp. Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent 2ff7d6b commit d072f27

File tree

7 files changed

+258
-76
lines changed

7 files changed

+258
-76
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ test/iostress
193193
test/spawn_multiple
194194
test/clichk
195195
test/chkfs
196+
test/spawn_timeout
196197

197198
test/mpi/spawn_multiple
198199
test/mpi/create_comm_from_group

src/mca/plm/base/plm_base_launch_support.c

Lines changed: 130 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,9 @@ static void spawn_timeout_cb(int fd, short event, void *cbdata)
343343
}
344344
}
345345

346-
static void stack_trace_recv(int status, pmix_proc_t *sender, pmix_data_buffer_t *buffer,
347-
prte_rml_tag_t tag, void *cbdata)
346+
void prte_plm_base_stack_trace_recv(int status, pmix_proc_t *sender,
347+
pmix_data_buffer_t *buffer,
348+
prte_rml_tag_t tag, void *cbdata)
348349
{
349350
pmix_byte_object_t pbo;
350351
pmix_data_buffer_t blob;
@@ -494,13 +495,101 @@ static void stack_trace_timeout(int sd, short args, void *cbdata)
494495
PMIX_DESTRUCT(&parray);
495496
}
496497

498+
static void dump_job(prte_job_t *jdata)
499+
{
500+
pmix_proc_t pc;
501+
prte_proc_t *proc;
502+
pmix_byte_object_t bo;
503+
char *st;
504+
int i;
505+
506+
PMIX_LOAD_PROCID(&pc, jdata->nspace, PMIX_RANK_WILDCARD);
507+
pmix_asprintf(&st, "DATA FOR JOB: %s\n", PRTE_JOBID_PRINT(jdata->nspace));
508+
bo.bytes = st;
509+
bo.size = strlen(st);
510+
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
511+
free(st);
512+
pmix_asprintf(&st, "\tNum apps: %d\tNum procs: %d\tJobState: %s\tAbort: %s\n",
513+
(int) jdata->num_apps, (int) jdata->num_procs, prte_job_state_to_str(jdata->state),
514+
(PRTE_FLAG_TEST(jdata, PRTE_JOB_FLAG_ABORTED)) ? "True" : "False");
515+
bo.bytes = st;
516+
bo.size = strlen(st);
517+
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
518+
free(st);
519+
pmix_asprintf(&st, "\tNum launched: %ld\tNum reported: %ld\tNum terminated: %ld\n\n\tProcs:\n",
520+
(long) jdata->num_launched, (long) jdata->num_reported,
521+
(long) jdata->num_terminated);
522+
bo.bytes = st;
523+
bo.size = strlen(st);
524+
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
525+
free(st);
526+
for (i = 0; i < jdata->procs->size; i++) {
527+
if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(jdata->procs, i))) {
528+
pmix_asprintf(&st, "\t\tRank: %s\tNode: %s\tPID: %u\tState: %s\tExitCode %d\n",
529+
PRTE_VPID_PRINT(proc->name.rank),
530+
(NULL == proc->node) ? "UNKNOWN" : proc->node->name,
531+
(unsigned int) proc->pid, prte_proc_state_to_str(proc->state),
532+
proc->exit_code);
533+
bo.bytes = st;
534+
bo.size = strlen(st);
535+
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
536+
free(st);
537+
}
538+
}
539+
st = "\n";
540+
bo.bytes = st;
541+
bo.size = strlen(st);
542+
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
543+
}
544+
545+
/*
 * Ask the daemons for stack traces of the given job.
 *
 * First delivers a "waiting" notice on the stderr forwarding channel for the
 * wildcard rank of the job's namespace, then packs the
 * PRTE_DAEMON_GET_STACK_TRACES command followed by the job's namespace and
 * xcasts the buffer on the daemon tag.
 *
 * @param jdata  job whose namespace is placed in the request
 * @return PRTE_SUCCESS if the request was xcast, PRTE_ERROR if packing or
 *         the xcast failed (the failure is logged first)
 */
static int get_traces(prte_job_t *jdata)
{
    prte_daemon_cmd_flag_t cmd = PRTE_DAEMON_GET_STACK_TRACES;
    pmix_data_buffer_t msg;
    pmix_byte_object_t notice;
    pmix_proc_t target;
    pmix_status_t rc;
    int ret = PRTE_ERROR;

    /* let the user know why things may appear to hang for a bit */
    PMIX_LOAD_PROCID(&target, jdata->nspace, PMIX_RANK_WILDCARD);
    notice.bytes = "Waiting for stack traces (this may take a few moments)...\n";
    notice.size = strlen(notice.bytes);
    PMIx_server_IOF_deliver(&target, PMIX_FWD_STDERR_CHANNEL, &notice, NULL, 0, NULL, NULL);

    /* build the request: command first, then the namespace it applies to */
    PMIX_DATA_BUFFER_CONSTRUCT(&msg);

    rc = PMIx_Data_pack(NULL, &msg, &cmd, 1, PMIX_UINT8);
    if (PMIX_SUCCESS != rc) {
        PMIX_ERROR_LOG(rc);
        goto cleanup;
    }

    rc = PMIx_Data_pack(NULL, &msg, &jdata->nspace, 1, PMIX_PROC_NSPACE);
    if (PMIX_SUCCESS != rc) {
        PMIX_ERROR_LOG(rc);
        goto cleanup;
    }

    /* the request goes to all daemons */
    rc = prte_grpcomm.xcast(PRTE_RML_TAG_DAEMON, &msg);
    if (PRTE_SUCCESS != rc) {
        PRTE_ERROR_LOG(rc);
        goto cleanup;
    }

    ret = PRTE_SUCCESS;

cleanup:
    /* the buffer is destructed on every path, success or failure */
    PMIX_DATA_BUFFER_DESTRUCT(&msg);
    return ret;
}
584+
497585
static void job_timeout_cb(int fd, short event, void *cbdata)
498586
{
499587
prte_job_t *jdata = (prte_job_t *) cbdata;
500588
prte_timer_t *timer = NULL;
501-
prte_proc_t *proc, prc;
589+
prte_proc_t *prc;
590+
prte_job_t *child;
502591
pmix_proc_t pc;
503-
int i, rc, timeout, *tp;
592+
int rc, timeout, *tp, i;
504593
pmix_pointer_array_t parray;
505594
pmix_byte_object_t bo;
506595
char *st;
@@ -534,83 +623,31 @@ static void job_timeout_cb(int fd, short event, void *cbdata)
534623
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_REPORT_STATE, NULL, PMIX_BOOL)) {
535624
/* output the results - note that the output might need to go to a
536625
* tool instead of just to stderr, so we use the PMIx IOF deliver
537-
* function to ensure it gets where it needs to go */
538-
pmix_asprintf(&st, "DATA FOR JOB: %s\n", PRTE_JOBID_PRINT(jdata->nspace));
539-
bo.bytes = st;
540-
bo.size = strlen(st);
541-
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
542-
free(st);
543-
pmix_asprintf(&st, "\tNum apps: %d\tNum procs: %d\tJobState: %s\tAbort: %s\n",
544-
(int) jdata->num_apps, (int) jdata->num_procs, prte_job_state_to_str(jdata->state),
545-
(PRTE_FLAG_TEST(jdata, PRTE_JOB_FLAG_ABORTED)) ? "True" : "False");
546-
bo.bytes = st;
547-
bo.size = strlen(st);
548-
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
549-
free(st);
550-
pmix_asprintf(&st, "\tNum launched: %ld\tNum reported: %ld\tNum terminated: %ld\n\n\tProcs:\n",
551-
(long) jdata->num_launched, (long) jdata->num_reported,
552-
(long) jdata->num_terminated);
553-
bo.bytes = st;
554-
bo.size = strlen(st);
555-
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
556-
free(st);
557-
for (i = 0; i < jdata->procs->size; i++) {
558-
if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(jdata->procs, i))) {
559-
pmix_asprintf(&st, "\t\tRank: %s\tNode: %s\tPID: %u\tState: %s\tExitCode %d\n",
560-
PRTE_VPID_PRINT(proc->name.rank),
561-
(NULL == proc->node) ? "UNKNOWN" : proc->node->name,
562-
(unsigned int) proc->pid, prte_proc_state_to_str(proc->state),
563-
proc->exit_code);
564-
bo.bytes = st;
565-
bo.size = strlen(st);
566-
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
567-
free(st);
568-
}
569-
}
570-
st = "\n";
571-
bo.bytes = st;
572-
bo.size = strlen(st);
573-
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
626+
* function to ensure it gets where it needs to go. */
627+
dump_job(jdata);
628+
}
629+
630+
/* Do this for all its child jobs, if any */
631+
PMIX_LIST_FOREACH(child, &jdata->children, prte_job_t) {
632+
dump_job(child);
574633
}
575634

576635
/* see if they want stacktraces */
577636
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_STACKTRACES, NULL, PMIX_BOOL)) {
578637
/* if they asked for stack_traces, attempt to get them, but timeout
579638
* if we cannot do so */
580-
prte_daemon_cmd_flag_t command = PRTE_DAEMON_GET_STACK_TRACES;
581-
pmix_data_buffer_t buffer;
582-
583-
bo.bytes = "Waiting for stack traces (this may take a few moments)...\n";
584-
bo.size = strlen(bo.bytes);
585-
PMIx_server_IOF_deliver(&pc, PMIX_FWD_STDERR_CHANNEL, &bo, NULL, 0, NULL, NULL);
586-
587-
/* set the recv */
588-
PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_STACK_TRACE,
589-
PRTE_RML_PERSISTENT, stack_trace_recv, NULL);
590-
591-
/* setup the buffer */
592-
PMIX_DATA_BUFFER_CONSTRUCT(&buffer);
593-
/* pack the command */
594-
rc = PMIx_Data_pack(NULL, &buffer, &command, 1, PMIX_UINT8);
595-
if (PMIX_SUCCESS != rc) {
596-
PMIX_ERROR_LOG(rc);
597-
PMIX_DATA_BUFFER_DESTRUCT(&buffer);
598-
goto giveup;
599-
}
600-
/* pack the jobid */
601-
rc = PMIx_Data_pack(NULL, &buffer, &jdata->nspace, 1, PMIX_PROC_NSPACE);
602-
if (PMIX_SUCCESS != rc) {
603-
PMIX_ERROR_LOG(rc);
604-
PMIX_DATA_BUFFER_DESTRUCT(&buffer);
639+
rc = get_traces(jdata);
640+
if (PRTE_SUCCESS != rc) {
605641
goto giveup;
606642
}
607-
/* goes to all daemons */
608-
if (PRTE_SUCCESS != (rc = prte_grpcomm.xcast(PRTE_RML_TAG_DAEMON, &buffer))) {
609-
PRTE_ERROR_LOG(rc);
610-
PMIX_DATA_BUFFER_DESTRUCT(&buffer);
611-
goto giveup;
643+
// get traces for child jobs too
644+
PMIX_LIST_FOREACH(child, &jdata->children, prte_job_t) {
645+
rc = get_traces(child);
646+
if (PRTE_SUCCESS != rc) {
647+
goto giveup;
648+
}
612649
}
613-
PMIX_DATA_BUFFER_DESTRUCT(&buffer);
650+
614651
/* we will terminate after we get the stack_traces, but set a timeout
615652
* just in case we never hear back from everyone */
616653
if (prte_stack_trace_wait_timeout > 0) {
@@ -629,11 +666,30 @@ static void job_timeout_cb(int fd, short event, void *cbdata)
629666
giveup:
630667
/* abort the job */
631668
PMIX_CONSTRUCT(&parray, pmix_pointer_array_t);
632-
PMIX_LOAD_PROCID(&prc.name, jdata->nspace, PMIX_RANK_WILDCARD);
633-
pmix_pointer_array_add(&parray, &prc);
669+
pmix_pointer_array_init(&parray,
670+
PRTE_GLOBAL_ARRAY_BLOCK_SIZE,
671+
PRTE_GLOBAL_ARRAY_MAX_SIZE,
672+
PRTE_GLOBAL_ARRAY_BLOCK_SIZE);
673+
674+
prc = PMIX_NEW(prte_proc_t);
675+
PMIX_LOAD_PROCID(&prc->name, jdata->nspace, PMIX_RANK_WILDCARD);
676+
pmix_pointer_array_add(&parray, prc);
677+
PMIX_LIST_FOREACH(child, &jdata->children, prte_job_t) {
678+
prc = PMIX_NEW(prte_proc_t);
679+
PMIX_LOAD_PROCID(&prc->name, child->nspace, PMIX_RANK_WILDCARD);
680+
pmix_pointer_array_add(&parray, prc);
681+
}
634682
if (PRTE_SUCCESS != (rc = prte_plm.terminate_procs(&parray))) {
635683
PRTE_ERROR_LOG(rc);
636684
}
685+
for (i=0; i < parray.size; i++) {
686+
prc = (prte_proc_t *) pmix_pointer_array_get_item(&parray, i);
687+
if (NULL == prc) {
688+
continue;
689+
}
690+
pmix_pointer_array_set_item(&parray, i, NULL);
691+
PMIX_RELEASE(prc);
692+
}
637693
PMIX_DESTRUCT(&parray);
638694
}
639695

src/mca/plm/base/plm_base_receive.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ int prte_plm_base_comm_start(void)
8686
PRTE_RML_PERSISTENT, prte_plm_base_daemon_failed, NULL);
8787
PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_TOPOLOGY_REPORT,
8888
PRTE_RML_PERSISTENT, prte_plm_base_daemon_topology, NULL);
89+
PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_STACK_TRACE,
90+
PRTE_RML_PERSISTENT, prte_plm_base_stack_trace_recv, NULL);
8991
}
9092
recv_issued = true;
9193

@@ -106,6 +108,7 @@ int prte_plm_base_comm_stop(void)
106108
PRTE_RML_CANCEL(PRTE_NAME_WILDCARD, PRTE_RML_TAG_PRTED_CALLBACK);
107109
PRTE_RML_CANCEL(PRTE_NAME_WILDCARD, PRTE_RML_TAG_REPORT_REMOTE_LAUNCH);
108110
PRTE_RML_CANCEL(PRTE_NAME_WILDCARD, PRTE_RML_TAG_TOPOLOGY_REPORT);
111+
PRTE_RML_CANCEL(PRTE_NAME_WILDCARD, PRTE_RML_TAG_STACK_TRACE);
109112
}
110113
recv_issued = false;
111114

src/mca/plm/base/plm_private.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ PRTE_EXPORT void prte_plm_base_daemon_failed(int status, pmix_proc_t *sender,
8989
PRTE_EXPORT void prte_plm_base_daemon_topology(int status, pmix_proc_t *sender,
9090
pmix_data_buffer_t *buffer, prte_rml_tag_t tag,
9191
void *cbdata);
92+
PRTE_EXPORT void prte_plm_base_stack_trace_recv(int status, pmix_proc_t *sender,
93+
pmix_data_buffer_t *buffer,
94+
prte_rml_tag_t tag, void *cbdata);
9295

9396
PRTE_EXPORT int prte_plm_base_create_jobid(prte_job_t *jdata);
9497
PRTE_EXPORT int prte_plm_base_set_hnp_name(void);

src/mca/rmaps/base/rmaps_base_map_job.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,23 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
317317
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
318318
}
319319
}
320+
/* if not already assigned, inherit the parent's output directives */
321+
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_TAG_OUTPUT, NULL, PMIX_BOOL)) {
322+
if (prte_get_attribute(&parent->attributes, PRTE_JOB_TAG_OUTPUT, (void **) &fptr, PMIX_BOOL)) {
323+
prte_set_attribute(&jdata->attributes, PRTE_JOB_TAG_OUTPUT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
324+
}
325+
}
326+
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_TIMESTAMP_OUTPUT, NULL, PMIX_BOOL)) {
327+
if (prte_get_attribute(&parent->attributes, PRTE_JOB_TIMESTAMP_OUTPUT, (void **) &fptr, PMIX_BOOL)) {
328+
prte_set_attribute(&jdata->attributes, PRTE_JOB_TIMESTAMP_OUTPUT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
329+
}
330+
}
331+
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_MERGE_STDERR_STDOUT, NULL, PMIX_BOOL)) {
332+
if (prte_get_attribute(&parent->attributes, PRTE_JOB_MERGE_STDERR_STDOUT, (void **) &fptr, PMIX_BOOL)) {
333+
prte_set_attribute(&jdata->attributes, PRTE_JOB_MERGE_STDERR_STDOUT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
334+
}
335+
}
336+
320337
// copy over any env directives, but do not overwrite anything already specified
321338
inherit_env_directives(jdata, parent, nptr);
322339
} else {

test/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
1515
# Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved.
1616
# Copyright (c) 2016-2020 Intel, Inc. All rights reserved.
17-
# Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
17+
# Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
1818
# Copyright (c) 2023 Triad National Security, LLC. All rights reserved.
1919
# $COPYRIGHT$
2020
#
@@ -52,7 +52,8 @@ TESTS = \
5252
iostress \
5353
filegen \
5454
clichk \
55-
chkfs
55+
chkfs \
56+
spawn_timeout
5657

5758
all: $(TESTS)
5859

0 commit comments

Comments
 (0)