Skip to content

Commit

Permalink
Merge pull request #5438 from chu11/issue5436_t2602_signaling_race
Browse files Browse the repository at this point in the history
testsuite: handle job signal race in more tests
  • Loading branch information
mergify[bot] authored Sep 19, 2023
2 parents 0931188 + ec69c8c commit ec8aece
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 13 deletions.
2 changes: 1 addition & 1 deletion t/t2601-job-shell-standalone.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_description='Test flux-shell in --standalone mode'
. `dirname $0`/sharness.sh

# Run flux-shell under flux command to get correct paths
FLUX_SHELL="run_timeout 60 flux ${FLUX_BUILD_DIR}/src/shell/flux-shell"
FLUX_SHELL="run_timeout 300 flux ${FLUX_BUILD_DIR}/src/shell/flux-shell"

PMI_INFO=${FLUX_BUILD_DIR}/src/common/libpmi/test_pmi_info
KVSTEST=${FLUX_BUILD_DIR}/src/common/libpmi/test_kvstest
Expand Down
35 changes: 25 additions & 10 deletions t/t2602-job-shell.t
Original file line number Diff line number Diff line change
Expand Up @@ -184,30 +184,44 @@ test_expect_success LONGTEST 'job-shell: verify 10K line lptest output works' '
flux job attach -l $id >lptestXXL.out &&
test_cmp lptestXXL.exp lptestXXL.out
'
# N.B. use sleepinf.sh and wait-event on job output data to work around
# a rare job startup race. See #5210
# Write an executable sleepinf.sh into the test directory. The script
# prints "job started" and then runs "sleep inf", so a test can wait for
# the job's first output before sending it a signal.
test_expect_success 'create helper job submission script' '
cat >sleepinf.sh <<-EOT &&
#!/bin/sh
echo "job started"
sleep inf
EOT
chmod +x sleepinf.sh
'
test_expect_success 'job-shell: test shell kill event handling' '
id=$(flux submit -n4 -N4 sleep 300) &&
flux job wait-event $id start &&
id=$(flux submit -n4 -N4 ./sleepinf.sh) &&
flux job wait-event -p guest.exec.eventlog $id shell.init &&
flux job wait-event -p guest.output $id data &&
flux job kill $id &&
flux job wait-event $id finish >kill1.finish.out &&
grep status=$((15+128<<8)) kill1.finish.out
'
test_expect_success 'job-shell: test shell kill event handling: SIGKILL' '
id=$(flux submit -n4 -N4 sleep 300) &&
flux job wait-event $id start &&
id=$(flux submit -n4 -N4 ./sleepinf.sh) &&
flux job wait-event -p guest.exec.eventlog $id shell.init &&
flux job wait-event -p guest.output $id data &&
flux job kill -s SIGKILL $id &&
flux job wait-event $id finish >kill2.finish.out &&
grep status=$((9+128<<8)) kill2.finish.out
'
test_expect_success 'job-shell: test shell kill event handling: numeric signal' '
id=$(flux submit -n4 -N4 sleep 300) &&
flux job wait-event $id start &&
id=$(flux submit -n4 -N4 ./sleepinf.sh) &&
flux job wait-event -p guest.exec.eventlog $id shell.init &&
flux job wait-event -p guest.output $id data &&
flux job kill -s 2 $id &&
flux job wait-event $id finish >kill3.finish.out &&
grep status=$((2+128<<8)) kill3.finish.out
'
test_expect_success 'job-shell: mangled shell kill event logged' '
id=$(flux submit -n4 -N4 sleep 300 | flux job id) &&
flux job wait-event $id start &&
id=$(flux submit -n4 -N4 ./sleepinf.sh | flux job id) &&
flux job wait-event -p guest.exec.eventlog $id shell.init &&
flux job wait-event -p guest.output $id data &&
flux event pub shell-${id}.kill "{}" &&
flux job kill ${id} &&
flux job wait-event -vt 1 $id finish >kill4.finish.out &&
Expand All @@ -216,8 +230,9 @@ test_expect_success 'job-shell: mangled shell kill event logged' '
grep "ignoring malformed event" kill4.log
'
test_expect_success 'job-shell: shell kill event: kill(2) failure logged' '
id=$(flux submit -n4 -N4 sleep 300 | flux job id) &&
flux job wait-event $id start &&
id=$(flux submit -n4 -N4 ./sleepinf.sh | flux job id) &&
flux job wait-event -p guest.exec.eventlog $id shell.init &&
flux job wait-event -p guest.output $id data &&
flux event pub shell-${id}.kill "{\"signum\":199}" &&
flux job kill ${id} &&
flux job wait-event $id finish >kill5.finish.out &&
Expand Down
4 changes: 2 additions & 2 deletions t/t2611-debug-emulate.t
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ stop_tasks_test() {

test_under_flux 2

TIMEOUT=10
TIMEOUT=100

parse_jobid() {
outfile=$1 &&
Expand Down Expand Up @@ -103,7 +103,7 @@ test_expect_success 'debugger: job attach --debug must not continue target' '
flux_job_attach ${jobid} jobid.out3 &&
tv_jobid=$(parse_totalview_jobid jobid.out3) &&
test ${tv_jobid} = "${jobid}" &&
test_must_fail flux job wait-event -vt ${TIMEOUT} ${jobid} finish &&
test_must_fail flux job wait-event -vt 2 ${jobid} finish &&
flux cancel ${jobid} &&
flux job wait-event -vt ${TIMEOUT} ${jobid} finish
'
Expand Down

0 comments on commit ec8aece

Please sign in to comment.