diff options
Diffstat (limited to 'tools/testing/selftests/rcutorture/bin/kvm-remote.sh')
| -rwxr-xr-x | tools/testing/selftests/rcutorture/bin/kvm-remote.sh | 37 |
1 files changed, 30 insertions, 7 deletions
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index a2328163eba1..48a8052d5dae 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -137,14 +137,20 @@ chmod +x $T/bin/kvm-remote-*.sh # Check first to avoid the need for cleanup for system-name typos for i in $systems do - ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`" + ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr ret=$? if test "$ret" -ne 0 then - echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log" + echo "System $i unreachable ($ret), giving up." | tee -a "$oldrun/remote-log" + echo ' --- ssh stdout: vvv' | tee -a "$oldrun/remote-log" + cat $T/ssh.stdout | tee -a "$oldrun/remote-log" + echo ' --- ssh stdout: ^^^' | tee -a "$oldrun/remote-log" + echo ' --- ssh stderr: vvv' | tee -a "$oldrun/remote-log" + cat $T/ssh.stderr | tee -a "$oldrun/remote-log" + echo ' --- ssh stderr: ^^^' | tee -a "$oldrun/remote-log" exit 4 fi - echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log" + echo $i: `cat $T/ssh.stdout` CPUs " " `date` | tee -a "$oldrun/remote-log" done # Download and expand the tarball on all systems. @@ -175,10 +181,11 @@ done # Function to check for presence of a file on the specified system. # Complain if the system cannot be reached, and retry after a wait. -# Currently just waits forever if a machine disappears. +# Currently just waits 15 minutes if a machine disappears. # # Usage: checkremotefile system pathname checkremotefile () { + local nsshfails=0 local ret local sleeptime=60 @@ -189,6 +196,11 @@ checkremotefile () { if test "$ret" -eq 255 then echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" + nsshfails=$((nsshfails+1)) + if ((nsshfails > 15)) + then + return 255 + fi elif test "$ret" -eq 0 then return 0 @@ -262,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log" for i in $systems do echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log" - while checkremotefile "$i" "$resdir/$ds/remote.run" + while : do + checkremotefile "$i" "$resdir/$ds/remote.run" + ret=$? + if test "$ret" -eq 1 + then + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + break; + fi + if test "$ret" -eq 255 + then + echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log" + break; + fi sleep 30 done - echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" - ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log" |
