Index: testing/testsuite/testfaults.conf =================================================================== --- testing/testsuite/testfaults.conf (.../trunk) (revision 4440) +++ testing/testsuite/testfaults.conf (.../branches/panic-2) (revision 4440) @@ -98,23 +98,27 @@ cmd set_fr, on node1; cmd set_md_rd, on node1; - # attach/detach a few times (want at least one failure! - for (my $i = 0; $i < 5; ++$i) { + # attach/detach a few times (want at least 10 failures!) + for (my ($i, $errcount) = (0, 0); $errcount < 10; ++$i) { + INFO " $i. attach"; cmd '/sbin/drbdadm attach {resource}', on node1; sleep 2; if ((get state_ds, on node1) =~ /Diskless/) { - VERBOSE ("goodness: meta data read fault fired"); + $errcount = $errcount + 1; + INFO (" goodness: meta data read fault fired ($errcount)"); } else { # might need to wait for resync here... disable # faults whilst we wait cmd clr_fr, on node1; + INFO " device attached - wait for data to be synchronized..."; expected 'cs', state 'Connected', timeout 500; expected 'ds', state 'UpToDate', timeout 500; + INFO " ... and detach device"; cmd '/sbin/drbdadm detach {resource}', on node1; cmd set_fr, on node1; @@ -133,22 +137,26 @@ cmd set_md_wr, on node1; # attach/detach a few times (want at least one failure! - for (my $i = 0; $i < 5; ++$i) { + for (my ($i, $errcount) = (0, 0); $errcount < 10; ++$i) { + INFO " $i. attach"; cmd '/sbin/drbdadm attach {resource}', on node1; sleep 2; if ((get state_ds, on node1) =~ /Diskless/) { - VERBOSE ("goodness: meta data write fault fired"); + $errcount = $errcount+1; + INFO(" goodness: meta data write fault fired ($errcount)"); } else { # might need to wait for resync here... disable # faults whilst we wait cmd clr_fr, on node1; + INFO " device attached - wait for data to be synchronized..."; expected 'cs', state 'Connected', timeout 500; expected 'ds', state 'UpToDate', timeout 500; + INFO " ... 
and detach device"; cmd '/sbin/drbdadm detach {resource}', on node1; cmd set_fr, on node1; @@ -164,36 +172,61 @@ expected 'cs', state 'Connected', timeout 15; expected 'ds', state 'UpToDate', timeout 15; + # simulate meta data write failures on partner node + INFO "3. Simulate remote meta data write failures during attach"; + # switch to primary + INFO " First, switch local to primary -- this will cause md write on remote"; cmd '/sbin/drbdadm primary {resource}', on node1; # Check node1 went primary... expected 'st', state 'Primary', timeout 15, on node1; - # simulate meta data write failures on partner node - INFO "3. Simulate remote meta data write failures during attach"; + cmd '/sbin/drbdadm detach {resource}', on node1; + cmd set_fr, on node2; - cmd set_md_rd, on node2; + cmd set_md_wr, on node2; - cmd '/sbin/drbdadm detach {resource}', on node1; - # attach/detach a few times (want to see at least one failure! - for (my $i = 0; $i < 5; ++$i) { + for (my ($i, $errcount) = (0, 0); $errcount < 5; ++$i) { + INFO " $i. attach"; cmd '/sbin/drbdadm attach {resource}', on node1; sleep 2; - if ((get state_ds, on node1) =~ /Diskless/) { - VERBOSE ("goodness: meta data write fault fired"); + if ((get state_ds, on node2) =~ /Diskless/) { + $errcount = $errcount + 1; + INFO(" goodness: meta data write fault fired ($errcount)"); + + # recover everything... + cmd clr_fr, on node2; + if ((get state_ds, on node1) =~ /Diskless/) { + INFO(" Reattach on node1"); + cmd '/sbin/drbdadm attach {resource}', on node1; + } + INFO (" reattach on node2"); + cmd '/sbin/drbdadm attach {resource}', on node2; + INFO " device attached - wait for data to be synchronized..."; + expected 'cs', state 'Connected', timeout 500; + expected 'ds', state 'UpToDate', timeout 500; + + INFO " ... and detach device"; + cmd '/sbin/drbdadm detach {resource}', on node1; + + cmd set_fr, on node2; + cmd set_md_wr, on node2; + } else { # might need to wait for resync here... 
disable # faults whilst we wait cmd clr_fr, on node2; + INFO " device attached - wait for data to be synchronized..."; expected 'cs', state 'Connected', timeout 500; expected 'ds', state 'UpToDate', timeout 500; + INFO " ... and detach device"; cmd '/sbin/drbdadm detach {resource}', on node1; cmd set_fr, on node2; @@ -203,8 +236,9 @@ cmd clr_fr, on node2; - # make sure partner is attached... - cmd '/sbin/drbdadm attach {resource}', on node2; + # make sure disk is attached... + INFO " Finally, ensure all disks attached"; + cmd '/sbin/drbdadm attach {resource}', on node1; # everyone should be in connected cstate, uptodate dstate expected 'cs', state 'Connected', timeout 500; @@ -216,8 +250,10 @@ cmd set_dt_rd, on node1; # mount file system a few times. - for (my $i = 0; $i < 5; ++$i) { + for (my $i = 0; $i < 20; ++$i) { + INFO " $i. mount device"; cmd 'mount /dev/{device} {mountpoint}', on node1; + INFO " unmount device"; cmd 'umount /dev/{device}', on node1; sleep 2; @@ -235,12 +271,14 @@ cmd set_dt_wr, on node1; # mount file system and modify - check for errors! - for (my $i = 0; $i < 5; ++$i) { + for (my $i = 0; $i < 20; ++$i) { + INFO " $i. mount device"; cmd 'mount /dev/{device} {mountpoint}', on node1; # create some files - should get some errors... cmd 'cp -f /boot/* {mountpoint}', on node1; + INFO " unmount device"; cmd 'umount /dev/{device}', on node1; sleep 2; @@ -259,8 +297,10 @@ cmd set_dt_rd, on node2; # mount file system - check for errors! - for (my $i = 0; $i < 5; ++$i) { + for (my $i = 0; $i < 20; ++$i) { + INFO " $i. mount device"; cmd 'mount /dev/{device} {mountpoint}', on node1; + INFO " unmount device"; cmd 'umount /dev/{device}', on node1; sleep 2; @@ -278,17 +318,19 @@ expected 'ds', state 'UpToDate', timeout 15; # simulate write errors remotely - INFO "6. Simulate remote user data write failures"; + INFO "7. 
Simulate remote user data write failures"; cmd set_fr, on node2; cmd set_dt_wr, on node2; - for (my $i = 0; $i < 5; ++$i) { + for (my $i = 0; $i < 20; ++$i) { # mount file system - check for errors! + INFO " $i. mount device"; cmd 'mount /dev/{device} {mountpoint}', on node1; # create some files - should get some errors... cmd 'cp -f /boot/* {mountpoint}', on node1; + INFO " unmount device"; cmd 'umount /dev/{device}', on node1; sleep 2; Index: testing/testsuite/agent.pl =================================================================== --- testing/testsuite/agent.pl (.../trunk) (revision 4440) +++ testing/testsuite/agent.pl (.../branches/panic-2) (revision 4440) @@ -215,7 +215,7 @@ do { $output = `$command 2>&1`; chomp($output); - } while (defined($expected) && !($output eq $expected)); + } while (defined($expected) && !($output eq $expected) && usleep(100000)); alarm 0; }; if ($@) {