Index: drbd/drbd_main.c =================================================================== --- drbd/drbd_main.c (.../trunk) (revision 5989) +++ drbd/drbd_main.c (.../branches/panic-2) (revision 5989) @@ -101,8 +101,10 @@ #ifdef DRBD_ENABLE_FAULTS int enable_faults = 0; int fault_rate; +int fault_count; module_param(enable_faults,int,0664); // bitmap of enabled faults module_param(fault_rate,int,0664); // fault rate % value - applies to all enabled faults +module_param(fault_count,int,0664); // count of faults inserted #endif // module parameter, defined @@ -2830,30 +2832,34 @@ STATIC char * _drbd_fault_str(unsigned int type) { - static char *_faults[] = { - "Meta-data write", - "Meta-data read", - "Resync write", - "Resync read", - "Data write", - "Data read", - }; + static char *_faults[] = { + "Meta-data write", + "Meta-data read", + "Resync write", + "Resync read", + "Data write", + "Data read", + }; - return (type < DRBD_FAULT_MAX)? _faults[type] : "**Unknown**"; + return (type < DRBD_FAULT_MAX)? _faults[type] : "**Unknown**"; } unsigned int _drbd_insert_fault(unsigned int type) { - static struct fault_random_state rrs = {0,0}; + static struct fault_random_state rrs = {0,0}; - unsigned int rnd = ((_drbd_fault_random(&rrs) % 100) + 1); - unsigned int ret = (rnd <= fault_rate); + unsigned int rnd = ((_drbd_fault_random(&rrs) % 100) + 1); + unsigned int ret = (rnd <= fault_rate); - if (ret && printk_ratelimit()) - printk(KERN_ALERT "Simulating %s failure\n", _drbd_fault_str(type)); + if (ret) { + fault_count++; - return ret; + if (printk_ratelimit()) + printk(KERN_ALERT "Simulating %s failure\n", _drbd_fault_str(type)); + } + + return ret; } #endif Index: drbd/drbd_req.c =================================================================== --- drbd/drbd_req.c (.../trunk) (revision 5989) +++ drbd/drbd_req.c (.../branches/panic-2) (revision 5989) @@ -84,7 +84,9 @@ s & RQ_NET_SENT ? 's' : '-', s & RQ_NET_DONE ? 'd' : '-', s & RQ_NET_OK ? 
'o' : '-', - req->epoch, req->sector, req->size, + req->epoch, + (unsigned long long)req->sector, + req->size, conns_to_name(mdev->state.conn)); } @@ -655,7 +657,9 @@ s & RQ_NET_SENT ? 's' : '-', s & RQ_NET_DONE ? 'd' : '-', s & RQ_NET_OK ? 'o' : '-', - req->epoch, req->sector, req->size, + req->epoch, + (unsigned long long)req->sector, + req->size, conns_to_name(mdev->state.conn)); } D_ASSERT(req->rq_state & RQ_NET_SENT); Index: testing/testsuite/testfaults.conf =================================================================== --- testing/testsuite/testfaults.conf (.../trunk) (revision 5989) +++ testing/testsuite/testfaults.conf (.../branches/panic-2) (revision 5989) @@ -101,11 +101,12 @@ # attach/detach a few times (want at least 10 failures! for (my $i = 0,$errcount=0; $errcount < 10; ++$i) { INFO " $i. attach"; + cmd clr_fault_ct, on node1; cmd '/sbin/drbdadm attach {resource}', on node1; sleep 2; - if ((get state_ds, on node1) =~ /Diskless/) { + if ((get fault_ct, on node1) > 0) { $errcount = $errcount + 1; INFO (" goodness: meta data read fault fired ($errcount)"); } @@ -139,11 +140,12 @@ # attach/detach a few times (want at least one failure! for (my $i = 0,$errcount=0; $errcount < 10; ++$i) { INFO " $i. attach"; + cmd clr_fault_ct, on node1; cmd '/sbin/drbdadm attach {resource}', on node1; sleep 2; - if ((get state_ds, on node1) =~ /Diskless/) { + if ((get fault_ct, on node1) > 0) { $errcount = $errcount+1; INFO(" goodness: meta data write fault fired ($errcount)"); } @@ -172,86 +174,15 @@ expected 'cs', state 'Connected', timeout 15; expected 'ds', state 'UpToDate', timeout 15; - # simulate meta data write failures on partner node - INFO "3. Simulate remote meta data write failures during attach"; - - # switch to primary - INFO " First, switch local to primary -- this will cause md write on remote"; - cmd '/sbin/drbdadm primary {resource}', on node1; - - # Check node1 went primary... 
- expected 'st', state 'Primary', timeout 15, on node1; - - cmd '/sbin/drbdadm detach {resource}', on node1; - - cmd set_fr, on node2; - cmd set_md_wr, on node2; - - # attach/detach a few times (want to see at least one failure! - for (my $i = 0,$errcount=0; $errcount < 5; ++$i) { - INFO " $i. attach"; - cmd '/sbin/drbdadm attach {resource}', on node1; - - sleep 2; - - if ((get state_ds, on node2) =~ /Diskless/) { - $errcount = $errcount + 1; - INFO(" goodness: meta data write fault fired ($errcount)"); - - # recover everything... - cmd clr_fr, on node2; - if ((get state_ds, on node1) =~ /Diskless/) { - INFO(" Reattach on node1"); - cmd '/sbin/drbdadm attach {resource}', on node1; - } - INFO (" reattach on node2"); - cmd '/sbin/drbdadm attach {resource}', on node2; - INFO " device attached - wait for data to be synchronized..."; - expected 'cs', state 'Connected', timeout 500; - expected 'ds', state 'UpToDate', timeout 500; - - INFO " ... and detach device"; - cmd '/sbin/drbdadm detach {resource}', on node1; - - cmd set_fr, on node2; - cmd set_md_rd, on node2; - - } - else { - # might need to wait for resync here... disable - # faults whilst we wait - cmd clr_fr, on node2; - - INFO " device attached - wait for data to be synchronized..."; - expected 'cs', state 'Connected', timeout 500; - expected 'ds', state 'UpToDate', timeout 500; - - INFO " ... and detach device"; - cmd '/sbin/drbdadm detach {resource}', on node1; - - cmd set_fr, on node2; - cmd set_md_rd, on node2; - } - } - - cmd clr_fr, on node2; - - # make sure disk is attached... - INFO " Finally, ensure all disks attached"; - cmd '/sbin/drbdadm attach {resource}', on node1; - - # everyone should be in connected cstate, uptodate dstate - expected 'cs', state 'Connected', timeout 500; - expected 'ds', state 'UpToDate', timeout 500; - # simulate read errors locally - INFO "4. Simulate local user data read failures"; + INFO "3. 
Simulate local user data read failures"; cmd set_fr, on node1; cmd set_dt_rd, on node1; # mount file system a few times. for (my $i = 0; $i < 20; ++$i) { INFO " $i. mount device"; + cmd clr_fault_ct, on node1; cmd 'mount /dev/{device} {mountpoint}', on node1; INFO " unmount device"; cmd 'umount /dev/{device}', on node1; @@ -266,13 +197,14 @@ cmd clr_fr, on node1; # simulate write errors locally - INFO "5. Simulate local user data write failures"; + INFO "4. Simulate local user data write failures"; cmd set_fr, on node1; cmd set_dt_wr, on node1; # mount file system and modify - check for errors! for (my $i = 0; $i < 20; ++$i) { INFO " $i. mount device"; + cmd clr_fault_ct, on node1; cmd 'mount /dev/{device} {mountpoint}', on node1; # create some files - should get some errors... @@ -290,7 +222,7 @@ cmd clr_fr, on node1; # simulate read errors remotely - INFO "6. Simulate remote user data read failures"; + INFO "5. Simulate remote user data read failures"; cmd 'drbdadm detach {resource}', on node1; cmd set_fr, on node2; @@ -299,6 +231,7 @@ # mount file system - check for errors! for (my $i = 0; $i < 20; ++$i) { INFO " $i. mount device"; + cmd clr_fault_ct, on node1; cmd 'mount /dev/{device} {mountpoint}', on node1; INFO " unmount device"; cmd 'umount /dev/{device}', on node1; @@ -318,13 +251,14 @@ expected 'ds', state 'UpToDate', timeout 15; # simulate write errors remotely - INFO "7. Simulate remote user data write failures"; + INFO "6. Simulate remote user data write failures"; cmd set_fr, on node2; cmd set_dt_wr, on node2; for (my $i = 0; $i < 20; ++$i) { # mount file system - check for errors! INFO " $i. mount device"; + cmd clr_fault_ct, on node1; cmd 'mount /dev/{device} {mountpoint}', on node1; # create some files - should get some errors... 
Index: testing/testsuite/testsuite.pl =================================================================== --- testing/testsuite/testsuite.pl (.../trunk) (revision 5989) +++ testing/testsuite/testsuite.pl (.../branches/panic-2) (revision 5989) @@ -92,6 +92,8 @@ $commands{'set_rs_rd'} = 'echo 8 >/sys/module/drbd/parameters/enable_faults'; $commands{'set_dt_wr'} = 'echo 16 >/sys/module/drbd/parameters/enable_faults'; $commands{'set_dt_rd'} = 'echo 32 >/sys/module/drbd/parameters/enable_faults'; +$commands{'fault_ct'} = 'cat /sys/module/drbd/parameters/fault_count'; +$commands{'clr_fault_ct'} = 'echo 0 >/sys/module/drbd/parameters/fault_count'; ###############################################################################