Index: drbd/drbd_receiver.c =================================================================== --- drbd/drbd_receiver.c (.../trunk) (revision 4048) +++ drbd/drbd_receiver.c (.../branches/drbd-panic) (revision 4048) @@ -1000,7 +1000,7 @@ set_bit(SYNC_STARTED,&mdev->flags); } else { ok = drbd_send_ack(mdev,NegAck,e); - ok&= drbd_io_error(mdev); + ok&= drbd_io_error(mdev, FALSE); } dec_unacked(mdev); @@ -1027,7 +1027,7 @@ list_add(&e->w.list,&mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); - drbd_generic_make_request(WRITE,e->private_bio); + drbd_generic_make_request(WRITE,DRBD_FAULT_RS_WR,e->private_bio); /* accounting done in endio */ maybe_kick_lo(mdev); @@ -1154,14 +1154,14 @@ * may not list_del() the ee before this callback did run... * maybe even move the list_del(e) in here... */ ok = drbd_send_ack(mdev,NegAck,e); - ok&= drbd_io_error(mdev); + ok&= drbd_io_error(mdev, FALSE); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } dec_unacked(mdev); return ok; } else if(unlikely(!drbd_bio_uptodate(e->private_bio))) { - ok = drbd_io_error(mdev); + ok = drbd_io_error(mdev, FALSE); } // warning LGE "FIXME code missing" @@ -1428,7 +1428,7 @@ } /* FIXME drbd_al_begin_io in case we have two primaries... */ - drbd_generic_make_request(WRITE,e->private_bio); + drbd_generic_make_request(WRITE,DRBD_FAULT_DT_WR,e->private_bio); /* accounting done in endio */ maybe_kick_lo(mdev); @@ -1449,6 +1449,7 @@ const sector_t capacity = drbd_get_capacity(mdev->this_bdev); struct Tl_epoch_entry *e; int size; + unsigned int fault_type; Drbd_BlockRequest_Packet *p = (Drbd_BlockRequest_Packet*)h; ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; @@ -1493,9 +1494,11 @@ switch (h->command) { case DataRequest: e->w.cb = w_e_end_data_req; + fault_type = DRBD_FAULT_DT_RD; break; case RSDataRequest: e->w.cb = w_e_end_rsdata_req; + fault_type = DRBD_FAULT_RS_RD; /* Eventually this should become asynchrously. 
Currently it * blocks the whole receiver just to delay the reading of a * resync data block. @@ -1511,11 +1514,12 @@ } break; default:; /* avoid compiler warning */ + fault_type = DRBD_FAULT_MAX; } inc_unacked(mdev); /* FIXME actually, it could be a READA originating from the peer ... */ - drbd_generic_make_request(READ,e->private_bio); + drbd_generic_make_request(READ,fault_type,e->private_bio); maybe_kick_lo(mdev); return TRUE; @@ -1745,15 +1749,22 @@ //WARN("uuid_compare()=%d\n",hg); if (hg == 100) { - if ( mdev->state.role==Secondary && peer_role==Secondary ) { + int pcount = (mdev->state.role==Primary) + (peer_role==Primary); + + switch (pcount) { + case 0: hg = drbd_asb_recover_0p(mdev); - } else if (mdev->state.role==Primary && peer_role==Primary) { + break; + case 1: + hg = drbd_asb_recover_1p(mdev); + break; + case 2: hg = drbd_asb_recover_2p(mdev); - } else { - hg = drbd_asb_recover_1p(mdev); + break; } if ( abs(hg) < 100 ) { - WARN("Split-Brain detected, automatically solved.\n"); + WARN("Split-Brain detected, %d primaries, automatically solved. Sync from %s node\n", + pcount, (hg < 0) ? "peer":"this"); } } @@ -1766,7 +1777,8 @@ } if ( abs(hg) < 100 ) { - WARN("Split-Brain detected, manually solved.\n"); + WARN("Split-Brain detected, manually solved. Sync from %s node\n", + (hg < 0) ? 
"peer":"this"); } } @@ -1786,6 +1798,8 @@ if (hg == -100) { ALERT("Split-Brain detected, dropping connection!\n"); + drbd_uuid_dump(mdev,"self",mdev->bc->md.uuid); + drbd_uuid_dump(mdev,"peer",mdev->p_uuid); drbd_force_state(mdev,NS(conn,StandAlone)); drbd_thread_stop_nowait(&mdev->receiver); return conn_mask; @@ -2407,8 +2421,6 @@ else handler = NULL; - dump_packet(mdev,mdev->data.socket,2,&mdev->data.rbuf, __FILE__, __LINE__); - if (unlikely(!handler)) { ERR("unknown packet type %d, l: %d!\n", header->command, header->length); @@ -2419,6 +2431,8 @@ cmdname(header->command), header->length); break; } + + dump_packet(mdev,mdev->data.socket,2,&mdev->data.rbuf, __FILE__, __LINE__); } } @@ -2887,17 +2901,15 @@ return FALSE; } - /* FIXME what for ?? list_del(&req->w.list); */ + ERR("Got NegDReply; Sector %llx, len %x; Fail original request.\n", + (unsigned long long)sector,be32_to_cpu(p->blksize)); + _req_mod(req, neg_acked); spin_unlock_irq(&mdev->req_lock); // warning LGE "ugly and wrong" drbd_khelper(mdev,"pri-on-incon-degr"); - drbd_panic("Got NegDReply. WE ARE LOST. We lost our up-to-date disk.\n"); - // THINK do we have other options, but panic? - // what about bio_endio, in case we don't panic ?? - return TRUE; } Index: drbd/drbd_nl.c =================================================================== --- drbd/drbd_nl.c (.../trunk) (revision 4048) +++ drbd/drbd_nl.c (.../branches/drbd-panic) (revision 4048) @@ -738,7 +738,7 @@ goto release_bdev3_fail; } - // Since ware are diskless, fix the AL first... + // Since we are are diskless, fix the AL first... 
if (drbd_check_al_size(mdev)) { retcode = KMallocFailed; goto release_bdev3_fail; @@ -877,8 +877,10 @@ return 0; release_bdev3_fail: + nbc = NULL; /* will be freed by state change below */ drbd_force_state(mdev,NS(disk,Diskless)); drbd_md_sync(mdev); + goto fail; release_bdev2_fail: bd_release(nbc->md_bdev); release_bdev_fail: Index: drbd/drbd_actlog.c =================================================================== --- drbd/drbd_actlog.c (.../trunk) (revision 4048) +++ drbd/drbd_actlog.c (.../branches/drbd-panic) (revision 4048) @@ -49,12 +49,18 @@ bio->bi_private = &event; bio->bi_end_io = drbd_md_io_complete; + if (FAULT_ACTIVE((rw & WRITE)? DRBD_FAULT_MD_WR:DRBD_FAULT_MD_RD)) { + bio->bi_rw |= rw; + bio_endio(bio,bio->bi_size,-EIO); + } + else { #ifdef BIO_RW_SYNC - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(rw | (1 << BIO_RW_SYNC), bio); #else - submit_bio(rw, bio); - drbd_blk_run_queue(bdev_get_queue(bdev->md_bdev)); + submit_bio(rw, bio); + drbd_blk_run_queue(bdev_get_queue(bdev->md_bdev)); #endif + } wait_for_completion(&event); ok = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -108,7 +114,11 @@ ok = _drbd_md_sync_page_io(bdev,iop, sector,READ,hardsect); - if (unlikely(!ok)) return 0; + if (unlikely(!ok)) { + ERR("drbd_md_sync_page_io(,%llu,READ [hardsect!=512]) failed!\n", + (unsigned long long)sector); + return 0; + } memcpy(hp + offset*MD_HARDSECT , p, MD_HARDSECT); } @@ -130,6 +140,7 @@ if (unlikely(!ok)) { ERR("drbd_md_sync_page_io(,%llu,%s) failed!\n", (unsigned long long)sector,rw ? 
"WRITE" : "READ"); + return 0; } if( hardsect != MD_HARDSECT && rw == READ ) { @@ -326,8 +337,8 @@ sector = mdev->bc->md.md_offset + mdev->bc->md.al_offset + mdev->al_tr_pos; if(!drbd_md_sync_page_io(mdev,mdev->bc,sector,WRITE)) { - drbd_chk_io_error(mdev, 1); - drbd_io_error(mdev); + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); } if( ++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements,AL_EXTENTS_PT) ) { @@ -361,6 +372,8 @@ sector = bdev->md.md_offset + bdev->md.al_offset + index; if(!drbd_md_sync_page_io(mdev,bdev,sector,READ)) { + // Don't process the error normally, as this is done before + // the disk is attached! return -1; } @@ -500,22 +513,20 @@ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - i=inc_local_if_state(mdev,Attaching); - D_ASSERT( i ); // Assertions should not have side effects. - // I do not want to have D_ASSERT( inc_md_only(mdev,Attaching) ); + if (inc_local_if_state(mdev,Attaching)) { + for(i=0;i<mdev->act_log->nr_elements;i++) { + enr = lc_entry(mdev->act_log,i)->lc_number; + if(enr == LC_FREE) continue; + /* TODO encapsulate and optimize within drbd_bitmap + * currently, if we have al-extents 16..19 active, + * sector 4 will be written four times! */ + drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT ); + } - for(i=0;i<mdev->act_log->nr_elements;i++) { - enr = lc_entry(mdev->act_log,i)->lc_number; - if(enr == LC_FREE) continue; - /* TODO encapsulate and optimize within drbd_bitmap - * currently, if we have al-extents 16..19 active, - * sector 4 will be written four times! 
*/ - drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT ); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + dec_local(mdev); } - - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - dec_local(mdev); } /** Index: drbd/drbd_worker.c =================================================================== --- drbd/drbd_worker.c (.../trunk) (revision 4048) +++ drbd/drbd_worker.c (.../branches/drbd-panic) (revision 4048) @@ -87,7 +87,7 @@ if(list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); spin_unlock_irqrestore(&mdev->req_lock,flags); - drbd_chk_io_error(mdev,error); + drbd_chk_io_error(mdev,error,FALSE); drbd_queue_work(&mdev->data.work,&e->w); dec_local(mdev); return 0; @@ -129,7 +129,7 @@ ? list_empty(&mdev->sync_ee) : list_empty(&mdev->active_ee); - if (error) __drbd_chk_io_error(mdev); + if (error) __drbd_chk_io_error(mdev,FALSE); spin_unlock_irqrestore(&mdev->req_lock,flags); if (do_wake) wake_up(&mdev->ee_wait); @@ -182,7 +182,7 @@ if(unlikely(cancel)) return 1; - ok = drbd_io_error(mdev); + ok = drbd_io_error(mdev, FALSE); if(unlikely(!ok)) ERR("Sending in w_io_error() failed\n"); return ok; } @@ -206,7 +206,7 @@ /* FIXME this is ugly. we should not detach for read io-error, * but try to WRITE the DataReply to the failed location, * to give the disk the chance to relocate that block */ - drbd_io_error(mdev); /* tries to schedule a detach and notifies peer */ + drbd_io_error(mdev,FALSE); /* tries to schedule a detach and notifies peer */ return w_send_read_req(mdev,w,0); } @@ -438,7 +438,7 @@ /* FIXME we should not detach for read io-errors, in particular * not now: when the peer asked us for our data, we are likely * the only remaining disk... */ - drbd_io_error(mdev); + drbd_io_error(mdev,FALSE); } dec_unacked(mdev); @@ -484,8 +484,8 @@ } else { ok=drbd_send_ack(mdev,NegRSDReply,e); if (DRBD_ratelimit(5*HZ,5)) - ERR("Sending NegDReply. I guess it gets messy.\n"); - drbd_io_error(mdev); + ERR("Sending NegRSDReply. 
I guess it gets messy.\n"); + drbd_io_error(mdev, FALSE); } dec_unacked(mdev); Index: drbd/drbd_compat_wrappers.h =================================================================== --- drbd/drbd_compat_wrappers.h (.../trunk) (revision 4048) +++ drbd/drbd_compat_wrappers.h (.../branches/drbd-panic) (revision 4048) @@ -129,7 +129,7 @@ /* * used to submit our private bio */ -static inline void drbd_generic_make_request(int rw, struct bio *bio) +static inline void drbd_generic_make_request(int rw, int fault_type, struct bio *bio) { bio->bi_rw = rw; // on the receiver side, e->..rw was not yet defined. @@ -140,7 +140,10 @@ return; } - generic_make_request(bio); + if (FAULT_ACTIVE(fault_type)) + bio_endio(bio,bio->bi_size,-EIO); + else + generic_make_request(bio); } static inline void drbd_plug_device(drbd_dev *mdev) Index: drbd/drbd_bitmap.c =================================================================== --- drbd/drbd_bitmap.c (.../trunk) (revision 4048) +++ drbd/drbd_bitmap.c (.../branches/drbd-panic) (revision 4048) @@ -653,7 +653,13 @@ bio_add_page(bio, page, len, 0); bio->bi_private = b; bio->bi_end_io = drbd_bm_async_io_complete; - submit_bio(rw, bio); + + if (FAULT_ACTIVE((rw&WRITE)?DRBD_FAULT_MD_WR:DRBD_FAULT_MD_RD)) { + bio->bi_rw |= rw; + bio_endio(bio,bio->bi_size,-EIO); + } + else + submit_bio(rw, bio); } /* read one sector of the on disk bitmap into memory. * on disk bitmap is little endian. @@ -684,8 +690,8 @@ ERR( "IO ERROR reading bitmap sector %lu " "(meta-disk sector %llu)\n", enr, (unsigned long long)on_disk_sector ); - drbd_chk_io_error(mdev, 1); - drbd_io_error(mdev); + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); for (i = 0; i < AL_EXT_PER_BM_SECT; i++) drbd_bm_ALe_set_all(mdev,enr*AL_EXT_PER_BM_SECT+i); } @@ -755,6 +761,8 @@ now = jiffies; atomic_set(&b->bm_async_io, num_pages); + __clear_bit(BM_MD_IO_ERROR,&b->bm_flags); + for (i = 0; i < num_pages; i++) { /* let the layers below us try to merge these bios... 
*/ drbd_bm_page_io_async(mdev,b,i,rw); @@ -770,8 +778,8 @@ * detach? */ ALERT("we had at least one MD IO ERROR during bitmap IO\n"); - drbd_chk_io_error(mdev, 1); - drbd_io_error(mdev); + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); } now = jiffies; @@ -836,8 +844,8 @@ ERR( "IO ERROR writing bitmap sector %lu " "(meta-disk sector %lu)\n", enr, (unsigned long)on_disk_sector ); - drbd_chk_io_error(mdev, 1); - drbd_io_error(mdev); + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); for (i = 0; i < AL_EXT_PER_BM_SECT; i++) drbd_bm_ALe_set_all(mdev,enr*AL_EXT_PER_BM_SECT+i); } Index: drbd/linux/drbd_config.h =================================================================== --- drbd/linux/drbd_config.h (.../trunk) (revision 4048) +++ drbd/linux/drbd_config.h (.../branches/drbd-panic) (revision 4048) @@ -48,4 +48,7 @@ // for troubles. // #define DRBD_DISABLE_SENDPAGE +// Enable fault insertion code +//#define DRBD_ENABLE_FAULTS + #endif Index: drbd/drbd_main.c =================================================================== --- drbd/drbd_main.c (.../trunk) (revision 4048) +++ drbd/drbd_main.c (.../branches/drbd-panic) (revision 4048) @@ -50,6 +50,7 @@ #include #include #include +#include #define __KERNEL_SYSCALLS__ #include @@ -106,6 +107,13 @@ module_param(minor_count, int,0); module_param(disable_bd_claim,bool,0); +#ifdef DRBD_ENABLE_FAULTS +int enable_faults = 0; +int fault_rate; +module_param(enable_faults,int,0664); // bitmap of enabled faults +module_param(fault_rate,int,0664); // fault rate % value - applies to all enabled faults +#endif + // module parameter, defined int major_nr = LANANA_DRBD_MAJOR; int minor_count = 32; @@ -340,17 +348,22 @@ * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context. * See also drbd_chk_io_error * - * NOTE: we set ourselves DISKLESS here. - * But we try to write the "need full sync bit" here anyways. 
This is to make sure - * that you get a resynchronisation of the full device the next time you - * connect. + * NOTE: we set ourselves FAILED here if on_io_error is Detach or Panic OR + * if the forcedetach flag is set. This flag is set when failures + * occur writing the meta data portion of the disk as they are + * not recoverable. We also try to write the "need full sync bit" here + * anyways. This is to make sure that you get a resynchronisation of + * the full device the next time you connect. */ -int drbd_io_error(drbd_dev* mdev) +int drbd_io_error(drbd_dev* mdev, int forcedetach) { unsigned long flags; int send,ok=1; - if(mdev->bc->dc.on_io_error != Panic && mdev->bc->dc.on_io_error != Detach) return 1; + if(!forcedetach && + mdev->bc->dc.on_io_error != Panic && + mdev->bc->dc.on_io_error != Detach) + return 1; spin_lock_irqsave(&mdev->req_lock,flags); if( (send = (mdev->state.disk == Failed)) ) { @@ -365,9 +378,18 @@ if (ok) WARN("Notified peer that my disk is broken.\n"); else ERR("Sending state in drbd_io_error() failed\n"); +#if 0 +// warning SPG +// This code seems wrong -- we only get here if we are set to +// detach in which case we have no local disk, so there's no +// point asserting that a full sync is needed. +// Flushing the meta data is probably also wrong -- we want +// this node to appear out of date so we should deliberately +// NOT update the meta data with the latest epoch info! 
D_ASSERT(drbd_md_test_flag(mdev->bc,MDF_FullSync)); D_ASSERT(!drbd_md_test_flag(mdev->bc,MDF_Consistent)); drbd_md_sync(mdev); +#endif /* Releasing the backing device is done in after_state_ch() */ @@ -750,13 +772,6 @@ D_ASSERT(i); } - if ( ns.role == Primary && ns.conn < Connected && - ns.disk < Consistent ) { -// warning LGE "ugly and wrong" -// warning LGE "FIXME code missing" - drbd_panic("No access to good data anymore.\n"); - } - if( flags & ScheduleAfter ) { struct after_state_chg_work* ascw; @@ -853,23 +868,26 @@ kfree(mdev->p_uuid); mdev->p_uuid = NULL; } - if (ns.role == Primary && mdev->bc->md.uuid[Bitmap] == 0 ) { - /* Only do it if we have not yet done it... */ - INFO("Creating new current UUID\n"); - drbd_uuid_new_current(mdev); + if (inc_local(mdev)) { + if (ns.role == Primary && mdev->bc->md.uuid[Bitmap] == 0 ) { + /* Only do it if we have not yet done it... */ + INFO("Creating new current UUID\n"); + drbd_uuid_new_current(mdev); + } + if (ns.peer == Primary ) { + /* Note: The condition ns.peer == Primary implies + that we are connected. Otherwise it would + be ns.peer == Unknown. */ + /* Our peer lost its disk. + Not rotation into BitMap-UUID! A FullSync is + required after a primary detached from it disk! */ + u64 uuid; + INFO("Creating new current UUID [no BitMap]\n"); + get_random_bytes(&uuid, sizeof(u64)); + drbd_uuid_set(mdev, Current, uuid); + } + dec_local(mdev); } - if (ns.peer == Primary ) { - /* Note: The condition ns.peer == Primary implies - that we are connected. Otherwise it would - be ns.peer == Unknown. */ - /* Our peer lost its disk. - Not rotation into BitMap-UUID! A FullSync is - required after a primary detached from it disk! */ - u64 uuid; - INFO("Creating new current UUID [no BitMap]\n"); - get_random_bytes(&uuid, sizeof(u64)); - drbd_uuid_set(mdev, Current, uuid); - } } /* We want to pause resync, tell peer. 
*/ @@ -1013,7 +1031,7 @@ spin_lock(&thi->t_lock); - /* INFO("%s [%d]: %s %d -> Running\n", + /* INFO("drbd_thread_start: %s [%d]: %s %d -> Running\n", current->comm, current->pid, thi == &mdev->receiver ? "receiver" : thi == &mdev->asender ? "asender" : @@ -1046,7 +1064,7 @@ spin_lock(&thi->t_lock); - /* INFO("%s [%d]: %s %d -> %d; %d\n", + /* INFO("drbd_thread_stop: %s [%d]: %s %d -> %d; %d\n", current->comm, current->pid, thi->task ? thi->task->comm : "NULL", thi->t_state, ns, wait); */ @@ -1073,7 +1091,6 @@ force_sig(DRBD_SIGKILL,thi->task); else D_ASSERT(!wait); - } spin_unlock(&thi->t_lock); @@ -1313,16 +1330,15 @@ drbd_bm_set_all(mdev); drbd_bm_write(mdev); if (unlikely(mdev->state.disk <= Failed )) { - /* write_bm did fail! panic. - * FIXME can we do something better than panic? - */ -// warning LGE "ugly and wrong" - drbd_panic("Failed to write bitmap to disk\n!"); - ok = FALSE; - goto out; + /* write_bm did fail! Leave full sync flag set in Meta Data + * but otherwise process as per normal - need to tell other + * side that a full resync is required! */ + ERR("Failed to write bitmap to disk!\n"); } - drbd_md_clear_flag(mdev,MDF_FullSync); - drbd_md_sync(mdev); + else { + drbd_md_clear_flag(mdev,MDF_FullSync); + drbd_md_sync(mdev); + } } /* @@ -1340,7 +1356,6 @@ bm_i += num_words; } while (ok && want); - out: vfree(p); return ok; } @@ -2503,17 +2518,11 @@ if (drbd_md_sync_page_io(mdev,mdev->bc,sector,WRITE)) { clear_bit(MD_DIRTY,&mdev->flags); } else { - if (mdev->state.disk <= Failed) { - /* this was a try anyways ... */ - ERR("meta data update failed!\n"); - } else { - /* If we cannot write our meta data, - * but we are supposed to be able to, - * tough! - */ -// warning LGE "ugly and wrong" - drbd_panic("meta data update failed!\n"); - } + /* this was a try anyways ... */ + ERR("meta data update failed!\n"); + + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); } // Update mdev->bc->md.la_size_sect, since we updated it on metadata. 
@@ -2541,6 +2550,8 @@ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); if ( ! drbd_md_sync_page_io(mdev,bdev,bdev->md.md_offset,READ) ) { + /* NOTE: can't do normal error processing here as this is + called BEFORE disk is attached */ ERR("Error while reading metadata.\n"); rv = MDIOError; goto err; @@ -2705,6 +2716,65 @@ return 1; } +#ifdef DRBD_ENABLE_FAULTS +// Fault insertion support including random number generator shamelessly +// stolen from kernel/rcutorture.c +struct fault_random_state { + unsigned long state; + long count; /* must be signed: the reseed test below is --count < 0 */ +}; + +#define FAULT_RANDOM_MULT 39916801 /* prime */ +#define FAULT_RANDOM_ADD 479001701 /* prime */ +#define FAULT_RANDOM_REFRESH 10000 + +/* + * Crude but fast random-number generator. Uses a linear congruential + * generator, with occasional help from get_random_bytes(). + */ +static unsigned long +_drbd_fault_random(struct fault_random_state *rsp) +{ + long refresh; + + if (--rsp->count < 0) { + get_random_bytes(&refresh, sizeof(refresh)); + rsp->state += refresh; + rsp->count = FAULT_RANDOM_REFRESH; + } + rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD; + return swahw32(rsp->state); +} + +static char * +_drbd_fault_str(unsigned int type) { + static char *_faults[] = { + "Meta-data write", + "Meta-data read", + "Resync write", + "Resync read", + "Data write", + "Data read", + }; + + return (type < DRBD_FAULT_MAX)? 
_faults[type] : "**Unknown**"; +} + +unsigned int +_drbd_insert_fault(unsigned int type) +{ + static struct fault_random_state rrs = {0,0}; + + unsigned int rnd = ((_drbd_fault_random(&rrs) % 100) + 1); + unsigned int ret = (fault_rate > 0) && (rnd <= (unsigned int)fault_rate); /* a negative rate must not turn into "always" */ + + if (ret) + printk(KERN_ALERT "Simulating %s failure\n", _drbd_fault_str(type)); + + return ret; +} +#endif + #ifdef DUMP_EACH_PACKET #define PSM(A) \ do { \ Index: drbd/drbd_req.c =================================================================== --- drbd/drbd_req.c (.../trunk) (revision 4048) +++ drbd/drbd_req.c (.../branches/drbd-panic) (revision 4048) @@ -116,7 +116,7 @@ /* * figure out whether to report success or failure. * - * report success when at least one of the oprations suceeded. + * report success when at least one of the operations succeeded. * or, to put the other way, * only report failure, when both operations failed. * @@ -302,7 +302,7 @@ " [DISCARD L] new: %llu +%d; pending: %llu +%d\n", current->comm, current->pid, (unsigned long long)sector, size, - e->sector, e->size); + (unsigned long long)e->sector, e->size); return 1; } } @@ -399,10 +399,10 @@ req->private_bio = NULL; dec_local(mdev); ALERT("Local WRITE failed sec=%llu size=%u\n", - req->sector, req->size); + (unsigned long long)req->sector, req->size); /* and now: check how to handle local io error. * FIXME see comment below in read_completed_with_error */ - __drbd_chk_io_error(mdev); + __drbd_chk_io_error(mdev,FALSE); _req_may_be_done(req); break; @@ -419,7 +419,7 @@ break; /* else */ ALERT("Local READ failed sec=%llu size=%u\n", - req->sector, req->size); + (unsigned long long)req->sector, req->size); /* _req_mod(req,to_be_send); oops, recursion in static inline */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; @@ -434,7 +434,7 @@ * private bio then, and round the offset and size so * we get back enough data to be able to clear the bits again. 
*/ - __drbd_chk_io_error(mdev); + __drbd_chk_io_error(mdev,FALSE); /* fall through: _req_mod(req,queue_for_net_read); */ case queue_for_net_read: @@ -945,7 +945,11 @@ if (local) { req->private_bio->bi_bdev = mdev->bc->backing_bdev; - generic_make_request(req->private_bio); + + if (FAULT_ACTIVE(rw==WRITE? DRBD_FAULT_DT_WR : DRBD_FAULT_DT_RD)) + bio_endio(req->private_bio, req->private_bio->bi_size, -EIO); + else + generic_make_request(req->private_bio); } return 0; @@ -993,7 +997,6 @@ ERR("Sorry, I have no access to good data anymore.\n"); /* * FIXME suspend, loop waiting on cstate wait? - * panic? */ return 1; } Index: drbd/drbd_int.h =================================================================== --- drbd/drbd_int.h (.../trunk) (revision 4048) +++ drbd/drbd_int.h (.../branches/drbd-panic) (revision 4048) @@ -43,6 +43,11 @@ extern int major_nr; extern int use_nbd_major; +#ifdef DRBD_ENABLE_FAULTS +extern int enable_faults; +extern int fault_rate; +#endif + #include #ifdef DRBD_MAJOR # warning "FIXME. DRBD_MAJOR is now officially defined in major.h" @@ -187,6 +192,27 @@ _b; \ })) +// Defines to control fault insertion +enum { + DRBD_FAULT_MD_WR = 0, + DRBD_FAULT_MD_RD, + DRBD_FAULT_RS_WR, + DRBD_FAULT_RS_RD, + DRBD_FAULT_DT_WR, + DRBD_FAULT_DT_RD, + + DRBD_FAULT_MAX, +}; + +#ifdef DRBD_ENABLE_FAULTS +#define FAULT_ACTIVE(_t) \ + (fault_rate && (enable_faults & (1<<(_t))) && _drbd_insert_fault(_t)) + +extern unsigned int _drbd_insert_fault(unsigned int type); +#else +#define FAULT_ACTIVE(_t) (0) +#endif + #include // integer division, round _UP_ to the next integer #define div_ceil(A,B) ( (A)/(B) + ((A)%(B) ? 
1 : 0) ) @@ -961,7 +987,7 @@ extern int drbd_send_discard(drbd_dev *mdev, drbd_request_t *req); extern int drbd_send_sr_reply(drbd_dev *mdev, int retcode); extern void drbd_free_bc(struct drbd_backing_dev* bc); -extern int drbd_io_error(drbd_dev* mdev); +extern int drbd_io_error(drbd_dev* mdev, int forcedetach); extern void drbd_mdev_cleanup(drbd_dev *mdev); // drbd_meta-data.c (still in drbd_main.c) @@ -1309,13 +1335,22 @@ * drbd_chk_io_error: Handles the on_io_error setting, should be called from * all io completion handlers. See also drbd_io_error(). */ -static inline void __drbd_chk_io_error(drbd_dev* mdev) +static inline void __drbd_chk_io_error(drbd_dev* mdev, int forcedetach) { /* FIXME cleanup the messages here */ switch(mdev->bc->dc.on_io_error) { - case PassOn: /* FIXME should the better be named "Ignore"? */ - ERR("Ignoring local IO error!\n"); + case PassOn: /* FIXME would this be better named "Ignore"? */ + if (!forcedetach) { + ERR("Local IO failed. Passing error on...\n"); break; + } + /* NOTE fall through to detach case if forcedetach set */ + case Detach: + if (_drbd_set_state(mdev,_NS(disk,Failed),ChgStateHard) + == SS_Success) { + ERR("Local IO failed. Detaching...\n"); + } + break; case Panic: _drbd_set_state(mdev,_NS(disk,Failed),ChgStateHard); /* FIXME this is very ugly anyways. @@ -1323,21 +1358,15 @@ * while holding the req_lock hand with irq disabled. */ drbd_panic("IO error on backing device!\n"); break; - case Detach: - if (_drbd_set_state(mdev,_NS(disk,Failed),ChgStateHard) - == SS_Success) { - ERR("Local IO failed. Detaching...\n"); - } - break; } } -static inline void drbd_chk_io_error(drbd_dev* mdev, int error) +static inline void drbd_chk_io_error(drbd_dev* mdev, int error, int forcedetach) { if (error) { unsigned long flags; spin_lock_irqsave(&mdev->req_lock,flags); - __drbd_chk_io_error(mdev); + __drbd_chk_io_error(mdev,forcedetach); spin_unlock_irqrestore(&mdev->req_lock,flags); } }