[DRBD-cvs] drbd by phil; * Worked the no-io-error = Detach case.
drbd-user@lists.linbit.com
drbd-user@lists.linbit.com
Thu, 12 Feb 2004 19:27:11 +0100 (CET)
DRBD CVS committal
Author : phil
Module : drbd
Dir : drbd/drbd
Modified Files:
Tag: rel-0_7-branch
drbd_int.h drbd_main.c drbd_proc.c drbd_receiver.c
Log Message:
* Worked the no-io-error = Detach case.
* Works now for IO errors on the secondary node.
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.123
retrieving revision 1.58.2.124
diff -u -3 -r1.58.2.123 -r1.58.2.124
--- drbd_int.h 11 Feb 2004 22:59:26 -0000 1.58.2.123
+++ drbd_int.h 12 Feb 2004 18:27:06 -0000 1.58.2.124
@@ -726,6 +726,8 @@
u64 block_id);
extern int drbd_send_bitmap(drbd_dev *mdev);
extern void drbd_free_ll_dev(drbd_dev *mdev);
+extern int drbd_io_error(drbd_dev* mdev);
+
// drbd_meta-data.c (still in drbd_main.c)
@@ -921,7 +923,7 @@
/**
* drbd_chk_io_error: Handles the on_io_error setting, should be called from
- * all io completion handlers.
+ * all io completion handlers. See also drbd_io_error().
*/
static inline void drbd_chk_io_error(drbd_dev* mdev, int error)
{
@@ -936,37 +938,12 @@
panic(DEVICE_NAME" : IO error on backing device!\n");
break;
case Detach:
+ ERR("Local IO failed. Detaching...\n");
set_bit(DISKLESS,&mdev->flags);
smp_mb(); // Nack is sent in w_e handlers.
break;
}
}
-}
-
-/**
- * drbd_io_error: Handles the on_io_error setting, should be called in the
- * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
- */
-static inline int drbd_io_error(drbd_dev* mdev)
-{
- int ok=1;
-
- if(mdev->on_io_error == Panic || mdev->on_io_error == Detach) {
- if(!test_bit(SENT_DISK_FAILURE,&mdev->flags)) {
- D_ASSERT(test_bit(DISKLESS,&mdev->flags));
- ok = drbd_send_param(mdev,0);
- set_bit(SENT_DISK_FAILURE,&mdev->flags);
- WARN("Notified peer that my disk is broken.\n");
- if(mdev->cstate > Connected ) {
- WARN("Resync aborted.\n");
- if(mdev->cstate == SyncTarget)
- set_bit(STOP_SYNC_TIMER,&mdev->flags);
- set_cstate(mdev,Connected);
- }
- }
- }
-
- return ok;
}
static inline int semaphore_is_locked(struct semaphore* s)
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.128
retrieving revision 1.73.2.129
diff -u -3 -r1.73.2.128 -r1.73.2.129
--- drbd_main.c 11 Feb 2004 22:59:26 -0000 1.73.2.128
+++ drbd_main.c 12 Feb 2004 18:27:06 -0000 1.73.2.129
@@ -288,6 +288,38 @@
spin_unlock_irq(&mdev->tl_lock);
}
+/**
+ * drbd_io_error: Handles the on_io_error setting, should be called in the
+ * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
+ * See also drbd_chk_io_error
+ */
+int drbd_io_error(drbd_dev* mdev)
+{
+ int ok=1;
+
+ if(mdev->on_io_error != Panic && mdev->on_io_error != Detach) return 1;
+ if(test_and_set_bit(SENT_DISK_FAILURE,&mdev->flags)) return 1;
+
+ D_ASSERT(test_bit(DISKLESS,&mdev->flags));
+ ok = drbd_send_param(mdev,0);
+ WARN("Notified peer that my disk is broken.\n");
+ if(mdev->cstate > Connected ) {
+ WARN("Resync aborted.\n");
+ if(mdev->cstate == SyncTarget)
+ set_bit(STOP_SYNC_TIMER,&mdev->flags);
+ set_cstate(mdev,Connected);
+ }
+ if ( wait_event_interruptible_timeout(mdev->cstate_wait,
+ atomic_read(&mdev->local_cnt) == 0 , HZ ) <= 0) {
+ WARN("Not releasing backing storage device.\n");
+ } else {
+ WARN("Releasing backing storage device.\n");
+ drbd_free_ll_dev(mdev);
+ }
+
+ return ok;
+}
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,14)
// daemonize was no global symbol before 2.4.14
/* in 2.4.6 is is prototyped as
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_proc.c,v
retrieving revision 1.8.2.19
retrieving revision 1.8.2.20
diff -u -3 -r1.8.2.19 -r1.8.2.20
--- drbd_proc.c 9 Feb 2004 10:36:54 -0000 1.8.2.19
+++ drbd_proc.c 12 Feb 2004 18:27:06 -0000 1.8.2.20
@@ -166,7 +166,8 @@
for (i = 0; i < minor_count; i++) {
sn = cstate_names[drbd_conf[i].cstate];
if(drbd_conf[i].cstate == Connected) {
- if(!drbd_conf[i].lo_file) sn = "DiskLessClient";
+ if(test_bit(DISKLESS,&drbd_conf[i].flags))
+ sn = "DiskLessClient";
if(test_bit(PARTNER_DISKLESS,&drbd_conf[i].flags))
sn = "ServerForDLess";
}
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.109
retrieving revision 1.97.2.110
diff -u -3 -r1.97.2.109 -r1.97.2.110
--- drbd_receiver.c 11 Feb 2004 22:03:52 -0000 1.97.2.109
+++ drbd_receiver.c 12 Feb 2004 18:27:06 -0000 1.97.2.110
@@ -795,6 +795,7 @@
dec_rs_pending(mdev,HERE);
+ e->block_id = ID_SYNCER;
if(!inc_local(mdev)) {
ERR("Can not write resync data to local disk.\n");
drbd_send_ack(mdev,NegAck,e);
@@ -805,7 +806,6 @@
}
drbd_ee_prepare_write(mdev,e,sector,data_size);
- e->block_id = ID_SYNCER;
e->w.cb = e_end_resync_block;
spin_lock_irq(&mdev->ee_lock);
@@ -985,6 +985,7 @@
e = read_in_block(mdev,data_size);
ERR_IF(!e) return FALSE;
+ e->block_id = p->block_id; // no meaning on this side, e* on partner
if(!inc_local(mdev)) {
ERR("Can not write mirrored data block to local disk.\n");
@@ -996,7 +997,6 @@
}
drbd_ee_prepare_write(mdev, e, sector, data_size);
- e->block_id = p->block_id; // no meaning on this side, e* on partner
e->w.cb = e_end_block;
spin_lock_irq(&mdev->ee_lock);
@@ -1606,17 +1606,26 @@
sector_t sector = be64_to_cpu(p->sector);
int blksize = be32_to_cpu(p->blksize);
- if( is_syncer_blk(mdev,p->block_id)) {
- drbd_set_in_sync(mdev,sector,blksize);
- } else {
- req=(drbd_request_t*)(long)p->block_id;
+ if(likely(!test_bit(PARTNER_DISKLESS,&mdev->flags))) {
+ // test_bit(PARTNER_DISKLESS,&mdev->flags)
+ // This happens if one or a few IO requests on the peer
+ // failed, and some subsequent ones completed successfully
+ // afterwards.
+
+ // But we killed everything out of the transferlog
+ // as we got the news that IO is broken on the peer.
- ERR_IF (!VALID_POINTER(req)) return FALSE;
+ if( is_syncer_blk(mdev,p->block_id)) {
+ drbd_set_in_sync(mdev,sector,blksize);
+ } else {
+ req=(drbd_request_t*)(long)p->block_id;
- drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
+ ERR_IF (!VALID_POINTER(req)) return FALSE;
+
+ drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
+ }
}
- // TODO: Make sure that the block is in an active epoch!!
if(is_syncer_blk(mdev,p->block_id)) {
dec_rs_pending(mdev,HERE);
} else {
@@ -1628,22 +1637,17 @@
STATIC int got_NegAck(drbd_dev *mdev, Drbd_Header* h)
{
- drbd_request_t *req;
Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
sector_t sector = be64_to_cpu(p->sector);
- int blksize = be32_to_cpu(p->blksize);
+ int size = be32_to_cpu(p->blksize);
WARN("Got NegAck packet. Peer is in troubles?\n");
- if( !is_syncer_blk(mdev,p->block_id)) {
- req=(drbd_request_t*)(long)p->block_id;
-
- ERR_IF (!VALID_POINTER(req)) return FALSE;
- drbd_set_out_of_sync(mdev,sector,blksize);
- drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
+ if(!is_syncer_blk(mdev,p->block_id)) {
+ D_ASSERT(bm_get_bit(mdev->mbds_id,sector,size));
+ // tl_clear() must have set this out of sync!
}
- // TODO: Make sure that the block is in an active epoch!!
if(is_syncer_blk(mdev,p->block_id)) {
dec_rs_pending(mdev,HERE);
} else {
@@ -1673,6 +1677,8 @@
STATIC int got_BarrierAck(drbd_dev *mdev, Drbd_Header* h)
{
Drbd_BarrierAck_Packet *p = (Drbd_BarrierAck_Packet*)h;
+
+ if(unlikely(test_bit(PARTNER_DISKLESS,&mdev->flags))) return TRUE;
tl_release(mdev,p->barrier,be32_to_cpu(p->set_size));
dec_ap_pending(mdev,HERE);