[DRBD-cvs] svn commit by phil - r2592 - in trunk/drbd: . linux - A bit of work on the handling of IO errors on the secondary node.

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Wed Nov 15 15:12:39 CET 2006


Author: phil
Date: 2006-11-15 15:12:37 +0100 (Wed, 15 Nov 2006)
New Revision: 2592

Modified:
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_nl.c
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_strings.c
   trunk/drbd/linux/drbd.h
Log:
A bit of work on the handling of IO errors on the secondary node.



Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2006-11-15 08:44:33 UTC (rev 2591)
+++ trunk/drbd/drbd_main.c	2006-11-15 14:12:37 UTC (rev 2592)
@@ -585,6 +585,9 @@
 	if( ns.conn == Disconnecting && os.conn == StandAlone) 
 		rv=SS_AlreadyStandAlone;
 
+	if( ns.disk == Outdated && os.disk == Diskless) 
+		rv=SS_CanNotOutdateDL;
+
 	return rv;
 }
 

Modified: trunk/drbd/drbd_nl.c
===================================================================
--- trunk/drbd/drbd_nl.c	2006-11-15 08:44:33 UTC (rev 2591)
+++ trunk/drbd/drbd_nl.c	2006-11-15 14:12:37 UTC (rev 2592)
@@ -1163,8 +1163,14 @@
 						      pdsk,Outdated));
 	} else if (retcode == SS_CW_FailedByPeer) {
 		// The peer probabely wants to see us outdated.
-		retcode = drbd_request_state(mdev,NS2(conn,Disconnecting,
-						      disk,Outdated));
+		retcode = _drbd_request_state(mdev,NS2(conn,Disconnecting,
+						       disk,Outdated),0);
+		if( retcode == SS_CanNotOutdateDL ) {
+			// We are diskless and our peer wants to outdate us.
+			// So, simply go away, and let the peer try to
+			// outdate us with its 'outdate-peer' handler later.
+			retcode = drbd_request_state(mdev,NS(conn,StandAlone));
+		}
 	}
 
 	if( retcode < SS_Success ) goto fail;

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2006-11-15 08:44:33 UTC (rev 2591)
+++ trunk/drbd/drbd_receiver.c	2006-11-15 14:12:37 UTC (rev 2592)
@@ -316,7 +316,7 @@
 
 	bio_put(bio);
 
-	BUG_ON(!hlist_unhashed(&e->colision));
+	D_ASSERT(hlist_unhashed(&e->colision));
 
 	mempool_free(e, drbd_ee_mempool);
 }
@@ -422,6 +422,7 @@
 		   is_syncer_block_id(e->block_id)) {
 			++n;
 		}
+		if(!hlist_unhashed(&e->colision)) hlist_del_init(&e->colision);
 		drbd_free_ee(mdev,e);
 	}
 
@@ -1305,7 +1306,8 @@
 		if (DRBD_ratelimit(5*HZ,5))
 			ERR("Can not write mirrored data block to local disk.\n");
 		drbd_send_ack_dp(mdev,NegAck,p);
-		return TRUE;
+		mdev->epoch_size++; // spin lock ?
+		return drbd_drain_block(mdev,data_size);
 	}
 
 	sector = be64_to_cpu(p->sector);
@@ -2315,7 +2317,8 @@
 	peer_state.i = be32_to_cpu(p->state);
 
 	if (mdev->p_uuid && mdev->state.conn <= Connected && 
-	    inc_local_if_state(mdev,Negotiating) ) {
+	    inc_local_if_state(mdev,Negotiating) && 
+	    peer_state.disk >= Negotiating) {
 		nconn=drbd_sync_handshake(mdev,peer_state.role,peer_state.disk);
 		dec_local(mdev);
 
@@ -3127,16 +3130,14 @@
 STATIC int got_NegAck(drbd_dev *mdev, Drbd_Header* h)
 {
 	Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
+	sector_t sector = be64_to_cpu(p->sector);
+	drbd_request_t *req;
 
 	if (DRBD_ratelimit(5*HZ,5))
 		WARN("Got NegAck packet. Peer is in troubles?\n");
 
 	update_peer_seq(mdev,be32_to_cpu(p->seq_num));
 
-	/* do nothing here.
-	 * we expect to get a "report param" on the data socket soon,
-	 * and will do the cleanup then and there.
-	 */
 	if(is_syncer_block_id(p->block_id)) {
 		sector_t sector = be64_to_cpu(p->sector);
 		int size = be32_to_cpu(p->blksize);
@@ -3144,6 +3145,16 @@
 		dec_rs_pending(mdev);
 
 		drbd_rs_failed_io(mdev, sector, size);
+	} else {
+		req = _ack_id_to_req(mdev, p->block_id, sector);
+
+		if (unlikely(!req)) {
+			spin_unlock_irq(&mdev->req_lock);
+			ERR("Got a corrupt block_id/sector pair(2).\n");
+			return FALSE;
+		}
+
+		req_mod(req, neg_acked, 0);
 	}
 
 	return TRUE;

Modified: trunk/drbd/drbd_strings.c
===================================================================
--- trunk/drbd/drbd_strings.c	2006-11-15 08:44:33 UTC (rev 2591)
+++ trunk/drbd/drbd_strings.c	2006-11-15 14:12:37 UTC (rev 2592)
@@ -73,7 +73,8 @@
 	[-SS_PrimaryNOP] = "Refusing to be Primary while peer is not outdated",
 	[-SS_ResyncRunning] = "Can not start resync since it is already active",
 	[-SS_AlreadyStandAlone] = "Can not disconnect a StandAlone device",
-	[-SS_CW_FailedByPeer] = "State changed was refused by peer node"
+	[-SS_CW_FailedByPeer] = "State changed was refused by peer node",
+	[-SS_CanNotOutdateDL] = "Can not outdate a diskless device"
 };
 
 const char* conns_to_name(drbd_conns_t s) {

Modified: trunk/drbd/linux/drbd.h
===================================================================
--- trunk/drbd/linux/drbd.h	2006-11-15 08:44:33 UTC (rev 2591)
+++ trunk/drbd/linux/drbd.h	2006-11-15 14:12:37 UTC (rev 2592)
@@ -201,7 +201,8 @@
 	SS_PrimaryNOP=-7,
 	SS_ResyncRunning=-8,
 	SS_AlreadyStandAlone=-9,
-	SS_CW_FailedByPeer=-10
+	SS_CW_FailedByPeer=-10,
+	SS_CanNotOutdateDL=-11
 } set_st_err_t;
 
 /* from drbd_strings.c */



More information about the drbd-cvs mailing list