[DRBD-cvs] svn commit by lars - r2491 - branches/drbd-0.7/drbd - fix races between failure in drbd_send_dblock and concu

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Tue Oct 3 12:56:08 CEST 2006


Author: lars
Date: 2006-10-03 12:56:07 +0200 (Tue, 03 Oct 2006)
New Revision: 2491

Modified:
   branches/drbd-0.7/drbd/drbd_int.h
   branches/drbd-0.7/drbd/drbd_main.c
   branches/drbd-0.7/drbd/drbd_receiver.c
   branches/drbd-0.7/drbd/drbd_req.c
   branches/drbd-0.7/drbd/drbd_worker.c
Log:
fix races between failure in drbd_send_dblock
and concurrently running tl_clear

remove drbd_req_get_sector and drbd_req_get_size wrappers,
reduce sector argument of drbd_end_req
fix potential access-afer-free in drbd_dio_end


Modified: branches/drbd-0.7/drbd/drbd_int.h
===================================================================
--- branches/drbd-0.7/drbd/drbd_int.h	2006-10-03 08:29:14 UTC (rev 2490)
+++ branches/drbd-0.7/drbd/drbd_int.h	2006-10-03 10:56:07 UTC (rev 2491)
@@ -1026,8 +1026,8 @@
 extern mempool_t *drbd_request_mempool;
 
 // drbd_req
-extern void _drbd_end_req(drbd_request_t *, int, int, sector_t);
-extern void drbd_end_req(drbd_request_t *, int, int, sector_t);
+extern void _drbd_end_req(drbd_request_t *, int, int);
+extern void drbd_end_req(drbd_request_t *, int, int);
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 extern int drbd_make_request_24(request_queue_t *q, int rw, struct buffer_head *bio);
 #else
@@ -1231,16 +1231,6 @@
 	}
 }
 
-static inline sector_t drbd_req_get_sector(struct drbd_request *req)
-{
-	return req->sector;
-}
-
-static inline unsigned short drbd_req_get_size(struct drbd_request *req)
-{
-	return req->size;
-}
-
 static inline void
 _drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
 {

Modified: branches/drbd-0.7/drbd/drbd_main.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_main.c	2006-10-03 08:29:14 UTC (rev 2490)
+++ branches/drbd-0.7/drbd/drbd_main.c	2006-10-03 10:56:07 UTC (rev 2491)
@@ -317,15 +317,15 @@
 	while ( b ) {
 		list_for_each_safe(le, tle, &b->requests) {
 			r = list_entry(le, struct drbd_request,w.list);
-			// bi_size and bi_sector are modified in bio_endio!
-			sector = drbd_req_get_sector(r);
-			size   = drbd_req_get_size(r);
+			// r may be freed within drbd_end_req
+			sector = r->sector;
+			size   = r->size;
 
 			r->rq_status &= ~RQ_DRBD_IN_TL;
 			list_del(&r->w.list);
 
 			if( !(r->rq_status & RQ_DRBD_SENT) ) {
-				_drbd_end_req(r,RQ_DRBD_SENT,1, sector);
+				_drbd_end_req(r,RQ_DRBD_SENT,1);
 			} else if ((r->rq_status & RQ_DRBD_DONE) == RQ_DRBD_DONE) {
 				D_ASSERT(r->master_bio == NULL);
 				INVALIDATE_MAGIC(r);
@@ -1013,10 +1013,9 @@
 
 	p.head.magic   = BE_DRBD_MAGIC;
 	p.head.command = cpu_to_be16(Data);
-	p.head.length  = cpu_to_be16( sizeof(p)-sizeof(Drbd_Header)
-				     + drbd_req_get_size(req) );
+	p.head.length  = cpu_to_be16(sizeof(p)-sizeof(Drbd_Header)+req->size);
 
-	p.sector   = cpu_to_be64(drbd_req_get_sector(req));
+	p.sector   = cpu_to_be64(req->sector);
 	p.block_id = (unsigned long)req;
 
 	/* About tl_add():
@@ -1042,6 +1041,22 @@
 
 	old_blocked = drbd_block_all_signals();
 	down(&mdev->data.mutex);
+
+	/* drbd_disconnect may have freed that socket while we were waiting
+	 * in down(). we have to check that, to avoid a race with tl_clear
+	 * cleaning up before we can tl_add */
+	if (unlikely(mdev->data.socket)) {
+		/* this req is not in the tl, tl_clear cannot find it.
+		 * we cannot just tl_add it here, either, because tl_clear
+		 * might be done already.  so we have to mark this request
+		 * "SENT" here, otherwise it won't ever complete.
+		 * FIXME won't work for freeze io.
+		 * FIXME if we are Diskless, we complete a WRITE
+		 * as successful here, that has never been written! */
+		drbd_end_req(req,RQ_DRBD_SENT,1);
+		goto out;
+	}
+
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=current;
 	spin_unlock(&mdev->send_task_lock);
@@ -1049,6 +1064,7 @@
 	if(test_and_clear_bit(ISSUE_BARRIER,&mdev->flags))
 		ok = _drbd_send_barrier(mdev);
 
+	/* unconditional tl_add! */
 	tl_add(mdev,req);
 	if(ok) {
 		dump_packet(mdev,mdev->data.socket,0,(void*)&p, __FILE__, __LINE__);
@@ -1057,6 +1073,9 @@
 		if(ok) {
 			if(mdev->conf.wire_protocol == DRBD_PROT_A) {
 				ok = _drbd_send_bio(mdev,drbd_req_private_bio(req));
+				/* drbd_end_req has to be within the data.mutex,
+				 * which protects us from a concurrently running tl_clear */
+				if (ok) drbd_end_req(req,RQ_DRBD_SENT,1);
 			} else {
 				ok = _drbd_send_zc_bio(mdev,drbd_req_private_bio(req));
 			}
@@ -1067,6 +1086,7 @@
 	mdev->send_task=NULL;
 	spin_unlock(&mdev->send_task_lock);
 
+  out:
 	up(&mdev->data.mutex);
 	restore_old_sigset(old_blocked);
 	return ok;

Modified: branches/drbd-0.7/drbd/drbd_receiver.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_receiver.c	2006-10-03 08:29:14 UTC (rev 2490)
+++ branches/drbd-0.7/drbd/drbd_receiver.c	2006-10-03 10:56:07 UTC (rev 2491)
@@ -924,7 +924,7 @@
 
 	bio = req->master_bio;
 
-	D_ASSERT( sector == drbd_req_get_sector(req) );
+	D_ASSERT( sector == req->sector );
 
 	rr=drbd_recv(mdev,drbd_bio_kmap(bio),data_size);
 	drbd_bio_kunmap(bio);
@@ -2157,7 +2157,7 @@
 				req->rq_status &= ~RQ_DRBD_IN_TL;
 				list_del(&req->w.list);
 			}
-			_drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
+			_drbd_end_req(req, RQ_DRBD_SENT, 1);
 			spin_unlock_irq(&mdev->req_lock);
 
 			if (test_bit(SYNC_STARTED,&mdev->flags) &&

Modified: branches/drbd-0.7/drbd/drbd_req.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_req.c	2006-10-03 08:29:14 UTC (rev 2490)
+++ branches/drbd-0.7/drbd/drbd_req.c	2006-10-03 10:56:07 UTC (rev 2491)
@@ -32,10 +32,10 @@
 #include <linux/drbd.h>
 #include "drbd_int.h"
 
-void _drbd_end_req(drbd_request_t *req, int nextstate, int er_flags,
-		  sector_t rsector)
+void _drbd_end_req(drbd_request_t *req, int nextstate, int er_flags)
 {
 	struct Drbd_Conf* mdev = drbd_req_get_mdev(req);
+	sector_t rsector = req->sector;
 	int uptodate;
 
 	if(req->rq_status & nextstate) {
@@ -59,7 +59,7 @@
 
 	uptodate = req->rq_status & 0x0001;
 	if( !uptodate && mdev->on_io_error == Detach) {
-		drbd_set_out_of_sync(mdev,rsector, drbd_req_get_size(req));
+		drbd_set_out_of_sync(mdev,rsector, req->size);
 		// It should also be as out of sync on
 		// the other side!  See w_io_error()
 
@@ -101,13 +101,12 @@
 	}
 }
 
-void drbd_end_req(drbd_request_t *req, int nextstate, int er_flags,
-		  sector_t rsector)
+void drbd_end_req(drbd_request_t *req, int nextstate, int er_flags)
 {
 	struct Drbd_Conf* mdev = drbd_req_get_mdev(req);
 	unsigned long flags=0;
 	spin_lock_irqsave(&mdev->req_lock,flags);
-	_drbd_end_req(req,nextstate,er_flags,rsector);
+	_drbd_end_req(req,nextstate,er_flags);
 	spin_unlock_irqrestore(&mdev->req_lock,flags);
 }
 
@@ -330,8 +329,6 @@
 				if (mdev->cstate >= Connected)
 					set_cstate(mdev,NetworkFailure);
 				drbd_thread_restart_nowait(&mdev->receiver);
-			} else if(mdev->conf.wire_protocol == DRBD_PROT_A) {
-				drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
 			}
 		} else {
 			// this node is diskless ...

Modified: branches/drbd-0.7/drbd/drbd_worker.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_worker.c	2006-10-03 08:29:14 UTC (rev 2490)
+++ branches/drbd-0.7/drbd/drbd_worker.c	2006-10-03 10:56:07 UTC (rev 2491)
@@ -139,6 +139,7 @@
 {
 	struct Drbd_Conf* mdev;
 	drbd_request_t *req;
+	sector_t rsector;
 
 	mdev = bh->b_private;
 	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
@@ -147,8 +148,10 @@
 	PARANOIA_BUG_ON(!VALID_POINTER(req));
 
 	drbd_chk_io_error(mdev,!uptodate);
-	drbd_end_req(req, RQ_DRBD_LOCAL, uptodate, drbd_req_get_sector(req));
-	drbd_al_complete_io(mdev,drbd_req_get_sector(req));
+        // req may get freed within drbd_end_req
+	rsector = drbd_req_get_sector(req);
+	drbd_end_req(req, RQ_DRBD_LOCAL, uptodate, rsector);
+	drbd_al_complete_io(mdev,rsector);
 	dec_local(mdev);
 }
 
@@ -284,9 +287,9 @@
 		return 1;
 
 	drbd_chk_io_error(mdev,error);
-	rsector = drbd_req_get_sector(req);
-        // the bi_sector of the bio gets modified somewhere in drbd_end_req()!
-	drbd_end_req(req, RQ_DRBD_LOCAL, (error == 0), rsector);
+        // req may get freed within drbd_end_req
+	rsector = req->sector;
+	drbd_end_req(req, RQ_DRBD_LOCAL, (error == 0));
 	drbd_al_complete_io(mdev,rsector);
 	dec_local(mdev);
 	bio_put(bio);



More information about the drbd-cvs mailing list