[DRBD-cvs] drbd by lars; adding ap_bio_cnt while the peers someti...

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Tue, 18 May 2004 18:20:47 +0200 (CEST)


DRBD CVS committal

Author  : lars
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_dsender.c drbd_fs.c drbd_int.h drbd_main.c drbd_proc.c 
	drbd_receiver.c drbd_req-2.4.c 


Log Message:
adding ap_bio_cnt

The peers sometimes do not agree about the amount of data to be synced,
and the receiver thread hangs in drbd_disconnect because some counters seem to be counted wrong.
Maybe adding yet another counter helps to find the problem.

This adds atomic_t ap_bio_cnt, where "ap" stands for application.
It shows up in /proc/drbd as the "ap:" field.

It gets incremented *only* in drbd_make_request_common,
and gets decremented wherever and whenever we actually
complete the bio (bio_endio) back to the upper layers.

No functional change this time.


===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.110
retrieving revision 1.1.2.111
diff -u -3 -r1.1.2.110 -r1.1.2.111
--- drbd_dsender.c	18 May 2004 11:15:21 -0000	1.1.2.110
+++ drbd_dsender.c	18 May 2004 16:20:42 -0000	1.1.2.111
@@ -177,6 +177,7 @@
 	} else {
 	pass_on:
 		req->master_bio->b_end_io(req->master_bio,uptodate);
+		atomic_dec(&mdev->ap_bio_cnt);
 
 		INVALIDATE_MAGIC(req);
 		mempool_free(req,drbd_request_mempool);
@@ -207,15 +208,15 @@
 	struct Tl_epoch_entry *e=NULL;
 	struct Drbd_Conf* mdev;
 
+	mdev=bio->bi_private;
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
+
 	/* we should be called via bio_endio, so this should never be the case
 	 * but "everyone else does it", and so do we ;)		-lge
 	 */
-	if (bio->bi_size)
+	ERR_IF (bio->bi_size)
 		return 1;
 
-	mdev=bio->bi_private;
-	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
-
 	e = container_of(bio,struct Tl_epoch_entry,private_bio);
 	PARANOIA_BUG_ON(!VALID_POINTER(e));
 	D_ASSERT(e->block_id != ID_VACANT);
@@ -240,13 +241,13 @@
 	struct Tl_epoch_entry *e=NULL;
 	struct Drbd_Conf* mdev;
 
-	// see above
-	if (bio->bi_size)
-		return 1;
-
 	mdev=bio->bi_private;
 	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
 
+	// see above
+	ERR_IF (bio->bi_size)
+		return 1;
+
 	e = container_of(bio,struct Tl_epoch_entry,private_bio);
 	PARANOIA_BUG_ON(!VALID_POINTER(e));
 	D_ASSERT(e->block_id != ID_VACANT);
@@ -277,21 +278,9 @@
 	sector_t rsector;
 
 	// see above
-	ERR_IF(bio->bi_size)
+	ERR_IF (bio->bi_size)
 		return 1;
 
-#if 0
-	{
-		static int ccc=1;
-
-		if(ccc++ % 100 == 0) {
-			ERR("Injecting IO error.\n");
-			error=-5;
-			clear_bit(BIO_UPTODATE,&bio->bi_flags);
-		}
-	}
-#endif
-
 	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
 
 	req = container_of(bio,struct drbd_request,private_bio);
@@ -314,7 +303,7 @@
 	drbd_request_t *req;
 
 	// see above
-	ERR_IF(bio->bi_size)
+	ERR_IF (bio->bi_size)
 		return 1;
 
 #if 0
@@ -345,6 +334,7 @@
 	} else {
 	pass_on:
 		bio_endio(req->master_bio,req->master_bio->bi_size,error);
+		atomic_dec(&mdev->ap_bio_cnt);
 
 		INVALIDATE_MAGIC(req);
 		mempool_free(req,drbd_request_mempool);
@@ -383,6 +373,7 @@
 	     test_bit(PARTNER_DISKLESS,&mdev->flags) ) {
 		ERR("WE ARE LOST. Local IO failure, no peer.\n");
 		drbd_bio_endio(req->master_bio,0);
+		atomic_dec(&mdev->ap_bio_cnt);
 		mempool_free(req,drbd_request_mempool);
 		// TODO: Do something like panic() or shut_down_cluster().
 		return 1;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_fs.c,v
retrieving revision 1.28.2.92
retrieving revision 1.28.2.93
diff -u -3 -r1.28.2.92 -r1.28.2.93
--- drbd_fs.c	18 May 2004 11:15:21 -0000	1.28.2.92
+++ drbd_fs.c	18 May 2004 16:20:42 -0000	1.28.2.93
@@ -570,6 +570,8 @@
 	    !(newstate & DontBlameDrbd) )
 		return -EIO;
 
+// FIXME if necessary set MDF_Consistent
+
 	drbd_sync_me(mdev);
 
 	/* Wait until nothing is on the fly :) */
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.162
retrieving revision 1.58.2.163
diff -u -3 -r1.58.2.162 -r1.58.2.163
--- drbd_int.h	18 May 2004 11:15:21 -0000	1.58.2.162
+++ drbd_int.h	18 May 2004 16:20:42 -0000	1.58.2.163
@@ -682,6 +682,7 @@
 	unsigned int writ_cnt;
 	unsigned int al_writ_cnt;
 	unsigned int bm_writ_cnt;
+	atomic_t ap_bio_cnt;
 	atomic_t ap_pending_cnt;
 	atomic_t rs_pending_cnt;
 	atomic_t unacked_cnt;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.176
retrieving revision 1.73.2.177
diff -u -3 -r1.73.2.176 -r1.73.2.177
--- drbd_main.c	18 May 2004 11:15:22 -0000	1.73.2.176
+++ drbd_main.c	18 May 2004 16:20:42 -0000	1.73.2.177
@@ -1173,6 +1173,7 @@
 
 	drbd_set_defaults(mdev);
 
+	atomic_set(&mdev->ap_bio_cnt,0);
 	atomic_set(&mdev->ap_pending_cnt,0);
 	atomic_set(&mdev->rs_pending_cnt,0);
 	atomic_set(&mdev->unacked_cnt,0);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_proc.c,v
retrieving revision 1.8.2.26
retrieving revision 1.8.2.27
diff -u -3 -r1.8.2.26 -r1.8.2.27
--- drbd_proc.c	18 May 2004 11:15:22 -0000	1.8.2.26
+++ drbd_proc.c	18 May 2004 16:20:42 -0000	1.8.2.27
@@ -197,7 +197,7 @@
 			rlen += sprintf( buf + rlen,
 			   "%2d: cs:%s st:%s/%s ld:%s\n"
 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
-			   "lo:%d pe:%d ua:%d\n",
+			   "lo:%d pe:%d ua:%d ap:%d\n",
 			   i, sn,
 			   nodestate_to_name(drbd_conf[i].state),
 			   nodestate_to_name(drbd_conf[i].o_state),
@@ -212,7 +212,8 @@
 			   atomic_read(&drbd_conf[i].local_cnt),
 			   atomic_read(&drbd_conf[i].ap_pending_cnt) +
 			   atomic_read(&drbd_conf[i].rs_pending_cnt),
-			   atomic_read(&drbd_conf[i].unacked_cnt)
+			   atomic_read(&drbd_conf[i].unacked_cnt),
+			   atomic_read(&drbd_conf[i].ap_bio_cnt)
 			);
 
 		if ( drbd_conf[i].cstate == SyncSource ||
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.157
retrieving revision 1.97.2.158
diff -u -3 -r1.97.2.157 -r1.97.2.158
--- drbd_receiver.c	18 May 2004 11:15:22 -0000	1.97.2.157
+++ drbd_receiver.c	18 May 2004 16:20:42 -0000	1.97.2.158
@@ -837,6 +837,7 @@
 
 	ok=(rr==data_size);
 	drbd_bio_endio(bio,ok);
+	atomic_dec(&mdev->ap_bio_cnt);
 
 	dec_ap_pending(mdev,HERE);
 	return ok;
@@ -1390,6 +1391,7 @@
 		bio = req->master_bio;
 
 		drbd_bio_IO_error(bio);
+		atomic_dec(&mdev->ap_bio_cnt);
 		dec_ap_pending(mdev,HERE);
 
 		INVALIDATE_MAGIC(req);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.78
retrieving revision 1.33.2.79
diff -u -3 -r1.33.2.78 -r1.33.2.79
--- drbd_req-2.4.c	18 May 2004 11:15:22 -0000	1.33.2.78
+++ drbd_req-2.4.c	18 May 2004 16:20:42 -0000	1.33.2.79
@@ -87,8 +87,11 @@
 		// the other side!  See w_io_error()
 
 		drbd_bio_endio(req->master_bio,1);
+		atomic_dec(&mdev->ap_bio_cnt);
 		// The assumption is that we wrote it on the peer.
 
+// FIXME proto A and diskless :)
+
 		req->w.cb = w_io_error;
 		drbd_queue_work(mdev,&mdev->data.work,&req->w);
 
@@ -97,6 +100,7 @@
 	}
 
 	drbd_bio_endio(req->master_bio,uptodate);
+	atomic_dec(&mdev->ap_bio_cnt);
 
 	INVALIDATE_MAGIC(req);
 	mempool_free(req,drbd_request_mempool);
@@ -247,6 +251,7 @@
 	/* we need to plug ALWAYS since we possibly need to kick lo_dev */
 	drbd_plug_device(mdev);
 
+	atomic_inc(&mdev->ap_bio_cnt);
 	if (remote) {
 		/* either WRITE and Connected,
 		 * or READ, and no local disk,