[DRBD-cvs] svn commit by lars - r2377 - trunk/drbd - work on the syncer may or may not ignore the agreed-upo

Fri Aug 18 18:13:51 CEST 2006

Author: lars
Date: 2006-08-18 18:13:50 +0200 (Fri, 18 Aug 2006)
New Revision: 2377

Modified:
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_worker.c
Log:

work on the syncer may or may not ignore the agreed-upon max_segment-size
apart from "it still compiles" no test done.  I'll have to make up my mind
whether it is worth the hassle.  probably not...



Modified: trunk/drbd/drbd_receiver.c
===================================================================

--- trunk/drbd/drbd_receiver.c	2006-08-18 14:09:04 UTC (rev 2376)
+++ trunk/drbd/drbd_receiver.c	2006-08-18 16:13:50 UTC (rev 2377)
@@ -256,31 +256,81 @@
 		page = drbd_pp_alloc(mdev, gfp_mask);
 		if (!page) goto fail2;
 		if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) {
-			/* if this happens, it indicates we are not doing correct
-			 * stacking of device limits.
-			 * or that the syncer (which may choose to ignore the
-			 * device limits) happend to request something which
-			 * does not work on this side...
-			 *
-			 * if this happens for the _first_ page, however, my
-			 * understanding is it would indicate a bug in the
-			 * lower levels, since adding _one_ page is
-			 * "guaranteed" to work.
+			/*
+			 * actually:
+			drbd_pp_free(page);
+			goto fail2;
+			 * But see below.
 			 */
+			break;
+		}
+		ds -= min_t(int, ds, PAGE_SIZE);
+	}
+
+	/* D_ASSERT( data_size == bio->bi_size); */
+	if (ds) {
+		/*
+		 * bio_add_page failed.
+		 *
+		 * if this happens, it indicates we are not doing correct
+		 * stacking of device limits.
+		 *
+		 * ---
+		 * this may also happen on the SyncSource for syncer requests:
+		 * for performance, the syncer may choose to ignore the
+		 * agreed-upon device limits (max_segment_size may be
+		 * arbitrarily set to PAGE_SIZE because the lower level device
+		 * happens to have a merge_bvec_fn).
+		 *
+		 * we would then just "split" the request here,
+		 * and then send multiple RSDataReply packets to the peer.
+		 *
+		 * FIXME to implement that, we'd need ot be able to
+		 * "return several Tl_epoch_entry" here,
+		 * so we'd need to either recurse, or add more state to the
+		 * return valud of this function.
+		 * or let the caller check e->ee_size against what he requested,
+		 * and reiterate there.
+		 *
+		 * It is probably just not worth the hassle,
+		 * but I'll check it in unfinished now anyways.
+		 *
+		 * TODO
+		 * either complete support on this side, or rip it out
+		 * and do the one-liner patch in w_make_resync_request
+		 * exchanging DRBD_MAX_SEGMENT_SIZE with q->max_segment_size
+		 * ---
+		 *
+		 * if this happens for the _first_ page, however, my
+		 * understanding is it would indicate a bug in the lower levels,
+		 * since adding _one_ page is "guaranteed" to work.
+		 */
+		if (ds < data_size && id == ID_SYNCER) {
+			/* this currently only serves the first part of that
+			 * request. you have to restart the syncer...
+			 * this is currently still very buggy and get our
+			 * housekeeping about in-sync areas completely wrong.
+			 */
+			ERR("should have split resync request: "
+			    "sector/data_size/rest %llu %u %u\n",
+			    (unsigned long long)sector, data_size, ds);
+		} else {
+			/* this should not happen,
+			 * if we correctly stacked limits. */
 			ERR("bio_add_page failed: "
 			    "id/sector/data_size/rest 0x%llx %llu %u %u\n",
 			    (unsigned long long)id,
 			    (unsigned long long)sector, data_size, ds);
-			break;
+			drbd_pp_free(mdev,page);
+			goto fail2;
 		}
-		ds -= min_t(int, ds, PAGE_SIZE);
 	}
 
 	bio->bi_private = e;
 	e->mdev = mdev;
 	e->ee_sector = sector;
 	e->ee_size = bio->bi_size;
-	D_ASSERT( data_size == bio->bi_size);
+
 	e->private_bio = bio;
 	e->block_id = id;
 	INIT_HLIST_NODE(&e->colision);

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2006-08-18 14:09:04 UTC (rev 2376)
+++ trunk/drbd/drbd_worker.c	2006-08-18 16:13:50 UTC (rev 2377)
@@ -322,6 +322,7 @@
 
 	next_sector:
 		size = BM_BLOCK_SIZE;
+		/* as of now, we are the only user of drbd_bm_find_next */
 		bit  = drbd_bm_find_next(mdev);
 
 		if (bit == -1UL) {
@@ -347,19 +348,38 @@
 		}
 
 #if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
-		// try to find some adjacent bits...
-		while ( drbd_bm_test_bit(mdev,bit+1) == 1 ) {
-			// stop if we have the already the maximum req size
-			if(size == DRBD_MAX_SEGMENT_SIZE) break;
-			// do not leafe the current BM_EXTEND
-			if(( (bit+1) & BM_BLOCKS_PER_BM_EXT_MASK ) == 0) break;
+		/* try to find some adjacent bits.
+		 * we stop if we have already the maximum req size
+		 * or if it the request would cross a 32k boundary
+		 * (play more nicely with most raid devices).
+		 *
+		 * we don't care about the agreed-uppon q->max_segment_size
+		 * here, because we don't keep a reference on this side.
+		 * the sync source will split the requests approrpiately as
+		 * needed, and may send multiple RSDataReply packets.
+		 */
+		for (;;) {
+			if (size < DRBD_MAX_SEGMENT_SIZE)
+				break;
+			if ((sector & ~63ULL) + BM_BIT_TO_SECT(2) <= 64ULL)
+				break;
+			// do not cross extent boundaries
+			if (( (bit+1) & BM_BLOCKS_PER_BM_EXT_MASK ) == 0)
+				break;
+			// now, is it actually dirty, after all?
+			if ( drbd_bm_test_bit(mdev,bit+1) == 0 )
+				break;
 			bit++;
 			size += BM_BLOCK_SIZE;
 			i++;
 		}
-		drbd_bm_set_find(mdev,bit+1);
+		/* if we merged some,
+		 * reset the offset to start the next drbd_bm_find_next from */
+		if (size > BM_BLOCK_SIZE)
+			drbd_bm_set_find(mdev,bit+1);
 #endif
 
+		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity) size = (capacity-sector)<<9;
 		inc_rs_pending(mdev);
 		if(!drbd_send_drequest(mdev,RSDataRequest,