[DRBD-cvs] svn commit by phil - r2720 - trunk/drbd - Implemented the fast drbd_al_to_on_disk_bm(), for out s

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Tue Jan 30 14:44:56 CET 2007


Author: phil
Date: 2007-01-30 14:44:54 +0100 (Tue, 30 Jan 2007)
New Revision: 2720

Modified:
   trunk/drbd/drbd_actlog.c
Log:
Implemented the fast drbd_al_to_on_disk_bm(), for our setups where fast
switchover times are required!

This can be orders of magnitude faster, since we submit a lot less
IO, and submit all of that IO at once.



Modified: trunk/drbd/drbd_actlog.c
===================================================================
--- trunk/drbd/drbd_actlog.c	2007-01-29 17:08:23 UTC (rev 2719)
+++ trunk/drbd/drbd_actlog.c	2007-01-30 13:44:54 UTC (rev 2720)
@@ -511,6 +511,113 @@
 	return 1;
 }
 
+void drbd_al_to_on_disk_bm_slow(struct Drbd_Conf *mdev)
+{
+	int i;
+	unsigned int enr;
+
+	WARN("Using the slow drbd_al_to_on_disk_bm()\n");
+
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+	if (inc_local_if_state(mdev,Attaching)) {
+		for(i=0;i<mdev->act_log->nr_elements;i++) {
+			enr = lc_entry(mdev->act_log,i)->lc_number;
+			if(enr == LC_FREE) continue;
+			/* Really slow: if we have al-extents 16..19 active,
+			 * sector 4 will be written four times! Synchronous! */
+			drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT );
+		}
+
+		lc_unlock(mdev->act_log);
+		wake_up(&mdev->al_wait);
+		dec_local(mdev);
+	} else D_ASSERT(0);
+}
+
+struct drbd_atodb_wait {
+	atomic_t           count;
+	struct completion  io_done;
+	struct Drbd_Conf   *mdev;
+	int                error;
+};
+
+int drbd_atodb_endio(struct bio *bio, unsigned int bytes_done, int error)
+{
+	struct drbd_atodb_wait *wc = bio->bi_private;
+	struct Drbd_Conf *mdev=wc->mdev;
+	struct page *page;
+
+	if (bio->bi_size) return 1;
+
+	drbd_chk_io_error(mdev,error,TRUE);
+	if(error && wc->error == 0) wc->error=error;
+
+	if (atomic_dec_and_test(&wc->count)) {
+		complete(&wc->io_done);
+	}
+
+	page = bio->bi_io_vec[0].bv_page;
+	if(page) put_page(page);
+	bio_put(bio);
+	dec_local(mdev);
+
+	return 0;
+}
+
+#define S2W(s)	((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL))
+STATIC int drbd_atodb_ensure(struct Drbd_Conf *mdev, 
+			     struct bio **bios, 
+			     struct page **page,
+			     unsigned int *page_offset,
+			     unsigned int enr,
+			     struct drbd_atodb_wait *wc)
+{
+	int i=0,allocated_page=0;
+	struct bio *bio;
+	struct page *np;
+	sector_t on_disk_sector = enr + mdev->bc->md.md_offset + mdev->bc->md.bm_offset;
+	int offset;
+
+	// check if that enr is already covered by an already created bio.
+	while( (bio=bios[i]) ) {
+		if(bio->bi_sector == on_disk_sector) return 0;
+		i++;
+	}
+
+	bio = bio_alloc(GFP_KERNEL, 1);
+	if(bio==NULL) return -ENOMEM;
+
+	bio->bi_bdev = mdev->bc->md_bdev;
+	bio->bi_sector = on_disk_sector;
+
+	if(*page_offset == PAGE_SIZE) {
+		np = alloc_page(__GFP_HIGHMEM);
+		if(np == NULL) return -ENOMEM;
+		*page = np;
+		*page_offset = 0;
+		allocated_page=1;
+	}
+
+	offset = S2W(enr);
+	drbd_bm_get_lel( mdev, offset, 
+			 min_t(size_t,S2W(1), drbd_bm_words(mdev) - offset),
+			 page_address(*page) + *page_offset );
+
+	if(bio_add_page(bio, *page, MD_HARDSECT, *page_offset)!=MD_HARDSECT)
+		return -EIO;
+
+	if(!allocated_page) get_page(*page);
+
+	*page_offset += MD_HARDSECT;
+
+	bio->bi_private = wc;
+	bio->bi_end_io = drbd_atodb_endio;
+
+	bios[i] = bio;
+	return 0;
+}
+
 /**
  * drbd_al_to_on_disk_bm:
  * Writes the areas of the bitmap which are covered by the AL.
@@ -521,23 +628,76 @@
 {
 	int i;
 	unsigned int enr;
+	struct bio **bios;
+	struct page *page;
+	unsigned int page_offset=PAGE_SIZE;
+	struct drbd_atodb_wait wc;
 
+	bios = kzalloc(sizeof(struct bio*) * mdev->act_log->nr_elements,
+		       GFP_KERNEL);
+	if(!bios) {
+		drbd_al_to_on_disk_bm_slow(mdev);
+		return;
+	}
+
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
 
 	if (inc_local_if_state(mdev,Attaching)) {
 		for(i=0;i<mdev->act_log->nr_elements;i++) {
 			enr = lc_entry(mdev->act_log,i)->lc_number;
 			if(enr == LC_FREE) continue;
-			/* TODO encapsulate and optimize within drbd_bitmap
-			 * currently, if we have al-extents 16..19 active,
-			 * sector 4 will be written four times! */
-			drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT );
+			if(drbd_atodb_ensure(mdev,bios,&page,&page_offset,
+					     enr/AL_EXT_PER_BM_SECT,&wc))
+				goto abort;
 		}
 
 		lc_unlock(mdev->act_log);
 		wake_up(&mdev->al_wait);
+
+		atomic_set(&wc.count,1);
+		init_completion(&wc.io_done);
+		wc.mdev = mdev;
+		wc.error = 0;
+
+		for(i=0; bios[i]; i++) {
+			atomic_inc(&wc.count);
+			inc_local(mdev);
+
+			if (FAULT_ACTIVE( DRBD_FAULT_MD_WR )) {
+				bios[i]->bi_rw = WRITE;
+				bio_endio(bios[i],bios[i]->bi_size,-EIO);
+			} else {
+				submit_bio(WRITE, bios[i]);
+			}
+
+		}
+		drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+
+		atomic_dec(&wc.count); // for the init_completion(.. ,1)
+		wait_for_completion(&wc.io_done);
+
 		dec_local(mdev);
+
+		if(wc.error) drbd_io_error(mdev, TRUE);
+
 	} else D_ASSERT(0);
+
+	kfree(bios);
+	return;
+
+ abort:
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+	dec_local(mdev);
+
+	// free everything by calling the endio callback directly.
+	for(i=0; bios[i]; i++) {
+		inc_local(mdev);
+		bios[i]->bi_size=0;
+		drbd_atodb_endio(bios[i], MD_HARDSECT, 0);
+	}
+	kfree(bios);
+	drbd_al_to_on_disk_bm_slow(mdev); //.. and take the slow path.
 }
 
 /**



More information about the drbd-cvs mailing list