[DRBD-cvs] svn commit by phil - r2112 - trunk/drbd - With the first try of the async bitmap stuff, the

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Sun Mar 12 09:38:31 CET 2006


synce
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Author: phil
Date: 2006-03-12 09:38:31 +0100 (Sun, 12 Mar 2006)
New Revision: 2112

Modified:
   trunk/drbd/drbd_bitmap.c
Log:
With the first try of the async bitmap stuff, the
syncer was no longer terminating. Lets start over
with the sync bitmap stuff...


Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c	2006-03-11 15:59:27 UTC (rev 2111)
+++ trunk/drbd/drbd_bitmap.c	2006-03-12 08:38:31 UTC (rev 2112)
@@ -29,7 +29,6 @@
 #include <linux/bitops.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h> // for memset
-#include <linux/swap.h>   // for total_swapcache_pages
 
 #include <linux/drbd.h>
 #include "drbd_int.h"
@@ -58,13 +57,12 @@
 
 /*
  * NOTE
- *  Access to the *bm_pages is protected by bm_lock.
+ *  Access to the *bm is protected by bm_lock.
  *  It is safe to read the other members within the lock.
  *
  *  drbd_bm_set_bit is called from bio_endio callbacks,
  *  so there we need a spin_lock_irqsave.
  *  Everywhere else we need a spin_lock_irq.
- *  And we need the kmap_atomic :-(
  *
  * FIXME
  *  Actually you need to serialize all resize operations.
@@ -80,28 +78,17 @@
  *  than have two resize operations interfere somewhen.
  */
 struct drbd_bitmap {
-	struct page **bm_pages;
+	unsigned long *bm;
 	spinlock_t bm_lock;
-	/* WARNING unsigned long bm_fo and friends:
-	 * 32bit number of bit offset is just enough for 512 MB bitmap.
-	 * it will blow up if we make the bitmap bigger...
-	 * not that it makes much sense to have a bitmap that large,
-	 * rather change the granularity to 16k or 64k or something.
-	 */
 	unsigned long bm_fo;        // next offset for drbd_bm_find_next
 	unsigned long bm_set;       // nr of set bits; THINK maybe atomic_t ?
 	unsigned long bm_bits;
 	size_t   bm_words;
-	size_t   bm_number_of_pages;
 	sector_t bm_dev_capacity;
 	struct semaphore bm_change; // serializes resize operations
 
-	atomic_t bm_async_io;
-	wait_queue_head_t bm_io_wait;
-
-	unsigned long  bm_flags;
-
 	// { REMOVE
+	unsigned long  bm_flags;     // currently debugging aid only
 	unsigned long  bm_line;
 	char          *bm_file;
 	// }
@@ -109,9 +96,7 @@
 
 // { REMOVE once we serialize all state changes properly
 #define D_BUG_ON(x)	ERR_IF(x) { dump_stack(); }
-#define BM_LOCKED      0
-#define BM_MD_IO_ERROR (BITS_PER_LONG-1) // 31? 63?
-
+#define BM_LOCKED 0
 #if 0 // simply disabled for now...
 #define MUST_NOT_BE_LOCKED() do {					\
 	if (test_bit(BM_LOCKED,&b->bm_flags)) {				\
@@ -173,6 +158,7 @@
 // has been very helpful to indicate that rs_total and rs_left have been
 // used in a non-smp safe way...
 #define BM_PARANOIA_CHECK() do {						\
+	D_ASSERT(b->bm[b->bm_words] == DRBD_MAGIC);				\
 	D_ASSERT(b->bm_dev_capacity == drbd_get_capacity(mdev->this_bdev));	\
 	if ( (b->bm_set != mdev->rs_total) &&					\
 	     (b->bm_set != mdev->rs_left) ) {					\
@@ -185,6 +171,7 @@
 } while (0)
 #else
 #define BM_PARANOIA_CHECK() do {					\
+	D_ASSERT(b->bm[b->bm_words] == DRBD_MAGIC);			\
 	if (b->bm_dev_capacity != drbd_get_capacity(mdev->this_bdev)) {	\
 		ERR("%s:%d: bm_dev_capacity:%llu drbd_get_capacity:%llu\n", \
 		__FILE__, __LINE__,					\
@@ -197,7 +184,6 @@
 
 #if DUMP_MD >= 3
 /* debugging aid */
-#error "FIXME"
 STATIC void bm_end_info(drbd_dev *mdev, const char* where)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
@@ -216,52 +202,9 @@
 #define bm_end_info(ignored...)	((void)(0))
 #endif
 
-#if 0
-#define catch_oob_access_start() do {	\
-	do {				\
-		if ((bm-p_addr) >= PAGE_SIZE/sizeof(long)) { \
-			printk(KERN_ALERT "drbd_bitmap.c:%u %s: p_addr:%p bm:%p %d\n", \
-					__LINE__ , __func__ , p_addr, bm, (bm-p_addr)); \
-			break;		\
-		}
-#define catch_oob_access_end()	\
-	} while(0); } while (0)
-#else
-#define catch_oob_access_start() ((void)0)
-#define catch_oob_access_end() ((void)0)
-#endif
-
-/* word offset to long pointer */
-STATIC unsigned long * bm_map_paddr(struct drbd_bitmap* b, unsigned long offset)
-{
-	struct page * page;
-	unsigned long page_nr;
-
-	// page_nr = (word*sizeof(long)) >> PAGE_SHIFT;
-	page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
-	BUG_ON(page_nr >= b->bm_number_of_pages);
-	page = b->bm_pages[page_nr];
-
-	return (unsigned long*) kmap_atomic(page,KM_USER0);
-}
-
-STATIC void bm_unmap(unsigned long *p_addr) {
-	kunmap_atomic(p_addr, KM_USER0);
-};
-
 /* long word offset of _bitmap_ sector */
 #define S2W(s)	((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL))
-/* word offset from start of bitmap to word number _in_page_
- * modulo longs per page
-#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
- hm, well, Philipp thinks gcc might not optimze the % into & (... - 1)
- so do it explicitly:
- */
-#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
 
-/* Long words per page */
-#define LWPP (PAGE_SIZE/sizeof(long))
-
 /*
  * actually most functions herein should take a struct drbd_bitmap*, not a
  * drbd_dev*, but for the debug macros I like to have the mdev around
@@ -274,113 +217,7 @@
  * But it is NOT strictly ok.
  */
 
-STATIC void bm_free_pages(struct page ** pages, unsigned long number)
-{
-	unsigned long i;
-	if (!pages)
-		return;
-
-	for(i=0;i<number;i++) {
-		if (!pages[i]) {
-			printk(KERN_ALERT "drbd: bm_free_pages tried to free "
-					  "a NULL pointer; i=%lu n=%lu\n",
-					  i, number);
-			continue;
-		}
-		__free_page(pages[i]);
-		pages[i] = NULL;
-	}
-}
-
 /*
- * "have" and "want" are NUMBER OF PAGES.
- */
-STATIC struct page ** bm_realloc_pages(struct page ** old_pages,
-				       unsigned long have,
-				       unsigned long want)
-{
-	struct page** new_pages, *page;
-	unsigned int i, bytes;
-
-	BUG_ON(have == 0 && old_pages != NULL);
-	BUG_ON(have != 0 && old_pages == NULL);
-
-	if (have == want)
-		return old_pages;
-
-	/* don't even try if it is likely to be too much */
-	if (want > have) {
-		unsigned long nfree, nleft;
-		nfree = nr_free_pages();
-		nleft = nfree + get_page_cache_size()-total_swapcache_pages;
-
-		if (nleft <= (want-have)) {
-			printk(KERN_ALERT "drbd: bm_realloc_pages: OOM!\n");
-			return NULL;
-		}
-
-		nleft -= (want-have);
-
-		/* protect me agains the infamous oom killer.
-		 * FIXME sorry, now it does no longer work on embeded boxes with less than 16MB main ram :-)
-		 */
-		if ( nleft < (10 << (20 - PAGE_SHIFT)) ) {
-			printk(KERN_ALERT "drbd: bm_realloc_pages would have left less than 10MB ram free; OOM!\n");
-			printk(KERN_ALERT "drbd: free:%lu buf+cache:%lu want:%lu have:%lu left:%lu\n"
-			,	nfree
-			,	get_page_cache_size()-total_swapcache_pages
-			,	want, have, nleft
-			);
-			return NULL;
-		}
-	}
-
-	/* To use kmalloc here is ok, as long as we support 4TB at max...
-	 * otherwise this might become bigger than 128KB, which is
-	 * the maximum for kmalloc.
-	 *
-	 * no, it is not: on 64bit boxes, sizeof(void*) == 8,
-	 * 128MB bitmap @ 4K pages -> 256K of page pointers.
-	 * ==> use vmalloc for now again.
-	 * then again, we could do something like
-	 *   if (nr_pages > watermark) vmalloc else kmalloc :*> ...
-	 * or do cascading page arrays:
-	 *   one page for the page array of the page array,
-	 *   those pages for the real bitmap pages.
-	 *   there we could even add some optimization members,
-	 *   so we won't need to kmap_atomic in bm_find_next_bit just to see
-	 *   that the page has no bits set ...
-	 */
-	bytes = sizeof(struct page*)*want;
-	new_pages = vmalloc(bytes);
-	if(!new_pages) return NULL;
-
-	memset(new_pages,0,bytes);
-	if( want >= have ) {
-		for(i=0;i<have;i++) {
-			new_pages[i] = old_pages[i];
-		}
-		for(;i<want;i++) {
-			if ( !(page = alloc_page(GFP_HIGHUSER)) ) {
-				bm_free_pages(new_pages + have, i - have);
-				vfree(new_pages);
-				return NULL;
-			}
-			new_pages[i] = page;
-		}
-	} else {
-		for(i=0;i<want;i++) {
-			new_pages[i] = old_pages[i];
-		}
-		/* NOT HERE, we are outside the spinlock!
-		bm_free_pages(old_pages + want, have - want);
-		*/
-	}
-
-	return new_pages;
-}
-
-/*
  * called on driver init only. TODO call when a device is created.
  * allocates the drbd_bitmap, and stores it in mdev->bitmap.
  */
@@ -394,7 +231,6 @@
 	memset(b,0,sizeof(*b));
 	spin_lock_init(&b->bm_lock);
 	init_MUTEX(&b->bm_change);
-	init_waitqueue_head(&b->bm_io_wait);
 	mdev->bitmap = b;
 	return 0;
 }
@@ -415,8 +251,7 @@
 	 *
 	D_BUG_ON(mdev->bitmap->bm);
 	 */
-	bm_free_pages(mdev->bitmap->bm_pages,mdev->bitmap->bm_number_of_pages);
-	vfree(mdev->bitmap->bm_pages);
+	vfree(mdev->bitmap->bm);
 	kfree(mdev->bitmap);
 	mdev->bitmap = NULL;
 }
@@ -431,25 +266,17 @@
 	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1;
 	size_t w = b->bm_bits >> LN2_BPL;
 	int cleared=0;
-	unsigned long *p_addr, *bm;
 
-	p_addr = bm_map_paddr(b,w);
-	bm = p_addr + MLPP(w);
 	if ( w < b->bm_words ) {
-		catch_oob_access_start();
-		cleared = hweight_long(*bm & ~mask);
-		*bm &= mask;
-		catch_oob_access_end();
-		w++; bm++;
+		cleared = hweight_long(b->bm[w] & ~mask);
+		b->bm[w++] &= mask;
 	}
 
 	if ( w < b->bm_words ) {
-		catch_oob_access_start();
-		cleared += hweight_long(*bm);
-		*bm=0;
-		catch_oob_access_end();
+		cleared += hweight_long(b->bm[w]);
+		b->bm[w++]=0;
 	}
-	bm_unmap(p_addr);
+
 	return cleared;
 }
 
@@ -457,80 +284,31 @@
 {
 	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1;
 	size_t w = b->bm_bits >> LN2_BPL;
-	unsigned long *p_addr, *bm;
 
-	p_addr = bm_map_paddr(b,w);
-	bm = p_addr + MLPP(w);
 	if ( w < b->bm_words ) {
-		catch_oob_access_start();
-		*bm |= ~mask;
-		bm++; w++;
-		catch_oob_access_end();
+		b->bm[w++] |= ~mask;
 	}
 
 	if ( w < b->bm_words ) {
-		catch_oob_access_start();
-		*bm = ~(0UL);
-		catch_oob_access_end();
+		b->bm[w++] = ~(0UL);
 	}
-	bm_unmap(p_addr);
 }
 
-STATIC unsigned long bm_count_bits(struct drbd_bitmap * b, int just_read)
+STATIC unsigned long bm_count_bits(struct drbd_bitmap * b)
 {
-	unsigned long *p_addr, *bm, offset = 0;
+	unsigned long *bm = b->bm;
+	unsigned long *ep = b->bm + b->bm_words;
 	unsigned long bits = 0;
-	unsigned long i,do_now;
 
-	while ( offset < b->bm_words ) {
-		i = do_now = min_t( size_t, b->bm_words-offset, LWPP );
-		p_addr = bm_map_paddr(b,offset);
-		bm = p_addr + MLPP(offset);
-		while(i--) {
-			catch_oob_access_start();
-			/* on little endian, this is *bm = *bm;
-			 * and should be optimized away by the compiler */
-			if (just_read) *bm = lel_to_cpu(*bm);
-			bits += hweight_long(*bm++);
-			catch_oob_access_end();
-		}
-		bm_unmap(p_addr);
-		offset += do_now;
+	while ( bm < ep ) {
+		bits += hweight_long(*bm++);
 	}
 
 	return bits;
 }
 
-/* offset and len in long words.*/
-STATIC void bm_memset(struct drbd_bitmap * b, size_t offset, int c, size_t len)
-{
-	unsigned long *p_addr, *bm;
-	size_t do_now, end;
+#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
 
-	end = offset+len;
-
-	if( end > b->bm_words ) {
-		printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
-		return;
-	}
-
-	while (offset < end) {
-		do_now = min_t(size_t,ALIGN(offset+1,LWPP),end) - offset;
-		p_addr = bm_map_paddr(b,offset);
-		bm = p_addr + MLPP(offset);
-		catch_oob_access_start();
-		if ( bm+do_now > p_addr + LWPP ) {
-			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
-					p_addr, bm, do_now);
-			break;
-		}
-		memset(bm,c,do_now*sizeof(long));
-		catch_oob_access_end();
-		bm_unmap(p_addr);
-		offset += do_now;
-	}
-}
-
 /*
  * make sure the bitmap has enough room for the attached storage,
  * if neccessary, resize.
@@ -542,9 +320,7 @@
 int drbd_bm_resize(drbd_dev *mdev, sector_t capacity)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long bits, words, owords, obits, *p_addr, *bm;
-	unsigned long want, have, onpages; // number of pages
-	struct page **npages, **opages = NULL;
+	unsigned long bits, bytes, words, *nbm, *obm = 0;
 	int err = 0, growing;
 
 	ERR_IF(!b) return -ENOMEM;
@@ -561,89 +337,79 @@
 
 	if (capacity == 0) {
 		spin_lock_irq(&b->bm_lock);
-		opages = b->bm_pages;
-		onpages = b->bm_number_of_pages;
-		owords = b->bm_words;
-		b->bm_pages = NULL;
-		b->bm_number_of_pages =
+		obm = b->bm;
+		b->bm = NULL;
 		b->bm_fo    =
 		b->bm_set   =
 		b->bm_bits  =
 		b->bm_words =
 		b->bm_dev_capacity = 0;
 		spin_unlock_irq(&b->bm_lock);
-		bm_free_pages(opages, onpages);
-		vfree(opages);
-		goto out;
-	}
-	bits  = BM_SECT_TO_BIT(ALIGN(capacity,BM_SECT_PER_BIT));
-
-	/* if we would use
-	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
-	   a 32bit host could present the wrong number of words
-	   to a 64bit host.
-	*/
-	words = ALIGN(bits,64) >> LN2_BPL;
-
-	D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12));
-
-	/* one extra long to catch off by one errors */
-	want = ALIGN( (words+1)*sizeof(long) ,PAGE_SIZE) >> PAGE_SHIFT;
-	have = b->bm_number_of_pages;
-	if (want == have) {
-		D_ASSERT(b->bm_pages != NULL);
-		npages = b->bm_pages;
+		goto free_obm;
 	} else {
-		npages = bm_realloc_pages(b->bm_pages,have,want);
-	}
+		bits = BM_SECT_TO_BIT(ALIGN(capacity,BM_SECTORS_PER_BIT));
+		DUMPI(bits);
 
-	if (!npages) {
-		err = -ENOMEM;
-		goto out;
-	}
+		/* if we would use
+		   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
+		   a 32bit host could present the wrong number of words
+		   to a 64bit host.
+		*/
+		words = ALIGN(bits,64) >> LN2_BPL;
 
-	spin_lock_irq(&b->bm_lock);
-	opages = b->bm_pages;
-	owords = b->bm_words;
-	obits  = b->bm_bits;
+		D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12));
 
-	growing = bits > obits;
-	if (opages) {
-		bm_set_surplus(b);
+		if ( words == b->bm_words ) {
+			/* optimize: capacity has changed,
+			 * but only within one long word worth of bits.
+			 * just update the bm_dev_capacity and bm_bits members.
+			 */
+			spin_lock_irq(&b->bm_lock);
+			b->bm_bits    = bits;
+			b->bm_dev_capacity = capacity;
+			b->bm_set -= bm_clear_surplus(b);
+			bm_end_info(mdev, __FUNCTION__ );
+			spin_unlock_irq(&b->bm_lock);
+			goto out;
+		} else {
+		        /* one extra long to catch off by one errors */
+			bytes = (words+1)*sizeof(long);
+			nbm = vmalloc(bytes);
+			if (!nbm) {
+				ERR("bitmap: failed to vmalloc %lu bytes\n",bytes);
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+		spin_lock_irq(&b->bm_lock);
+		obm = b->bm;
+		// brgs. move several MB within spinlock...
+		// FIXME this should go into userspace!
+		if (obm) {
+			bm_set_surplus(b);
+			D_ASSERT(b->bm[b->bm_words] == DRBD_MAGIC);
+			memcpy(nbm,obm,min_t(size_t,b->bm_words,words)*sizeof(long));
+		}
+		growing = words > b->bm_words;
+		if (growing) { // set all newly allocated bits
+			// start at -1, just to be sure.
+			memset( nbm + (b->bm_words?:1)-1 , 0xff,
+				(words - ((b->bm_words?:1)-1)) * sizeof(long) );
+			b->bm_set  += bits - b->bm_bits;
+		}
+		nbm[words] = DRBD_MAGIC;
+		b->bm = nbm;
+		b->bm_bits  = bits;
+		b->bm_words = words;
+		b->bm_dev_capacity = capacity;
+		bm_clear_surplus(b);
+		if( !growing ) b->bm_set = bm_count_bits(b);
+		bm_end_info(mdev, __FUNCTION__ );
+		spin_unlock_irq(&b->bm_lock);
+		INFO("resync bitmap: bits=%lu words=%lu\n",bits,words);
 	}
-
-	b->bm_pages = npages;
-	b->bm_number_of_pages = want;
-	b->bm_bits  = bits;
-	b->bm_words = words;
-	b->bm_dev_capacity = capacity;
-
-	if( growing ) {
-		bm_memset(b,owords,0xff,words-owords);
-		b->bm_set += bits - obits;
-	}
-
-	if ( want < have ) {
-		/* implicit: (opages != NULL) && (opages != npages) */
-		bm_free_pages(opages + want, have - want);
-	}
-
-	p_addr = bm_map_paddr(b,words);
-	bm = p_addr + MLPP(words);
-	catch_oob_access_start();
-	*bm = DRBD_MAGIC;
-	catch_oob_access_end();
-	bm_unmap(p_addr);
-
-	(void)bm_clear_surplus(b);
-	if( !growing )
-		b->bm_set = bm_count_bits(b, 0 /* not just read */ );
-
-	bm_end_info(mdev, __FUNCTION__ );
-	spin_unlock_irq(&b->bm_lock);
-	if (opages != npages) vfree(opages);
-	INFO("resync bitmap: bits=%lu words=%lu\n",bits,words);
-
+ free_obm:
+	vfree(obm); // vfree(NULL) is noop
  out:
 	up(&b->bm_change);
 	return err;
@@ -664,7 +430,6 @@
 	unsigned long flags;
 
 	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
 	// MUST_BE_LOCKED(); well. yes. but ...
 
 	spin_lock_irqsave(&b->bm_lock,flags);
@@ -678,7 +443,6 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
 
 	/* FIXME
 	 * actually yes. really. otherwise it could just change its size ...
@@ -696,42 +460,34 @@
 			unsigned long* buffer )
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr, *bm;
+	unsigned long *bm;
 	unsigned long word, bits;
-	size_t end, do_now;
+	size_t n = number;
 
-	end = offset + number;
-
+	if (number == 0) return;
 	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
-	D_BUG_ON(offset >= b->bm_words);
-	D_BUG_ON(end    >  b->bm_words);
+	ERR_IF(!b->bm) return;
+	D_BUG_ON(offset        >= b->bm_words);
+	D_BUG_ON(offset+number >  b->bm_words);
+	D_BUG_ON(number > PAGE_SIZE/sizeof(long));
 
 	MUST_BE_LOCKED();
 
 	spin_lock_irq(&b->bm_lock);
 	// BM_PARANOIA_CHECK(); no.
-	while(offset < end) {
-		do_now = min_t(size_t,ALIGN(offset+1,LWPP),end) - offset;
-		p_addr = bm_map_paddr(b,offset);
-		bm = p_addr + MLPP(offset);
-		offset += do_now;
-		while(do_now--) {
-			catch_oob_access_start();
-			bits = hweight_long(*bm);
-			word = *bm | lel_to_cpu(*buffer++);
-			*bm++ = word;
-			b->bm_set += hweight_long(word) - bits;
-			catch_oob_access_end();
-		}
-		bm_unmap(p_addr);
+	bm = b->bm + offset;
+	while(n--) {
+		bits = hweight_long(*bm);
+		word = *bm | lel_to_cpu(*buffer++);
+		*bm++ = word;
+		b->bm_set += hweight_long(word) - bits;
 	}
 	/* with 32bit <-> 64bit cross-platform connect
 	 * this is only correct for current usage,
 	 * where we _know_ that we are 64 bit aligned,
 	 * and know that this function is used in this way, too...
 	 */
-	if (end == b->bm_words) {
+	if (offset+number == b->bm_words) {
 		b->bm_set -= bm_clear_surplus(b);
 		bm_end_info(mdev, __FUNCTION__ );
 	}
@@ -745,42 +501,34 @@
 		      unsigned long* buffer )
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr, *bm;
+	unsigned long *bm;
 	unsigned long word, bits;
-	size_t end, do_now;
+	size_t n = number;
 
-	end = offset + number;
-
+	if (number == 0) return;
 	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
-	D_BUG_ON(offset >= b->bm_words);
-	D_BUG_ON(end    >  b->bm_words);
+	ERR_IF(!b->bm) return;
+	D_BUG_ON(offset        >= b->bm_words);
+	D_BUG_ON(offset+number >  b->bm_words);
+	D_BUG_ON(number > PAGE_SIZE/sizeof(long));
 
 	MUST_BE_LOCKED();
 
 	spin_lock_irq(&b->bm_lock);
 	// BM_PARANOIA_CHECK(); no.
-	while(offset < end) {
-		do_now = min_t(size_t,ALIGN(offset+1,LWPP),end) - offset;
-		p_addr = bm_map_paddr(b,offset);
-		bm = p_addr + MLPP(offset);
-		offset += do_now;
-		while(do_now--) {
-			catch_oob_access_start();
-			bits = hweight_long(*bm);
-			word = lel_to_cpu(*buffer++);
-			*bm++ = word;
-			b->bm_set += hweight_long(word) - bits;
-			catch_oob_access_end();
-		}
-		bm_unmap(p_addr);
+	bm = b->bm + offset;
+	while(n--) {
+		bits = hweight_long(*bm);
+		word = lel_to_cpu(*buffer++);
+		*bm++ = word;
+		b->bm_set += hweight_long(word) - bits;
 	}
 	/* with 32bit <-> 64bit cross-platform connect
 	 * this is only correct for current usage,
 	 * where we _know_ that we are 64 bit aligned,
 	 * and know that this function is used in this way, too...
 	 */
-	if (end == b->bm_words) {
+	if (offset+number == b->bm_words) {
 		b->bm_set -= bm_clear_surplus(b);
 		bm_end_info(mdev, __FUNCTION__ );
 	}
@@ -794,16 +542,14 @@
 		      unsigned long* buffer )
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr, *bm;
-	size_t end, do_now;
+	unsigned long *bm;
 
-	end = offset + number;
-
+	if (number == 0) return;
 	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
-
-	if ( (offset >= b->bm_words) ||
-	     (end    >  b->bm_words) ||
+	ERR_IF(!b->bm) return;
+	if ( (offset        >= b->bm_words) ||
+	     (offset+number >  b->bm_words) ||
+	     (number > PAGE_SIZE/sizeof(long)) ||
 	     (number <= 0) ) {
 		// yes, there is "%z", but that gives compiler warnings...
 		ERR("offset=%lu number=%lu bm_words=%lu\n",
@@ -817,18 +563,8 @@
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
-	while(offset < end) {
-		do_now = min_t(size_t,ALIGN(offset+1,LWPP),end) - offset;
-		p_addr = bm_map_paddr(b,offset);
-		bm = p_addr + MLPP(offset);
-		offset += do_now;
-		while(do_now--) {
-			catch_oob_access_start();
-			*buffer++ = cpu_to_lel(*bm++);
-			catch_oob_access_end();
-		}
-		bm_unmap(p_addr);
-	}
+	bm = b->bm + offset;
+	while(number--) *buffer++ = cpu_to_lel(*bm++);
 	spin_unlock_irq(&b->bm_lock);
 }
 
@@ -837,62 +573,22 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	ERR_IF(!b->bm) return;
 
+	D_BUG_ON(!b);
+	if (b->bm_bits == 0) return;
+	D_BUG_ON(!b->bm);
+
 	MUST_BE_LOCKED();
 
 	spin_lock_irq(&b->bm_lock);
-	bm_memset(b,0,0xff,b->bm_words);
-	(void)bm_clear_surplus(b);
+	BM_PARANOIA_CHECK();
+	memset(b->bm,0xff,b->bm_words*sizeof(long));
+	bm_clear_surplus(b);
 	b->bm_set = b->bm_bits;
 	spin_unlock_irq(&b->bm_lock);
 }
 
-int drbd_bm_async_io_complete(struct bio *bio, unsigned int bytes_done, int error)
-{
-	struct drbd_bitmap *b = bio->bi_private;
-
-	if (bio->bi_size)
-		return 1;
-
-	if (error) {
-		/* doh. what now?
-		 * for now, set all bits, and flag MD_IO_ERROR
-		 */
-		/* FIXME kmap_atomic memset etc. pp. */
-		__set_bit(BM_MD_IO_ERROR,&b->bm_flags);
-	}
-	if (atomic_dec_and_test(&b->bm_async_io))
-		wake_up(&b->bm_io_wait);
-
-	bio_put(bio);
-
-	return 0;
-}
-
-STATIC void drbd_bm_page_io_async(drbd_dev *mdev, struct drbd_bitmap *b, int page_nr, int rw)
-{
-	/* we are process context. we always get a bio */
-	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
-	unsigned int len;
-	sector_t on_disk_sector = mdev->bc->md.md_offset + mdev->bc->md.bm_offset;
-	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
-
-	/* this might happen with very small flexible external meta data device */
-	len = min_t(unsigned int, PAGE_SIZE,
-		(drbd_md_last_sector(mdev->bc) - on_disk_sector + 1)<<9);
-
-	D_DUMPLU(on_disk_sector);
-	D_DUMPI(len);
-
-	bio->bi_bdev = mdev->bc->md_bdev;
-	bio->bi_sector = on_disk_sector;
-	bio_add_page(bio, b->bm_pages[page_nr], len, 0);
-	bio->bi_private = b;
-	bio->bi_end_io = drbd_bm_async_io_complete;
-	submit_bio(rw, bio);
-}
-
 /* read one sector of the on disk bitmap into memory.
  * on disk bitmap is little endian.
  * @enr is _sector_ offset from start of on disk bitmap (aka bm-extent nr).
@@ -900,13 +596,11 @@
  */
 int drbd_bm_read_sect(drbd_dev *mdev,unsigned long enr)
 {
-	/* addition of sector_t/u64/s32... works anyways */
 	sector_t on_disk_sector = mdev->bc->md.md_offset + mdev->bc->md.bm_offset + enr;
 	int bm_words, num_words, offset, err  = 0;
 
 	// MUST_BE_LOCKED(); not neccessarily global ...
 
-	D_DUMPLU(on_disk_sector);
 	down(&mdev->md_io_mutex);
 	if(drbd_md_sync_page_io(mdev,mdev->bc,on_disk_sector,READ)) {
 		bm_words  = drbd_bm_words(mdev);
@@ -935,50 +629,12 @@
 
 /**
  * drbd_bm_read: Read the whole bitmap from its on disk location.
- *
- * currently only called from "drbd_ioctl_set_disk"
- * FIXME need to be able to return an error!!
- *
  */
-# if defined(__LITTLE_ENDIAN)
-	/* nothing to do, on disk == in memory */
-# define bm_cpu_to_lel(x) ((void)0)
-# else
-void bm_cpu_to_lel(struct drbd_bitmap *b)
+void drbd_bm_read(struct Drbd_Conf *mdev)
 {
-	/* need to cpu_to_lel all the pages ...
-	 * this may be optimized by using
-	 * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
-	 * the following is still not optimal, but better than nothing */
-	if (b->bm_set == 0) {
-		/* no page at all; avoid swap if all is 0 */
-		i = b->bm_number_of_pages;
-	} else if (b->bm_set == b->bm_bits) {
-		/* only the last page */
-		i = b->bm_number_of_pages -1;
-	} else {
-		/* all pages */
-		i = 0;
-	}
-	for (; i < b->bm_number_of_pages; i++) {
-		unsigned long *bm;
-		p_addr = kmap_atomic(b->bm_pages[i],KM_USER0)
-		for (bm = p_addr; bm < p_addr+PAGE_SIZE/sizeof(long); bm++) {
-			*bm = cpu_to_lel(*bm);
-		}
-		kunmap_atomic(p_addr);
-	}
-}
-# endif
-/* lel_to_cpu == cpu_to_lel */
-# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
-
-STATIC void drbd_bm_rw(struct Drbd_Conf *mdev, int rw)
-{
 	struct drbd_bitmap *b = mdev->bitmap;
-	/* sector_t sector; */
-	int bm_words, num_sectors, i;
-	unsigned long now;
+	sector_t sector;
+	int bm_words, num_sectors;
 	char ppb[10];
 
 	MUST_BE_LOCKED();
@@ -986,62 +642,15 @@
 	bm_words    = drbd_bm_words(mdev);
 	num_sectors = (bm_words*sizeof(long) + 511) >> 9;
 
-	/* OK, I manipulate the bitmap low level,
-	 * and I expect to be the exclusive user.
-	 * If not, I am really in a bad mood...
-	 * to catch such bugs early, make all people who want to access the
-	 * bitmap while I read it dereference a NULL pointer :->
-	 */
-	mdev->bitmap = NULL;
-
-	if(rw == WRITE)	bm_cpu_to_lel(b);
-
-	now = jiffies;
-	atomic_set(&b->bm_async_io, b->bm_number_of_pages);
-	for (i = 0; i < b->bm_number_of_pages; i++) {
-		/* let the layers below us try to merge these bios... */
-		drbd_bm_page_io_async(mdev,b,i,rw);
+	for (sector = 0; sector < num_sectors; sector++) {
+		// FIXME do something on io error here?
+		drbd_bm_read_sect(mdev,sector);
 	}
 
-	blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
-	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
-	INFO("%s of bitmap took %lu jiffies\n",
-	     rw == READ ? "reading" : "writing", jiffies - now);
-
-	if (test_bit(BM_MD_IO_ERROR,&b->bm_flags)) {
-		/* FIXME correct handling of this.
-		 * detach?
-		 */
-		ALERT("we had at least one MD IO ERROR during bitmap IO\n");
-		drbd_chk_io_error(mdev, 1);
-		drbd_io_error(mdev);
-	}
-
-	now = jiffies;
-	/* just read, if neccessary adjust endianness */
-	if(rw == WRITE) {
-		bm_lel_to_cpu(b);
-	} else /* rw == READ */ {
-		b->bm_set = bm_count_bits(b, 1);
-		INFO("recounting of set bits took additional %lu jiffies\n", 
-		     jiffies - now);
-	}
-
-	/* ok, done,
-	 * now it is visible again
-	 */
-
-	mdev->bitmap = b;
-
 	INFO("%s marked out-of-sync by on disk bit-map.\n",
 	     ppsize(ppb,drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10)) );
 }
 
-void drbd_bm_read(struct Drbd_Conf *mdev)
-{
-	drbd_bm_rw(mdev, READ);
-}
-
 /**
  * drbd_bm_write_sect: Writes a 512 byte piece of the bitmap to its
  * on disk location. On disk bitmap is little endian.
@@ -1069,7 +678,6 @@
 	}
 	drbd_bm_get_lel( mdev, offset, num_words,
 			 page_address(mdev->md_io_page) );
-	D_DUMPLU(on_disk_sector);
 	if (!drbd_md_sync_page_io(mdev,mdev->bc,on_disk_sector,WRITE)) {
 		int i;
 		err = -EIO;
@@ -1088,27 +696,25 @@
 
 /**
  * drbd_bm_write: Write the whole bitmap to its on disk location.
- *
- * called at various occasions:
- *
-int drbd_determin_dev_size(struct Drbd_Conf* mdev)
-int drbd_ioctl_set_disk(struct Drbd_Conf *mdev,
-			struct ioctl_disk_config * arg)
-	case DRBD_IOCTL_INVALIDATE:
-	case DRBD_IOCTL_INVALIDATE_REM:
-	case DRBD_IOCTL_SET_DISK_SIZE:
-int _drbd_send_bitmap(drbd_dev *mdev)
-STATIC int drbd_sync_handshake(drbd_dev *mdev, Drbd_Parameter_Packet *p)
-STATIC int receive_BecomeSyncTarget(drbd_dev *mdev, Drbd_Header *h)
-STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
-STATIC int receive_param(drbd_dev *mdev, Drbd_Header *h)
- *
- * FIXME need to be able to return an error!!
  */
-
 void drbd_bm_write(struct Drbd_Conf *mdev)
 {
-	drbd_bm_rw(mdev, WRITE);
+	struct drbd_bitmap *b = mdev->bitmap;
+	sector_t sector;
+	int bm_words, num_sectors;
+
+	MUST_BE_LOCKED();
+
+	bm_words    = drbd_bm_words(mdev);
+	num_sectors = (bm_words*sizeof(long) + 511) >> 9;
+
+	for (sector = 0; sector < num_sectors; sector++) {
+		// FIXME do something on io error here?
+		drbd_bm_write_sect(mdev,sector);
+	}
+
+	INFO("%lu KB now marked out-of-sync by on disk bit-map.\n",
+	      drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10) );
 }
 
 /* clear all bits in the bitmap */
@@ -1117,12 +723,13 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 
 	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	ERR_IF(!b->bm) return;
 
-	MUST_BE_LOCKED();
+	MUST_BE_LOCKED();						\
 
 	spin_lock_irq(&b->bm_lock);
-	bm_memset(b,0,0x00,b->bm_words);
+	BM_PARANOIA_CHECK();
+	memset(b->bm,0,b->bm_words*sizeof(long));
 	b->bm_set = 0;
 	spin_unlock_irq(&b->bm_lock);
 }
@@ -1145,44 +752,29 @@
 /* NOTE
  * find_first_bit returns int, we return unsigned long.
  * should not make much difference anyways, but ...
- *
  * this returns a bit number, NOT a sector!
  */
-#define BPP_MASK ((1UL << (PAGE_SHIFT+3) ) - 1)
 unsigned long drbd_bm_find_next(drbd_dev *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long i = -1UL;
-	unsigned long *p_addr;
-	unsigned long bit_offset; //bit offset of the mapped page.
 
 	ERR_IF(!b) return i;
-	ERR_IF(!b->bm_pages) return i;
+	ERR_IF(!b->bm) return i;
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
-	if (b->bm_fo > b->bm_bits) {
+	if (b->bm_fo < b->bm_bits) {
+		i = find_next_bit(b->bm,b->bm_bits,b->bm_fo);
+	} else if (b->bm_fo > b->bm_bits) {
 		ERR("bm_fo=%lu bm_bits=%lu\n",b->bm_fo, b->bm_bits);
-	} else {
-		while (b->bm_fo < b->bm_bits) {
-			unsigned long offset;
-			bit_offset = b->bm_fo & ~BPP_MASK; // bit offset of the page
-			offset = bit_offset >> LN2_BPL;    // word offset of the page
-			p_addr = bm_map_paddr(b,offset);
-			i = find_next_bit(p_addr,PAGE_SIZE*8,b->bm_fo&BPP_MASK);
-			bm_unmap(p_addr);
-			if (i < PAGE_SIZE*8) {
-				i = bit_offset + i;
-				if( i >= b->bm_bits ) break;
-				b->bm_fo = i+1;
-				goto found;
-			}
-			b->bm_fo = bit_offset + PAGE_SIZE*8;
-		}
+	}
+	if (i >= b->bm_bits) {
 		i = -1UL;
 		b->bm_fo = 0;
+	} else {
+		b->bm_fo = i+1;
 	}
- found:
 	spin_unlock_irq(&b->bm_lock);
 	return i;
 }
@@ -1203,6 +795,7 @@
 	spin_unlock_irq(&b->bm_lock);
 }
 
+
 int drbd_bm_rs_done(drbd_dev *mdev)
 {
 	return mdev->bitmap->bm_fo == 0;
@@ -1216,11 +809,9 @@
 int drbd_bm_set_bit(drbd_dev *mdev, const unsigned long bitnr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr;
 	int i;
-
 	ERR_IF(!b) return 1;
-	ERR_IF(!b->bm_pages) return 1;
+	ERR_IF(!b->bm) return 1;
 
 /*
  * only called from drbd_set_out_of_sync.
@@ -1240,10 +831,7 @@
 		ERR("bitnr=%lu bm_bits=%lu\n",bitnr, b->bm_bits);
 		i = 0;
 	} else {
-		unsigned long offset = bitnr>>LN2_BPL;
-		p_addr = bm_map_paddr(b,offset);
-		i = (0 != __test_and_set_bit(bitnr & BPP_MASK, p_addr));
-		bm_unmap(p_addr);
+		i = (0 != __test_and_set_bit(bitnr, b->bm));
 		b->bm_set += !i;
 	}
 	spin_unlock_irq(&b->bm_lock);
@@ -1256,12 +844,10 @@
 int drbd_bm_clear_bit(drbd_dev *mdev, const unsigned long bitnr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr;
 	unsigned long flags;
 	int i;
-
 	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	ERR_IF(!b->bm) return 0;
 
 	spin_lock_irqsave(&b->bm_lock,flags);
 	BM_PARANOIA_CHECK();
@@ -1270,10 +856,7 @@
 		ERR("bitnr=%lu bm_bits=%lu\n",bitnr, b->bm_bits);
 		i = 0;
 	} else {
-		unsigned long offset = bitnr>>LN2_BPL;
-		p_addr = bm_map_paddr(b,offset);
-		i = (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
-		bm_unmap(p_addr);
+		i = (0 != __test_and_clear_bit(bitnr, b->bm));
 		b->bm_set -= i;
 	}
 	spin_unlock_irqrestore(&b->bm_lock,flags);
@@ -1299,19 +882,14 @@
 int drbd_bm_test_bit(drbd_dev *mdev, const unsigned long bitnr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr;
 	int i;
-
 	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	ERR_IF(!b->bm) return 0;
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
 	if (bitnr < b->bm_bits) {
-		unsigned long offset = bitnr>>LN2_BPL;
-		p_addr = bm_map_paddr(b,offset);
-		i = test_bit(bitnr & BPP_MASK, p_addr);
-		bm_unmap(p_addr);
+		i = test_bit(bitnr, b->bm) ? 1 : 0;
 	} else if (bitnr == b->bm_bits) {
 		i = -1;
 	} else /* (bitnr > b->bm_bits) */ {
@@ -1342,11 +920,9 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	int count, s, e;
 	unsigned long flags;
-	unsigned long *p_addr, *bm;
 
 	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
-
+	ERR_IF(!b->bm) return 0;
 	spin_lock_irqsave(&b->bm_lock,flags);
 	BM_PARANOIA_CHECK();
 
@@ -1354,15 +930,9 @@
 	e = min((size_t)S2W(enr+1),b->bm_words);
 	count = 0;
 	if (s < b->bm_words) {
+		const unsigned long* w = b->bm+s;
 		int n = e-s;
-		p_addr = bm_map_paddr(b,s);
-		bm = p_addr + MLPP(s);
-		while (n--) {
-			catch_oob_access_start();
-			count += hweight_long(*bm++);
-			catch_oob_access_end();
-		}
-		bm_unmap(p_addr);
+		while (n--) count += hweight_long(*w++);
 	} else {
 		ERR("start offset (%d) too large in drbd_bm_e_weight\n", s);
 	}
@@ -1377,11 +947,10 @@
 unsigned long drbd_bm_ALe_set_all(drbd_dev *mdev, unsigned long al_enr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr, *bm;
 	unsigned long weight;
-	int count, s, e, i, do_now;
+	int count, s, e;
 	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	ERR_IF(!b->bm) return 0;
 
 	MUST_BE_LOCKED();
 
@@ -1391,23 +960,14 @@
 
 	s = al_enr * BM_WORDS_PER_AL_EXT;
 	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
-	/* assert that s and e are on the same page */
-	D_ASSERT(  (e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
-		==  s    >> (PAGE_SHIFT - LN2_BPL + 3) );
 	count = 0;
 	if (s < b->bm_words) {
-		i = do_now = e-s;
-		p_addr = bm_map_paddr(b,s);
-		bm = p_addr + MLPP(s);
-		while (i--) {
-			catch_oob_access_start();
-			count += hweight_long(*bm);
-			*bm = -1UL;
-			catch_oob_access_end();
-			bm++;
-		}
-		bm_unmap(p_addr);
-		b->bm_set += do_now*BITS_PER_LONG - count;
+		const unsigned long* w = b->bm+s;
+		int n = e-s;
+		while (n--) count += hweight_long(*w++);
+		n = e-s;
+		memset(b->bm+s,-1,n*sizeof(long));
+		b->bm_set += n*BITS_PER_LONG - count;
 		if (e == b->bm_words) {
 			b->bm_set -= bm_clear_surplus(b);
 		}



More information about the drbd-cvs mailing list