[DRBD-cvs] DRBD CVS: drbd by phil from

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Wed, 14 Jan 2004 07:42:22 +0100 (CET)


DRBD CVS committal

Author  : phil
Host    : 
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_actlog.c drbd_dsender.c drbd_int.h drbd_main.c 
	drbd_receiver.c drbd_req-2.4.c 


Log Message:
LGE's work of the last few days.

* Reorganizes various data structures to allow the use of the 
  container_of() macro. GOOD THING!

* A fix to drbd_dio_end()

Although the patch is large, very little of the program logic has
actually changed.

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.50
retrieving revision 1.1.2.51
diff -u -3 -r1.1.2.50 -r1.1.2.51
--- drbd_actlog.c	13 Jan 2004 19:42:16 -0000	1.1.2.50
+++ drbd_actlog.c	14 Jan 2004 06:42:22 -0000	1.1.2.51
@@ -140,7 +140,7 @@
 	u32 xor_sum=0;
 
 	down(&mdev->md_io_mutex); // protects md_io_buffer, al_tr_cycle, ...
-	buffer = (struct al_transaction*)bh_kmap(mdev->md_io_bh);
+	buffer = (struct al_transaction*)bh_kmap(&mdev->md_io_bh);
 
 	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
@@ -175,16 +175,16 @@
 
 	buffer->xor_sum = cpu_to_be32(xor_sum);
 
-	bh_kunmap(mdev->md_io_bh);
+	bh_kunmap(&mdev->md_io_bh);
 
 	sector = drbd_md_ss(mdev) + MD_AL_OFFSET + mdev->al_tr_pos ;
 
-	drbd_set_md_bh(mdev, mdev->md_io_bh, sector, 512);
-	set_bit(BH_Dirty, &mdev->md_io_bh->b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh->b_state);
-	mdev->md_io_bh->b_end_io = drbd_generic_end_io;
-	generic_make_request(WRITE,mdev->md_io_bh);
-	wait_on_buffer(mdev->md_io_bh);
+	drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
+	set_bit(BH_Dirty, &mdev->md_io_bh.b_state);
+	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
+	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
+	generic_make_request(WRITE,&mdev->md_io_bh);
+	wait_on_buffer(&mdev->md_io_bh);
 
 	if( ++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements,AL_EXTENTS_PT) ) {
 		mdev->al_tr_pos=0;
@@ -195,7 +195,7 @@
 }
 
 /* In case this function returns 1 == success, the caller must do
-		bh_kunmap(mdev->md_io_bh);
+		bh_kunmap(&mdev->md_io_bh);
 		up(&mdev->md_io_mutex);
  */
 STATIC int drbd_al_read_tr(struct Drbd_Conf *mdev,
@@ -210,14 +210,14 @@
 	down(&mdev->md_io_mutex);
 	sector = drbd_md_ss(mdev) + MD_AL_OFFSET + index;
 
-	drbd_set_md_bh(mdev, mdev->md_io_bh, sector, 512);
-	clear_bit(BH_Uptodate, &mdev->md_io_bh->b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh->b_state);
-	mdev->md_io_bh->b_end_io = drbd_generic_end_io;
-	generic_make_request(READ,mdev->md_io_bh);
-	wait_on_buffer(mdev->md_io_bh);
+	drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
+	clear_bit(BH_Uptodate, &mdev->md_io_bh.b_state);
+	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
+	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
+	generic_make_request(READ,&mdev->md_io_bh);
+	wait_on_buffer(&mdev->md_io_bh);
 
-	buffer = (struct al_transaction*)bh_kmap(mdev->md_io_bh);
+	buffer = (struct al_transaction*)bh_kmap(&mdev->md_io_bh);
 
 	rv = ( be32_to_cpu(buffer->magic) == DRBD_MAGIC );
 
@@ -229,7 +229,7 @@
 	if(rv) {
 		*bp = buffer;
 	} else {
-		bh_kunmap(mdev->md_io_bh);
+		bh_kunmap(&mdev->md_io_bh);
 		up(&mdev->md_io_mutex);
 	}
 
@@ -252,7 +252,7 @@
 		if(!drbd_al_read_tr(mdev,&buffer,i)) continue;
 		cnr = be32_to_cpu(buffer->tr_number);
 		// INFO("index %d valid tnr=%d\n",i,cnr);
-		bh_kunmap(mdev->md_io_bh);
+		bh_kunmap(&mdev->md_io_bh);
 		up(&mdev->md_io_mutex);
 
 		if(cnr == -1) overflow=1;
@@ -300,7 +300,7 @@
 			active_extents++;
 		}
 
-		bh_kunmap(mdev->md_io_bh);
+		bh_kunmap(&mdev->md_io_bh);
 		up(&mdev->md_io_mutex);
 
 		transactions++;
@@ -406,21 +406,21 @@
 		sector = drbd_md_ss(mdev) + MD_BM_OFFSET + so;
 		so++;
 
-		drbd_set_md_bh(mdev, mdev->md_io_bh, sector, 512);
-		clear_bit(BH_Uptodate, &mdev->md_io_bh->b_state);
-		set_bit(BH_Lock, &mdev->md_io_bh->b_state);
-		mdev->md_io_bh->b_end_io = drbd_generic_end_io;
-		generic_make_request(READ,mdev->md_io_bh);
-		wait_on_buffer(mdev->md_io_bh);
+		drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
+		clear_bit(BH_Uptodate, &mdev->md_io_bh.b_state);
+		set_bit(BH_Lock, &mdev->md_io_bh.b_state);
+		mdev->md_io_bh.b_end_io = drbd_generic_end_io;
+		generic_make_request(READ,&mdev->md_io_bh);
+		wait_on_buffer(&mdev->md_io_bh);
 
-		buffer = (unsigned long *)bh_kmap(mdev->md_io_bh);
+		buffer = (unsigned long *)bh_kmap(&mdev->md_io_bh);
 
 		for(buf_i=0;buf_i<want;buf_i++) {
 			word = lel_to_cpu(buffer[buf_i]);
 			bits += hweight_long(word);
 			bm[bm_i++] = word;
 		}
-		bh_kunmap(mdev->md_io_bh);
+		bh_kunmap(&mdev->md_io_bh);
 	}
 
 	up(&mdev->md_io_mutex);
@@ -436,7 +436,8 @@
 {
 	struct Drbd_Conf *mdev;
 
-	mdev=drbd_mdev_of_bh(bh);
+	mdev = container_of(bh,struct Drbd_Conf,md_io_bh);
+	BUG_ON(!IS_VALID_MDEV(mdev));
 
 	mark_buffer_uptodate(bh, uptodate);
 	unlock_buffer(bh);
@@ -468,23 +469,23 @@
 	want=min_t(int,512/sizeof(long),bm_words-bm_i);
 
 	down(&mdev->md_io_mutex); // protects md_io_buffer
-	buffer = (unsigned long *)bh_kmap(mdev->md_io_bh);
+	buffer = (unsigned long *)bh_kmap(&mdev->md_io_bh);
 
 	for(buf_i=0;buf_i<want;buf_i++) {
 		buffer[buf_i] = cpu_to_lel(bm[bm_i++]);
 	}
 
-	bh_kunmap(mdev->md_io_bh);
+	bh_kunmap(&mdev->md_io_bh);
 
 	sector = drbd_md_ss(mdev) + MD_BM_OFFSET + enr/EXTENTS_PER_SECTOR;
 
-	drbd_set_md_bh(mdev, mdev->md_io_bh, sector, 512);
-	set_bit(BH_Dirty, &mdev->md_io_bh->b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh->b_state);
-	mdev->md_io_bh->b_end_io = sync ? drbd_generic_end_io : drbd_async_eio;
-	generic_make_request(WRITE,mdev->md_io_bh);
+	drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
+	set_bit(BH_Dirty, &mdev->md_io_bh.b_state);
+	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
+	mdev->md_io_bh.b_end_io = sync ? drbd_generic_end_io : drbd_async_eio;
+	generic_make_request(WRITE,&mdev->md_io_bh);
 	if(sync) {
-		wait_on_buffer(mdev->md_io_bh);
+		wait_on_buffer(&mdev->md_io_bh);
 		up(&mdev->md_io_mutex);
 	}
 
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.41
retrieving revision 1.1.2.42
diff -u -3 -r1.1.2.41 -r1.1.2.42
--- drbd_dsender.c	12 Jan 2004 10:46:42 -0000	1.1.2.41
+++ drbd_dsender.c	14 Jan 2004 06:42:22 -0000	1.1.2.42
@@ -53,10 +53,12 @@
 	struct Tl_epoch_entry *e=NULL;
 	struct Drbd_Conf* mdev;
 
+	// we could use pbh.b_private now for mdev
 	mdev=drbd_mdev_of_bh(bh);
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
 
-	e=bh->b_private;
-	D_ASSERT(e->bh == bh);
+	e = container_of(bh,struct Tl_epoch_entry,pbh);
+	PARANOIA_BUG_ON(!VALID_POINTER(e));
 	D_ASSERT(e->block_id != ID_VACANT);
 
 	spin_lock_irqsave(&mdev->ee_lock,flags);
@@ -65,8 +67,8 @@
 	clear_bit(BH_Lock, &bh->b_state);
 	smp_mb__after_clear_bit();
 
-	list_del(&e->list);
-	list_add(&e->list,&mdev->rdone_ee);
+	list_del(&e->w.list);
+	list_add(&e->w.list,&mdev->rdone_ee);
 
 	spin_unlock_irqrestore(&mdev->ee_lock,flags);
 
@@ -83,9 +85,9 @@
 
 	while(!list_empty(&mdev->rdone_ee)) {
 		le = mdev->rdone_ee.next;
-		e = list_entry(le, struct Tl_epoch_entry,list);
+		e = list_entry(le, struct Tl_epoch_entry,w.list);
 		spin_unlock_irq(&mdev->ee_lock);
-		ok = ok && e->e_end_io(mdev,e);
+		ok = ok && e->w.cb(mdev,&e->w);
 
 		spin_lock_irq(&mdev->ee_lock);
 		list_del(le);         // remove from list first.
@@ -221,7 +223,7 @@
 		pr->d.sector = sector;
 		pr->cause = Resync;
 		spin_lock(&mdev->pr_lock);
-		list_add(&pr->list,&mdev->resync_reads);
+		list_add(&pr->w.list,&mdev->resync_reads);
 		spin_unlock(&mdev->pr_lock);
 
 		inc_pending(mdev);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.84
retrieving revision 1.58.2.85
diff -u -3 -r1.58.2.84 -r1.58.2.85
--- drbd_int.h	12 Jan 2004 19:49:41 -0000	1.58.2.84
+++ drbd_int.h	14 Jan 2004 06:42:22 -0000	1.58.2.85
@@ -82,6 +82,12 @@
 # define STATIC static
 #endif
 
+#ifdef PARANOIA
+# define PARANOIA_BUG_ON(x) BUG_ON(x)
+#else
+# define PARANOIA_BUG_ON(x)
+#endif
+
 /*
  * Some Message Macros
  *************************/
@@ -171,6 +177,10 @@
 #define bh_kunmap(bh)	do { } while (0)
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19)
+#define BH_Launder BH_launder
+#endif
+
 #ifndef list_for_each
 #define list_for_each(pos, head) \
 	for(pos = (head)->next; pos != (head); pos = pos->next)
@@ -203,14 +213,36 @@
 struct Drbd_Conf;
 typedef struct Drbd_Conf drbd_dev;
 
+#ifndef typecheck
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({	type __dummy; \
+	typeof(x) __dummy2; \
+	(void)(&__dummy == &__dummy2); \
+	1; \
+})
+#endif
+
 // THINK: x->magic = &x; ??
-#define SET_MAGIC(x) (x->magic = DRBD_MAGIC)
+#define SET_MAGIC(x) ((x)->magic = (int)(x) ^ DRBD_MAGIC)
 // For some optimization crap, please test for NULL explicitly,
 //	and not in this macro!
 // #define VALID_POINTER(x) ((x) && (x)->magic == DRBD_MAGIC)
-#define VALID_POINTER(x) ((x)->magic == DRBD_MAGIC)
+// #define VALID_POINTER(x) ((x)->magic == DRBD_MAGIC)
+// hopefully this works:
+#define VALID_POINTER(x) ((x) ? (((x)->magic ^ DRBD_MAGIC) == (int)(x)):0)
 #define INVALIDATE_MAGIC(x) (x->magic--)
 
+#define SET_MDEV_MAGIC(x) \
+	({ typecheck(struct Drbd_Conf*,x); \
+	  (x)->magic = (long)(x) ^ DRBD_MAGIC; })
+#define IS_VALID_MDEV(x)  \
+	( typecheck(struct Drbd_Conf*,x) && \
+	  ((x) ? (((x)->magic ^ DRBD_MAGIC) == (long)(x)):0))
+
 
 /*
  * GFP_DRBD is used for allocations inside drbd_do_request.
@@ -237,6 +269,38 @@
 #define RQ_DRBD_DONE      0x0030
 #define RQ_DRBD_READ      0x0040
 
+#define DRBD_PANIC 2
+/* do_panic alternatives:
+ *	0: panic();
+ *	1: machine_halt;  FIXME does not work;
+ *	2: printk(EMERG ), plus flag to fail all eventual drbd IO, plus panic()
+ */
+
+extern volatile int drbd_did_panic;
+
+#include <linux/reboot.h>
+
+#if    DRBD_PANIC == 0
+#define drbd_panic(x...) panic(x)
+#elif  DRBD_PANIC == 1
+#error "THIS DRBD_PANIC SETTING DOES NOT WORK (yet)"
+#define drbd_panic(x...) do {						\
+	printk(KERN_EMERG x);						\
+	notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);	\
+	printk(KERN_EMERG "System halted.\n");				\
+	machine_halt();							\
+	do_exit(0);							\
+} while (0)
+#else
+#define drbd_panic(x...) do {		\
+	printk(KERN_EMERG x);		\
+	drbd_did_panic = DRBD_MD_MAGIC;	\
+	smp_mb();			\
+	panic(x);			\
+} while (0)
+#endif
+#undef DRBD_PANIC
+
 enum MetaDataFlags {
 	MDF_Consistent   = 1,
 	MDF_PrimaryInd   = 2,
@@ -443,15 +507,39 @@
 	drbd_dev *mdev;
 };
 
+
+/*
+ * Having this as the first member of a struct provides sort of "inheritance".
+ * "derived" structs can be "drbd_queue_work()"ed.
+ * The callback should know and cast back to the descendant struct.
+ * drbd_request, Pending_read and Tl_epoch_entry are descendants of drbd_work.
+ * Pending_read will soon be merged into drbd_request, stay tuned ... -lge
+ */
+struct drbd_work;
+typedef int (*drbd_work_cb)(drbd_dev*, struct drbd_work*);
+struct drbd_work {
+	struct list_head list;
+	drbd_work_cb cb;
+};
+
+/*
+ * since we eventually don't want to "remap" any bhs, but always need a
+ * private bh, it may as well be part of the struct so we do not need to
+ * allocate it separately.  it is only used as a clone, and since we own it, we
+ * can abuse certain fields of it for our own needs.  and, since it is part of
+ * the struct, we can use b_private for other things than the req, e.g. mdev,
+ * since we get the request struct by means of the "container_of()" macro.
+ *	-lge
+ */
+
 struct drbd_barrier;
 struct drbd_request {
+	struct drbd_work w;
 	int magic;
-	struct list_head list;     // requests are chained to a barrier
-	struct drbd_barrier *barrier; // The next barrier.
-	struct buffer_head *bh;    // buffer head
-	unsigned long sector;
-	int size;
 	int rq_status;
+	struct drbd_barrier *barrier; // The next barrier.
+	struct buffer_head *bh;       // master buffer head pointer
+	struct buffer_head  pbh;      // private buffer head struct
 };
 
 struct drbd_barrier {
@@ -472,16 +560,29 @@
    rdone_ee  .. block read, need to send DataReply
 */
 
+/* Since whenever we allocate a Tl_epoch_entry, we also allocate a buffer_head
+ * at the same time, we might as well put it as a member into the struct.
+ * Yes, we may "waste" a little memory since the unused EEs on the free_ee list
+ * are somewhat larger. For 2.6, this will be a struct bio, which is fairly
+ * small, and since we adapt the amount dynamically anyway, this is not an
+ * issue.
+ *
+ * TODO
+ * I'd like to "drop" the free list altogether, since we use mempools, which
+ * are designed for this. We probably would still need a private "page pool"
+ * to set the bh.b_page from.
+ *	-lge
+ */
 struct Tl_epoch_entry {
-	struct list_head list;
-	struct buffer_head* bh;
+	struct drbd_work    w;
+	struct buffer_head  pbh; // private buffer head struct, NOT a pointer
 	u64    block_id;
-	int   (*e_end_io) (drbd_dev*, struct Tl_epoch_entry *);
+	int magic;
 };
 
 struct Pending_read {
+	struct drbd_work w;
 	int magic;
-	struct list_head list;
 	union {
 		struct buffer_head* bh;
 		sector_t sector;
@@ -611,7 +712,7 @@
 	wait_queue_head_t ee_wait;
 	struct list_head busy_blocks;
 	struct tq_struct write_hint_tq;
-	struct buffer_head *md_io_bh; // a (one page) Byte buffer for md_io
+	struct buffer_head md_io_bh; // a (one page) Byte buffer for md_io
 	struct semaphore md_io_mutex; // protects the md_io_buffer
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
@@ -995,7 +1096,7 @@
 	bh->b_list = BUF_LOCKED;
 	init_waitqueue_head(&bh->b_wait);
 	bh->b_size = size;
-	atomic_set(&bh->b_count, 0);
+	atomic_set(&bh->b_count, 1);
 	bh->b_state = (1 << BH_Mapped ); //has a disk mapping = dev & blocknr
 }
 
@@ -1011,11 +1112,7 @@
 
 	// we skip submit_bh, but use generic_make_request.
 	set_bit(BH_Req, &bh->b_state);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19)
-	set_bit(BH_launder, &bh->b_state);
-#else
 	set_bit(BH_Launder, &bh->b_state);
-#endif
 	bh->b_rdev = mdev->md_device;
 	bh->b_rsector = sector;
 }
@@ -1031,11 +1128,7 @@
 
 	// we skip submit_bh, but use generic_make_request.
 	set_bit(BH_Req, &bh->b_state);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19)
-	set_bit(BH_launder, &bh->b_state);
-#else
 	set_bit(BH_Launder, &bh->b_state);
-#endif
 	bh->b_rdev = mdev->lo_device;
 	bh->b_rsector = sector;
 }
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.91
retrieving revision 1.73.2.92
diff -u -3 -r1.73.2.91 -r1.73.2.92
--- drbd_main.c	12 Jan 2004 14:33:31 -0000	1.73.2.91
+++ drbd_main.c	14 Jan 2004 06:42:22 -0000	1.73.2.92
@@ -113,6 +113,9 @@
 int minor_count = 2;
 int disable_io_hints = 0;
 
+// global panic flag
+volatile int drbd_did_panic = 0;
+
 STATIC int *drbd_blocksizes;
 STATIC int *drbd_sizes;
 struct Drbd_Conf *drbd_conf;
@@ -164,10 +167,8 @@
 
 	b=mdev->newest_barrier;
 
-	new_item->sector = new_item->bh->b_rsector;
-	new_item->size = new_item->bh->b_size;
 	new_item->barrier = b;
-	list_add(&new_item->list,&b->requests);
+	list_add(&new_item->w.list,&b->requests);
 
 	if( b->n_req++ > mdev->conf.max_epoch_size ) {
 		set_bit(ISSUE_BARRIER,&mdev->flags);
@@ -243,7 +244,7 @@
 	spin_lock_irqsave(&mdev->tl_lock,flags);
 
 	r = ( item->barrier == mdev->newest_barrier );
-	list_del(&item->list);
+	list_del(&item->w.list);
 
 	spin_unlock_irqrestore(&mdev->tl_lock,flags);
 	return r;
@@ -266,15 +267,15 @@
 	b=mdev->oldest_barrier;
 	while ( b ) {
 		list_for_each_safe(le, tle, &b->requests) {
-			r = list_entry(le, struct drbd_request,list);
+			r = list_entry(le, struct drbd_request,w.list);
 			if( (r->rq_status&0xfffe) != RQ_DRBD_SENT ) {
 				drbd_end_req(r,RQ_DRBD_SENT,ERF_NOTLD|1,
-					     r->sector);
+					     r->pbh.b_blocknr);
 				goto mark;
 			}
 			if(mdev->conf.wire_protocol != DRBD_PROT_C ) {
 			mark:
-				drbd_set_out_of_sync(mdev,r->sector,r->size);
+				drbd_set_out_of_sync(mdev,r->pbh.b_blocknr,r->pbh.b_size);
 			}
 		}
 		f=b;
@@ -610,9 +611,9 @@
 	int ok;
 	Drbd_BlockAck_Packet p;
 
-	p.sector   = cpu_to_be64(DRBD_BH_SECTOR(e->bh));
+	p.sector   = cpu_to_be64(e->pbh.b_blocknr);
 	p.block_id = e->block_id;
-	p.blksize  = cpu_to_be32(e->bh->b_size);
+	p.blksize  = cpu_to_be32(e->pbh.b_size);
 
 	// YES, this happens. There is some race with the syncer!
 	if ((unsigned long)e->block_id <= 1) {
@@ -776,9 +777,9 @@
 	p.head.magic   = BE_DRBD_MAGIC;
 	p.head.command = cpu_to_be16(cmd);
 	p.head.length  = cpu_to_be16( sizeof(p)-sizeof(Drbd_Header)
-				     + e->bh->b_size );
+				     + e->pbh.b_size );
 
-	p.sector   = cpu_to_be64(DRBD_BH_SECTOR(e->bh));
+	p.sector   = cpu_to_be64(e->pbh.b_blocknr);
 	p.block_id = e->block_id;
 
 	/* only called by our kernel thread.
@@ -792,7 +793,7 @@
 	spin_unlock(&mdev->send_task_lock);
 
 	ok =  (drbd_send(mdev,mdev->sock,&p,sizeof(p),MSG_MORE) == sizeof(p))
-	   && _drbd_send_zc_bh(mdev,e->bh);
+	   && _drbd_send_zc_bh(mdev,&e->pbh);
 
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=NULL;
@@ -974,7 +975,7 @@
 	// note: only assignments, no allocation in here
 
 #ifdef PARANOIA
-	mdev->magic = (DRBD_MAGIC ^ (long)mdev);
+	SET_MDEV_MAGIC(mdev);
 #endif
 
 	/* If the WRITE_HINT_QUEUED flag is set but it is not
@@ -1140,10 +1141,8 @@
 			if(rr) printk(KERN_ERR DEVICE_NAME
 				       "%d: %d EEs in read list found!\n",i,rr);
 
-			if(mdev->md_io_bh) {
-				__free_page(mdev->md_io_bh->b_page);
-				kmem_cache_free(bh_cachep, mdev->md_io_bh);
-			}
+			if (mdev->md_io_bh.b_page)
+				__free_page(mdev->md_io_bh.b_page);
 
 			if (mdev->act_log) lc_free(mdev->act_log);
 		}
@@ -1205,13 +1204,8 @@
 		set_device_ro( MKDEV(MAJOR_NR, i), TRUE );
 
 		if(!page) goto Enomem;
-		mdev->md_io_bh = kmem_cache_alloc(bh_cachep, GFP_KERNEL);
-		if(!mdev->md_io_bh) {
-			__free_page(page);
-			goto Enomem;
-		}
-		drbd_init_bh(mdev->md_io_bh,512);
-		set_bh_page(mdev->md_io_bh,page,0);
+		drbd_init_bh(&mdev->md_io_bh,512);
+		set_bh_page(&mdev->md_io_bh,page,0);
 
 		mdev->mbds_id = bm_init(0);
 		if (!mdev->mbds_id) goto Enomem;
@@ -1613,8 +1607,11 @@
 	spin_lock(&sbm->bm_lock);
 	bm = sbm->bm;
 
-	for(bnr=sbnr; bnr <= ebnr; bnr++) {
-		if(test_bit(bnr & BPLM, bm + (bnr>>LN2_BPL))) ret=1;
+	for (bnr=sbnr; bnr <= ebnr; bnr++) {
+		if (test_bit(bnr, bm)) {
+			ret=1;
+			break;
+		}
 	}
 
 	spin_unlock(&sbm->bm_lock);
@@ -1795,7 +1792,7 @@
 	if( mdev->lo_device == 0) return;
 
 	down(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)bh_kmap(mdev->md_io_bh);
+	buffer = (struct meta_data_on_disk *)bh_kmap(&mdev->md_io_bh);
 
 	flags=mdev->gen_cnt[Flags] & ~(MDF_PrimaryInd|MDF_ConnectedInd);
 	if(mdev->state==Primary) flags |= MDF_PrimaryInd;
@@ -1813,14 +1810,14 @@
 
 	buffer->bm_offset = __constant_cpu_to_be32(MD_BM_OFFSET);
 
-	bh_kunmap(mdev->md_io_bh);
+	bh_kunmap(&mdev->md_io_bh);
 	sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
-	drbd_set_bh(mdev, mdev->md_io_bh, sector, 512);
-	set_bit(BH_Dirty, &mdev->md_io_bh->b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh->b_state);
-	mdev->md_io_bh->b_end_io = drbd_generic_end_io;
-	generic_make_request(WRITE,mdev->md_io_bh);
-	wait_on_buffer(mdev->md_io_bh);
+	drbd_set_bh(mdev, &mdev->md_io_bh, sector, 512);
+	set_bit(BH_Dirty, &mdev->md_io_bh.b_state);
+	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
+	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
+	generic_make_request(WRITE,&mdev->md_io_bh);
+	wait_on_buffer(&mdev->md_io_bh);
 
 	up(&mdev->md_io_mutex);
 }
@@ -1836,15 +1833,15 @@
 	down(&mdev->md_io_mutex);
 
 	sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
-	drbd_set_bh(mdev, mdev->md_io_bh, sector, 512);
-	clear_bit(BH_Uptodate, &mdev->md_io_bh->b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh->b_state);
-	mdev->md_io_bh->b_end_io = drbd_generic_end_io;
-	generic_make_request(READ,mdev->md_io_bh);
-	wait_on_buffer(mdev->md_io_bh);
-	ERR_IF( ! buffer_uptodate(mdev->md_io_bh) ) goto err;
+	drbd_set_bh(mdev, &mdev->md_io_bh, sector, 512);
+	clear_bit(BH_Uptodate, &mdev->md_io_bh.b_state);
+	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
+	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
+	generic_make_request(READ,&mdev->md_io_bh);
+	wait_on_buffer(&mdev->md_io_bh);
+	ERR_IF( ! buffer_uptodate(&mdev->md_io_bh) ) goto err;
 
-	buffer = (struct meta_data_on_disk *)bh_kmap(mdev->md_io_bh);
+	buffer = (struct meta_data_on_disk *)bh_kmap(&mdev->md_io_bh);
 
 	if(be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) goto err;
 
@@ -1853,12 +1850,12 @@
 	mdev->la_size = be64_to_cpu(buffer->la_size);
 	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
 
-	bh_kunmap(mdev->md_io_bh);
+	bh_kunmap(&mdev->md_io_bh);
 	up(&mdev->md_io_mutex);
 	return;
 
  err:
-	bh_kunmap(mdev->md_io_bh);
+	bh_kunmap(&mdev->md_io_bh);
 	up(&mdev->md_io_mutex);
 
 	INFO("Creating state block\n");
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.76
retrieving revision 1.97.2.77
diff -u -3 -r1.97.2.76 -r1.97.2.77
--- drbd_receiver.c	12 Jan 2004 19:49:41 -0000	1.97.2.76
+++ drbd_receiver.c	14 Jan 2004 06:42:22 -0000	1.97.2.77
@@ -147,23 +147,12 @@
 	struct Tl_epoch_entry *e=NULL;
 	struct Drbd_Conf* mdev;
 
+	// we could use pbh.b_private now for mdev
 	mdev=drbd_mdev_of_bh(bh);
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
 
-	/*
-	printk(KERN_ERR DEVICE_NAME "%d: dio_end_sec in_irq()=%d\n",
-	       (int)(mdev-drbd_conf),in_irq());
-
-	printk(KERN_ERR DEVICE_NAME "%d: dio_end_sec in_softirq()=%d\n",
-	       (int)(mdev-drbd_conf),in_softirq());
-	*/
-
-	/*
-	printk(KERN_ERR DEVICE_NAME "%d: drbd_dio_end_sec(%ld)\n",
-	       (int)(mdev-drbd_conf),bh->b_blocknr);
-	*/
-
-	e=bh->b_private;
-	D_ASSERT(e->bh == bh);
+	e = container_of(bh,struct Tl_epoch_entry,pbh);
+	PARANOIA_BUG_ON(!VALID_POINTER(e));
 	D_ASSERT(e->block_id != ID_VACANT);
 
 	spin_lock_irqsave(&mdev->ee_lock,flags);
@@ -174,8 +163,8 @@
 	clear_bit(BH_Lock, &bh->b_state);
 	smp_mb__after_clear_bit();
 
-	list_del(&e->list);
-	list_add(&e->list,&mdev->done_ee);
+	list_del(&e->w.list);
+	list_add(&e->w.list,&mdev->done_ee);
 
 	if (waitqueue_active(&mdev->ee_wait) &&
 	    (list_empty(&mdev->active_ee) ||
@@ -188,15 +177,12 @@
 	spin_unlock_irqrestore(&mdev->ee_lock,flags);
 
 	if( mdev->do_panic && !uptodate) {
-		panic(DEVICE_NAME": The lower-level device had an error.\n");
+		drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
 	}
 
 	//	if(wake_asender) {
 	wake_asender(mdev);
 	//	}
-	// TODO: Think if we should implement a short-cut here.
-	//       How about send_dontwait, and only if that fails, ...
-	//       but then, it is in irq/bh context. probably bad idea.
 }
 
 /*
@@ -224,7 +210,20 @@
 	int number,buffer_size,i;
 
 	buffer_size=BM_BLOCK_SIZE;
-	number=PAGE_SIZE/buffer_size;
+	/*
+	 * I suggest always allocating full pages, even for b_size smaller
+	 * than PAGE_SIZE, otherwise we need to dump all EEs // realloc them
+	 * on each blocksize change. This won't work well for XFS.
+	 *
+	 * If we want to do it this way, the loop below can be replaced,
+	 * and the free_ee case simplifies as well.
+	 * We then of course waste much memory for blocksizes of 1K :(
+	 *
+	 * Alternative would be to have the EE lists as "arrays",
+	 * indexed by order of b_size.
+	 */
+	// number=PAGE_SIZE/buffer_size;
+	number=1;
 	lbh=NULL;
 	bh=NULL;
 	fbh=NULL;
@@ -232,22 +231,20 @@
 	for(i=0;i<number;i++) {
 
 		e = kmem_cache_alloc(drbd_ee_cache, GFP_KERNEL);
-		bh = kmem_cache_alloc(bh_cachep, GFP_KERNEL);
 
-		if( e == NULL || bh == NULL ) {
-			ERR("could not kmalloc() new ee\n");
+		if( e == NULL ) {
+			ERR("could not kmem_cache_alloc() new ee\n");
 			BUG();
 		}
 
-		drbd_init_bh(bh, buffer_size);
-		set_bh_page(bh,page,i*buffer_size); // sets b_data and b_page
+		drbd_init_bh(&e->pbh, buffer_size);
+		set_bh_page(&e->pbh,page,i*buffer_size); // sets b_data and b_page
 
-		e->bh=bh;
 		bh->b_private=e;
 
 		e->block_id = ID_VACANT;
 		spin_lock_irq(&mdev->ee_lock);
-		list_add(&e->list,&mdev->free_ee);
+		list_add(&e->w.list,&mdev->free_ee);
 		mdev->ee_vacant++;
 		spin_unlock_irq(&mdev->ee_lock);
 		if (lbh) {
@@ -287,8 +284,12 @@
 
 	MUST_HOLD(&mdev->ee_lock);
 
+	/* for "one bh per page" regardless of b_size,
+	 * this could be simplified.	-lge
+	 */
+
 	list_for_each(le,list) {
-		bh=list_entry(le, struct Tl_epoch_entry,list)->bh;
+		bh=&list_entry(le, struct Tl_epoch_entry, w.list)->pbh;
 		nbh=bh->b_this_page;
 		freeable=1;
 		while( nbh != bh ) {
@@ -304,13 +305,12 @@
 	page=bh->b_page;
 	do {
 		e=nbh->b_private;
-		list_del(&e->list);
+		list_del(&e->w.list);
 		mdev->ee_vacant--;
 		D_ASSERT(nbh->b_page == page);
 		nbh=nbh->b_this_page;
 		/*printk(KERN_ERR DEVICE_NAME "%d: kfree(%p)\n",
 		  (int)(mdev-drbd_conf),e);*/
-		kmem_cache_free(bh_cachep, e->bh);
 		kmem_cache_free(drbd_ee_cache, e);
 	} while(nbh != bh);
 
@@ -381,8 +381,9 @@
 	list_del(le);
 	mdev->ee_vacant--;
 	mdev->ee_in_use++;
-	e=list_entry(le, struct Tl_epoch_entry,list);
+	e=list_entry(le, struct Tl_epoch_entry, w.list);
 	e->block_id = !ID_VACANT;
+	SET_MAGIC(e);
 	return e;
 }
 
@@ -395,7 +396,8 @@
 	mdev->ee_in_use--;
 	mdev->ee_vacant++;
 	e->block_id = ID_VACANT;
-	list_add(&e->list,&mdev->free_ee);
+	INVALIDATE_MAGIC(e);
+	list_add(&e->w.list,&mdev->free_ee);
 
 	if((mdev->ee_vacant * 2 > mdev->ee_in_use ) &&
 	   ( mdev->ee_vacant + mdev->ee_in_use > EE_MININUM) ) {
@@ -432,8 +434,8 @@
 		le = head->next;
 		list_del(le);
 		spin_unlock_irq(&mdev->ee_lock);
-		e = list_entry(le, struct Tl_epoch_entry,list);
-		ok = ok && e->e_end_io(mdev,e);
+		e = list_entry(le, struct Tl_epoch_entry, w.list);
+		ok = ok && e->w.cb(mdev,&e->w);
 		spin_lock_irq(&mdev->ee_lock);
 		drbd_put_ee(mdev,e);
 	}
@@ -463,7 +465,7 @@
 	while(!list_empty(&mdev->done_ee)) {
 		le = mdev->done_ee.next;
 		list_del(le);
-		e = list_entry(le,struct Tl_epoch_entry,list);
+		e = list_entry(le, struct Tl_epoch_entry, w.list);
 		drbd_put_ee(mdev,e);
 		if(mdev->conf.wire_protocol == DRBD_PROT_C ||
 		   is_syncer_blk(mdev,e->block_id)) {
@@ -900,7 +902,7 @@
 	spin_lock_irq(&mdev->ee_lock);
 	e=drbd_get_ee(mdev);
 	spin_unlock_irq(&mdev->ee_lock);
-	bh=e->bh;
+	bh=&e->pbh;
 
 	rr=drbd_recv(mdev,mdev->sock,bh_kmap(bh),data_size);
 	bh_kunmap(bh);
@@ -914,7 +916,6 @@
 	}
 
 	/* do not use mark_buffer_dirty() since it would call refile_buffer()*/
-	bh=e->bh;
 	set_bit(BH_Dirty, &bh->b_state);
 	set_bit(BH_Lock, &bh->b_state); // since using generic_make_request()
 
@@ -968,9 +969,10 @@
 	return ok;
 }
 
-STATIC int e_end_resync_block(drbd_dev *mdev, struct Tl_epoch_entry *e)
+STATIC int e_end_resync_block(drbd_dev *mdev, struct drbd_work *w)
 {
-	drbd_set_in_sync(mdev,DRBD_BH_SECTOR(e->bh),e->bh->b_size,1);
+	struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
+	drbd_set_in_sync(mdev,e->pbh.b_blocknr,e->pbh.b_size,1);
 	drbd_send_ack(mdev,WriteAck,e);
 	dec_unacked(mdev,HERE); // FIXME unconditional ??
 	return TRUE;
@@ -987,31 +989,38 @@
 
 	e = read_in_block(mdev,data_size);
 	ERR_IF(!e) return FALSE;
-	drbd_set_bh(mdev, e->bh, sector ,data_size);
+	drbd_set_bh(mdev, &e->pbh, sector ,data_size);
 	e->block_id = ID_SYNCER;
-	e->e_end_io = e_end_resync_block;
+	e->w.cb     = e_end_resync_block;
 
 	spin_lock_irq(&mdev->ee_lock);
-	list_add(&e->list,&mdev->sync_ee);
+	list_add(&e->w.list,&mdev->sync_ee);
 	spin_unlock_irq(&mdev->ee_lock);
 
 	dec_pending(mdev,HERE);
 	inc_unacked(mdev);
 
-	generic_make_request(WRITE,e->bh);
+	generic_make_request(WRITE,&e->pbh);
 
 	receive_data_tail(mdev,data_size);
 	return TRUE;
 }
 
 
+/*
+ * these should not happen any more. if they do, we do not properly
+ * serialize app and resync requests.
+ * yes, I think even app READS should be serialized, or made independent of,
+ * resync requests
+ */
+
 int recv_both_read(drbd_dev *mdev, struct Pending_read *pr,
 		   sector_t sector, int data_size)
 {
 	struct Tl_epoch_entry *e;
 	struct buffer_head *bh;
 
-	// DBG("%s\n", __func__);
+	ERR("should not happen anymore%s\n", __func__);
 
 	bh = pr->d.bh;
 
@@ -1025,24 +1034,24 @@
 	}
 
 	// XXX can't we share it somehow?
-	memcpy(bh_kmap(bh),bh_kmap(e->bh),data_size);
+	memcpy(bh_kmap(bh),bh_kmap(&e->pbh),data_size);
 	bh_kunmap(bh);
-	bh_kunmap(e->bh);
+	bh_kunmap(&e->pbh);
 
 	bh->b_end_io(bh,1);
 
-	drbd_set_bh(mdev, e->bh, sector, data_size);
+	drbd_set_bh(mdev, &e->pbh, sector, data_size);
 	e->block_id = ID_SYNCER;
-	e->e_end_io = e_end_resync_block;
+	e->w.cb     = e_end_resync_block;
 
 	spin_lock_irq(&mdev->ee_lock);
-	list_add(&e->list,&mdev->sync_ee);
+	list_add(&e->w.list,&mdev->sync_ee);
 	spin_unlock_irq(&mdev->ee_lock);
 
 	dec_pending(mdev,HERE);
 	inc_unacked(mdev);
 
-	generic_make_request(WRITE,e->bh);
+	generic_make_request(WRITE,&e->pbh);
 
 	receive_data_tail(mdev,data_size);
 	return TRUE;
@@ -1054,12 +1063,12 @@
 	// THINK maybe ignore this block without using EEs ?
 	struct Tl_epoch_entry *e;
 
-	// DBG("%s\n", __func__);
+	ERR("should not happen anymore: %s\n", __func__);
 
 	e = read_in_block(mdev,data_size);
 	ERR_IF(!e) return FALSE;
 
-	drbd_set_bh(mdev, e->bh, sector ,data_size);
+	drbd_set_bh(mdev, &e->pbh, sector ,data_size);
 	drbd_send_ack(mdev,WriteAck,e);
 
 	spin_lock_irq(&mdev->ee_lock);
@@ -1116,7 +1125,7 @@
 	   handler could be changed by make_req as long as it is on the list
 	*/
 	spin_lock(&mdev->pr_lock);
-	list_del(&pr->list);
+	list_del(&pr->w.list);
 	spin_unlock(&mdev->pr_lock);
 
 	ok = funcs[pr->cause](mdev,pr,sector,data_size);
@@ -1125,15 +1134,16 @@
 	return ok;
 }
 
-STATIC int e_end_block(drbd_dev *mdev, struct Tl_epoch_entry *e)
+STATIC int e_end_block(drbd_dev *mdev, struct drbd_work *w)
 {
+	struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
 	int ok=TRUE;
 
 	mdev->epoch_size++;
 	if(mdev->conf.wire_protocol == DRBD_PROT_C) {
 		if( mdev->cstate > Connected ) {
-			drbd_set_in_sync(mdev,DRBD_BH_SECTOR(e->bh),
-					 e->bh->b_size,1);
+			drbd_set_in_sync(mdev,e->pbh.b_blocknr,
+					 e->pbh.b_size,1);
 		}
 		ok=drbd_send_ack(mdev,WriteAck,e);
 		dec_unacked(mdev,HERE); // FIXME unconditional ??
@@ -1171,12 +1181,12 @@
 	e = read_in_block(mdev,data_size);
 	ERR_IF(!e) return FALSE;
 
-	drbd_set_bh(mdev, e->bh, sector, data_size);
+	drbd_set_bh(mdev, &e->pbh, sector, data_size);
 	e->block_id = p->block_id; // no meaning on this side, e* on partner
-	e->e_end_io = e_end_block;
+	e->w.cb     = e_end_block;
 
 	spin_lock_irq(&mdev->ee_lock);
-	list_add(&e->list,&mdev->active_ee);
+	list_add(&e->w.list,&mdev->active_ee);
 	spin_unlock_irq(&mdev->ee_lock);
 
 	switch(mdev->conf.wire_protocol) {
@@ -1191,24 +1201,26 @@
 		break;
 	}
 
-	generic_make_request(WRITE,e->bh);
+	generic_make_request(WRITE,&e->pbh);
 
 	receive_data_tail(mdev,data_size);
 	return TRUE;
 }
 
-STATIC int e_end_data_req(drbd_dev *mdev, struct Tl_epoch_entry *e)
+STATIC int e_end_data_req(drbd_dev *mdev, struct drbd_work *w)
 {
+	struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
 	int ok;
 	ok=drbd_send_block(mdev, DataReply, e);
 	dec_unacked(mdev,HERE); // THINK unconditional?
 	return ok;
 }
 
-STATIC int e_end_rsdata_req(drbd_dev *mdev, struct Tl_epoch_entry *e)
+STATIC int e_end_rsdata_req(drbd_dev *mdev, struct drbd_work *w)
 {
+	struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
 	int ok;
-	drbd_rs_complete_io(mdev,DRBD_BH_SECTOR(e->bh));
+	drbd_rs_complete_io(mdev,e->pbh.b_blocknr);
 	inc_pending(mdev);
 	ok=drbd_send_block(mdev, DataReply, e);
 	dec_unacked(mdev,HERE); // THINK unconditional?
@@ -1219,7 +1231,6 @@
 {
 	sector_t sector;
 	struct Tl_epoch_entry *e;
-	struct buffer_head *bh;
 	int data_size;
 	Drbd_BlockRequest_Packet *p = (Drbd_BlockRequest_Packet*)h;
 
@@ -1233,17 +1244,17 @@
 
 	spin_lock_irq(&mdev->ee_lock);
 	e=drbd_get_ee(mdev);
-	drbd_set_bh(mdev, e->bh, sector, data_size);
+	drbd_set_bh(mdev, &e->pbh, sector, data_size);
 	e->block_id = p->block_id; // no meaning on this side, pr* on partner
-	list_add(&e->list,&mdev->read_ee);
+	list_add(&e->w.list,&mdev->read_ee);
 	spin_unlock_irq(&mdev->ee_lock);
 
 	switch (h->command) {
 	case DataRequest:
-		e->e_end_io = e_end_data_req;
+		e->w.cb = e_end_data_req;
 		break;
 	case RSDataRequest:
-		e->e_end_io = e_end_rsdata_req;
+		e->w.cb = e_end_rsdata_req;
 		/* Eventually this should become asynchrously. Currently it
 		 * blocks the whole receiver just to delay the reading of a
 		 * resync data block. */
@@ -1253,14 +1264,13 @@
 		D_ASSERT(0);
 	}
 
-	bh=e->bh;
-	clear_bit(BH_Uptodate, &bh->b_state);
-	set_bit(BH_Lock, &bh->b_state);
-	e->bh->b_end_io = drbd_dio_end_read;
+	clear_bit(BH_Uptodate, &e->pbh.b_state);
+	set_bit(BH_Lock, &e->pbh.b_state);
+	e->pbh.b_end_io = drbd_dio_end_read;
 
-	mdev->read_cnt += bh->b_size >> 9;
+	mdev->read_cnt += e->pbh.b_size >> 9;
 	inc_unacked(mdev);
-	generic_make_request(READ,e->bh);
+	generic_make_request(READ,&e->pbh);
 
 	return TRUE;
 }
@@ -1492,7 +1502,7 @@
 
 	while(!list_empty(&workset)) {
 		le = workset.next;
-		pr = list_entry(le, struct Pending_read, list);
+		pr = list_entry(le, struct Pending_read, w.list);
 		bh = pr->d.bh;
 		list_del(le);
 
@@ -1512,7 +1522,7 @@
 	while(!list_empty(&workset)) {
 		le = workset.next;
 		list_del(le);
-		pr = list_entry(le, struct Pending_read, list);
+		pr = list_entry(le, struct Pending_read, w.list);
 		mempool_free(pr,drbd_pr_mempool);
 		INVALIDATE_MAGIC(pr);
 	}
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.36
retrieving revision 1.33.2.37
diff -u -3 -r1.33.2.36 -r1.33.2.37
--- drbd_req-2.4.c	10 Jan 2004 07:53:55 -0000	1.33.2.36
+++ drbd_req-2.4.c	14 Jan 2004 06:42:22 -0000	1.33.2.37
@@ -46,6 +46,7 @@
 	struct Drbd_Conf* mdev = drbd_conf + MINOR(req->bh->b_rdev);
 	unsigned long flags=0;
 
+	PARANOIA_BUG_ON(req->pbh.b_blocknr != rsector);
 	spin_lock_irqsave(&mdev->req_lock,flags);
 
 	if(req->rq_status & nextstate) {
@@ -73,7 +74,7 @@
 		if(tl_dependence(mdev,req))
 			set_bit(ISSUE_BARRIER,&mdev->flags);
 	} else {
-		list_del(&req->list); // we have the tl_lock...
+		list_del(&req->w.list); // we have the tl_lock...
 	}
 
 	if(mdev->conf.wire_protocol==DRBD_PROT_C && mdev->cstate > Connected) {
@@ -83,7 +84,7 @@
 	req->bh->b_end_io(req->bh,(req->rq_status & 0x0001));
 
 	if( mdev->do_panic && !(req->rq_status & 0x0001) ) {
-		panic(DEVICE_NAME": The lower-level device had an error.\n");
+		drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
 	}
 
 	INVALIDATE_MAGIC(req);
@@ -93,17 +94,23 @@
 		wake_asender(mdev);
 }
 
+/*
+ * b_end_io for writes on Primary coming from drbd_make_request
+ */
 void drbd_dio_end(struct buffer_head *bh, int uptodate)
 {
 	struct Drbd_Conf* mdev;
 	drbd_request_t *req;
 
-	req = bh->b_private;
+	// ok, now we have the b_private available for other use
+	req = container_of(bh,struct drbd_request,pbh);
+	PARANOIA_BUG_ON(!VALID_POINTER(req));
 	mdev = drbd_conf+MINOR(req->bh->b_rdev);
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
 
-	drbd_end_req(req, RQ_DRBD_WRITTEN, uptodate, bh->b_rsector);
-	drbd_al_complete_io(mdev,bh->b_rsector);
-	kmem_cache_free(bh_cachep, bh);
+	// NOT bh->b_rsector, may have been remapped!
+	drbd_end_req(req, RQ_DRBD_WRITTEN, uptodate, req->bh->b_rsector);
+	drbd_al_complete_io(mdev,req->bh->b_rsector);
 }
 
 STATIC struct Pending_read*
@@ -113,7 +120,7 @@
 	struct Pending_read *pr;
 
 	list_for_each(le,in) {
-		pr = list_entry(le, struct Pending_read, list);
+		pr = list_entry(le, struct Pending_read, w.list);
 		if(pr->d.sector == sector) return pr;
 	}
 
@@ -135,7 +142,7 @@
 	pr->d.bh = bh;
 	pr->cause = mdev->cstate == SyncTarget ? AppAndResync : Application;
 	spin_lock(&mdev->pr_lock);
-	list_add(&pr->list,&mdev->app_reads);
+	list_add(&pr->w.list,&mdev->app_reads);
 	spin_unlock(&mdev->pr_lock);
 	inc_pending(mdev);
 	drbd_send_drequest(mdev, mdev->cstate == SyncTarget ? RSDataRequest : DataRequest,
@@ -210,8 +217,8 @@
 
 				pr->cause |= Application;
 				pr->d.bh=bh;
-				list_del(&pr->list);
-				list_add(&pr->list,&mdev->app_reads);
+				list_del(&pr->w.list);
+				list_add(&pr->w.list,&mdev->app_reads);
 				spin_unlock(&mdev->pr_lock);
 				return 0; // Ok everything arranged
 			}
@@ -245,13 +252,13 @@
 	req = mempool_alloc(drbd_request_mempool, GFP_DRBD);
 
 	if (!req) {
-		ERR("could not kmalloc() nbh\n");
+		ERR("could not kmalloc() req\n");
 		bh->b_end_io(bh,0);
 		return 0;
 	}
 	SET_MAGIC(req);
 
-	nbh = kmem_cache_alloc(bh_cachep, GFP_DRBD);
+	nbh = &req->pbh;
 
 	drbd_init_bh(nbh, bh->b_size);