[DRBD-cvs] drbd by phil; Lars' 01-bio.diff.tgz applied. Preparing...

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Wed, 21 Jan 2004 16:40:10 +0100 (CET)


DRBD CVS committal

Author  : phil
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_actlog.c drbd_dsender.c drbd_fs.c drbd_int.h drbd_main.c 
	drbd_receiver.c drbd_req-2.4.c 
Added Files:
      Tag: rel-0_7-branch
	drbd_compat_types.h drbd_compat_wrappers.h 


Log Message:
Lars' 01-bio.diff.tgz applied. Preparing Linux-2.6 work.
Currently it still compiles on Linux-2.4 

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.58
retrieving revision 1.1.2.59
diff -u -3 -r1.1.2.58 -r1.1.2.59
--- drbd_actlog.c	18 Jan 2004 07:26:50 -0000	1.1.2.58
+++ drbd_actlog.c	21 Jan 2004 15:40:09 -0000	1.1.2.59
@@ -146,7 +146,7 @@
 	u32 xor_sum=0;
 
 	down(&mdev->md_io_mutex); // protects md_io_buffer, al_tr_cycle, ...
-	buffer = (struct al_transaction*)bh_kmap(&mdev->md_io_bh);
+	buffer = (struct al_transaction*)drbd_bio_kmap(&mdev->md_io_bio);
 
 	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
@@ -181,16 +181,12 @@
 
 	buffer->xor_sum = cpu_to_be32(xor_sum);
 
-	bh_kunmap(&mdev->md_io_bh);
+	drbd_bio_kunmap(&mdev->md_io_bio);
 
 	sector = drbd_md_ss(mdev) + MD_AL_OFFSET + mdev->al_tr_pos ;
 
-	drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
-	set_bit(BH_Dirty, &mdev->md_io_bh.b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
-	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
-	generic_make_request(WRITE,&mdev->md_io_bh);
-	wait_on_buffer(&mdev->md_io_bh);
+	drbd_md_prepare_write(mdev,sector);
+	drbd_generic_make_request_wait(WRITE,&mdev->md_io_bio);
 
 	if( ++mdev->al_tr_pos > div_ceil(mdev->act_log->nr_elements,AL_EXTENTS_PT) ) {
 		mdev->al_tr_pos=0;
@@ -201,7 +197,7 @@
 }
 
 /* In case this function returns 1 == success, the caller must do
-		bh_kunmap(&mdev->md_io_bh);
+		drbd_bio_kunmap(&mdev->md_io_bio);
 		up(&mdev->md_io_mutex);
  */
 STATIC int drbd_al_read_tr(struct Drbd_Conf *mdev,
@@ -216,14 +212,10 @@
 	down(&mdev->md_io_mutex);
 	sector = drbd_md_ss(mdev) + MD_AL_OFFSET + index;
 
-	drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
-	clear_bit(BH_Uptodate, &mdev->md_io_bh.b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
-	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
-	generic_make_request(READ,&mdev->md_io_bh);
-	wait_on_buffer(&mdev->md_io_bh);
+	drbd_md_prepare_read(mdev,sector);
+	drbd_generic_make_request_wait(READ,&mdev->md_io_bio);
 
-	buffer = (struct al_transaction*)bh_kmap(&mdev->md_io_bh);
+	buffer = (struct al_transaction*)drbd_bio_kmap(&mdev->md_io_bio);
 
 	rv = ( be32_to_cpu(buffer->magic) == DRBD_MAGIC );
 
@@ -235,7 +227,7 @@
 	if(rv) {
 		*bp = buffer;
 	} else {
-		bh_kunmap(&mdev->md_io_bh);
+		drbd_bio_kunmap(&mdev->md_io_bio);
 		up(&mdev->md_io_mutex);
 	}
 
@@ -258,7 +250,7 @@
 		if(!drbd_al_read_tr(mdev,&buffer,i)) continue;
 		cnr = be32_to_cpu(buffer->tr_number);
 		// INFO("index %d valid tnr=%d\n",i,cnr);
-		bh_kunmap(&mdev->md_io_bh);
+		drbd_bio_kunmap(&mdev->md_io_bio);
 		up(&mdev->md_io_mutex);
 
 		if(cnr == -1) overflow=1;
@@ -306,7 +298,7 @@
 			active_extents++;
 		}
 
-		bh_kunmap(&mdev->md_io_bh);
+		drbd_bio_kunmap(&mdev->md_io_bio);
 		up(&mdev->md_io_mutex);
 
 		transactions++;
@@ -412,21 +404,17 @@
 		sector = drbd_md_ss(mdev) + MD_BM_OFFSET + so;
 		so++;
 
-		drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
-		clear_bit(BH_Uptodate, &mdev->md_io_bh.b_state);
-		set_bit(BH_Lock, &mdev->md_io_bh.b_state);
-		mdev->md_io_bh.b_end_io = drbd_generic_end_io;
-		generic_make_request(READ,&mdev->md_io_bh);
-		wait_on_buffer(&mdev->md_io_bh);
+		drbd_md_prepare_read(mdev, sector);
+		drbd_generic_make_request_wait(READ,&mdev->md_io_bio);
 
-		buffer = (unsigned long *)bh_kmap(&mdev->md_io_bh);
+		buffer = (unsigned long *)drbd_bio_kmap(&mdev->md_io_bio);
 
 		for(buf_i=0;buf_i<want;buf_i++) {
 			word = lel_to_cpu(buffer[buf_i]);
 			bits += hweight_long(word);
 			bm[bm_i++] = word;
 		}
-		bh_kunmap(&mdev->md_io_bh);
+		drbd_bio_kunmap(&mdev->md_io_bio);
 	}
 
 	up(&mdev->md_io_mutex);
@@ -438,19 +426,6 @@
 	     mdev->rs_total/2);
 }
 
-STATIC void drbd_async_eio(struct buffer_head *bh, int uptodate)
-{
-	struct Drbd_Conf *mdev;
-
-	mdev = container_of(bh,struct Drbd_Conf,md_io_bh);
-	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
-
-	mark_buffer_uptodate(bh, uptodate);
-	unlock_buffer(bh);
-	up(&mdev->md_io_mutex);
-}
-
-
 #define BM_WORDS_PER_EXTENT ( (AL_EXTENT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
 #define BM_BYTES_PER_EXTENT ( (AL_EXTENT_SIZE/BM_BLOCK_SIZE) / 8 )
 #define EXTENTS_PER_SECTOR  ( 512 / BM_BYTES_PER_EXTENT )
@@ -475,24 +450,23 @@
 	want=min_t(int,512/sizeof(long),bm_words-bm_i);
 
 	down(&mdev->md_io_mutex); // protects md_io_buffer
-	buffer = (unsigned long *)bh_kmap(&mdev->md_io_bh);
+	buffer = (unsigned long *)drbd_bio_kmap(&mdev->md_io_bio);
 
 	for(buf_i=0;buf_i<want;buf_i++) {
 		buffer[buf_i] = cpu_to_lel(bm[bm_i++]);
 	}
 
-	bh_kunmap(&mdev->md_io_bh);
+	drbd_bio_kunmap(&mdev->md_io_bio);
 
 	sector = drbd_md_ss(mdev) + MD_BM_OFFSET + enr/EXTENTS_PER_SECTOR;
 
-	drbd_set_md_bh(mdev, &mdev->md_io_bh, sector, 512);
-	set_bit(BH_Dirty, &mdev->md_io_bh.b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
-	mdev->md_io_bh.b_end_io = sync ? drbd_generic_end_io : drbd_async_eio;
-	generic_make_request(WRITE,&mdev->md_io_bh);
+	drbd_md_prepare_write(mdev,sector);
 	if(sync) {
-		wait_on_buffer(&mdev->md_io_bh);
+		drbd_generic_make_request_wait(WRITE,&mdev->md_io_bio);
 		up(&mdev->md_io_mutex);
+	} else {
+		drbd_bio_set_end_io(&mdev->md_io_bio,drbd_async_eio);
+		drbd_generic_make_request(WRITE,&mdev->md_io_bio);
 	}
 
 	mdev->bm_writ_cnt++;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.58
retrieving revision 1.1.2.59
diff -u -3 -r1.1.2.58 -r1.1.2.59
--- drbd_dsender.c	21 Jan 2004 15:16:20 -0000	1.1.2.58
+++ drbd_dsender.c	21 Jan 2004 15:40:09 -0000	1.1.2.59
@@ -44,11 +44,46 @@
 #include "drbd.h"
 #include "drbd_int.h"
 
-void enslaved_read_bh_end_io(struct buffer_head *bh, int uptodate)
+
+/* I choose to have all block layer end_io handlers defined here.
+
+ * For all these callbacks, note the following:
+ * The callbacks will be called in irq context by the IDE drivers,
+ * and in Softirqs/Tasklets/BH context by the SCSI drivers.
+ * Try to get the locking right :)
+ *
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+
+/* used for synchronous meta data and bitmap IO
+ * submitted by FIXME (I'd say worker only, but currently this is not true...)
+ */
+void drbd_generic_end_io(struct buffer_head *bh, int uptodate)
+{ // This is a rough copy of end_buffer_io_sync
+	mark_buffer_uptodate(bh, uptodate);
+	unlock_buffer(bh);
+}
+
+/* used for asynchronous meta data and bitmap IO
+ * submitted by FIXME (I'd say worker only, but currently this is not true...)
+ */
+void drbd_async_eio(drbd_bio_t *bh, int uptodate)
+{
+	struct Drbd_Conf *mdev;
+
+	mdev = container_of(bh,struct Drbd_Conf,md_io_bio);
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
+
+	mark_buffer_uptodate(bh, uptodate);
+	unlock_buffer(bh);
+	up(&mdev->md_io_mutex);
+}
+
+/* reads on behalf of the partner,
+ * "submitted" by the receiver
+ */
+void enslaved_read_bi_end_io(drbd_bio_t *bh, int uptodate)
 {
-	/* This callback will be called in irq context by the IDE drivers,
-	   and in Softirqs/Tasklets/BH context by the SCSI drivers.
-	   Try to get the locking right :) */
 	unsigned long flags=0;
 	struct Tl_epoch_entry *e=NULL;
 	struct Drbd_Conf* mdev;
@@ -56,7 +91,7 @@
 	mdev=bh->b_private;
 	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
 
-	e = container_of(bh,struct Tl_epoch_entry,pbh);
+	e = container_of(bh,struct Tl_epoch_entry,private_bio);
 	PARANOIA_BUG_ON(!VALID_POINTER(e));
 	D_ASSERT(e->block_id != ID_VACANT);
 
@@ -72,6 +107,68 @@
 	drbd_queue_work(mdev,&mdev->data.work,&e->w);
 }
 
+/* writes on behalf of the partner, or resync writes,
+ * "submitted" by the receiver.
+ */
+void drbd_dio_end_sec(struct buffer_head *bh, int uptodate)
+{
+	unsigned long flags=0;
+	struct Tl_epoch_entry *e=NULL;
+	struct Drbd_Conf* mdev;
+
+	mdev=bh->b_private;
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
+
+	e = container_of(bh,struct Tl_epoch_entry,private_bio);
+	PARANOIA_BUG_ON(!VALID_POINTER(e));
+	D_ASSERT(e->block_id != ID_VACANT);
+
+	spin_lock_irqsave(&mdev->ee_lock,flags);
+
+	mark_buffer_uptodate(bh, uptodate);
+
+	clear_bit(BH_Dirty, &bh->b_state);
+	clear_bit(BH_Lock, &bh->b_state);
+	smp_mb__after_clear_bit();
+
+	list_del(&e->w.list);
+	list_add(&e->w.list,&mdev->done_ee);
+
+	if (waitqueue_active(&mdev->ee_wait) &&
+	    (list_empty(&mdev->active_ee) ||
+	     list_empty(&mdev->sync_ee)))
+		wake_up(&mdev->ee_wait);
+
+	spin_unlock_irqrestore(&mdev->ee_lock,flags);
+
+	if( mdev->do_panic && !uptodate) {
+		drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
+	}
+
+	wake_asender(mdev);
+}
+
+/* writes on Primary coming from drbd_make_request
+ */
+void drbd_dio_end(struct buffer_head *bh, int uptodate)
+{
+	struct Drbd_Conf* mdev;
+	drbd_request_t *req;
+
+	mdev = bh->b_private;
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
+
+	req = container_of(bh,struct drbd_request,private_bio);
+	PARANOIA_BUG_ON(!VALID_POINTER(req));
+
+	drbd_end_req(req, RQ_DRBD_WRITTEN, uptodate, drbd_req_get_sector(req));
+	drbd_al_complete_io(mdev,drbd_req_get_sector(req));
+}
+
+#else
+# error "FIXME"
+#endif
+
 int w_resync_inactive(drbd_dev *mdev, struct drbd_work *w)
 {
 	ERR("resync inactive, but callback triggered??\n");
@@ -85,7 +182,7 @@
 	drbd_queue_work(mdev,&mdev->data.work,&mdev->resync_work);
 }
 
-STATIC int w_make_resync_request(drbd_dev* mdev, struct drbd_work* w)
+int w_make_resync_request(drbd_dev* mdev, struct drbd_work* w)
 {
 	struct Pending_read *pr;
 	sector_t sector;
@@ -188,7 +285,7 @@
 	struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
 	int ok;
 
-	drbd_rs_complete_io(mdev,DRBD_BH_SECTOR(&e->pbh));
+	drbd_rs_complete_io(mdev,drbd_ee_get_sector(e));
 	inc_rs_pending(mdev);
 	ok=drbd_send_block(mdev, DataReply, e);
 	dec_unacked(mdev,HERE); // THINK unconditional?
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_fs.c,v
retrieving revision 1.28.2.47
retrieving revision 1.28.2.48
diff -u -3 -r1.28.2.47 -r1.28.2.48
--- drbd_fs.c	21 Jan 2004 15:16:20 -0000	1.28.2.47
+++ drbd_fs.c	21 Jan 2004 15:40:09 -0000	1.28.2.48
@@ -50,6 +50,7 @@
 #include <linux/blkpg.h>
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 STATIC enum { NotMounted=0,MountedRO,MountedRW } drbd_is_mounted(int minor)
 {
        struct super_block *sb;
@@ -65,8 +66,15 @@
        drop_super(sb);
        return MountedRW;
 }
+#endif
 
 /* Returns 1 if there is a disk-less node, 0 if both nodes have a disk. */
+/*
+ * THINK do we want the size to be KB or sectors ?
+ * note, *_capacity operates in 512 byte sectors!!
+ *
+ * currently *_size is in KB.
+ */
 int drbd_determin_dev_size(struct Drbd_Conf* mdev)
 {
 	unsigned long p_size = mdev->p_size;  // partner's disk size.
@@ -74,13 +82,13 @@
 	unsigned long m_size; // my size
 	unsigned long u_size = mdev->lo_usize; // size requested by user.
 	unsigned long size=0;
-	kdev_t ll_dev = mdev->lo_device;
-	int rv,minor=(int)(mdev-drbd_conf);
+	int rv;
 
-	m_size = ll_dev ? blk_size[MAJOR(ll_dev)][MINOR(ll_dev)] : 0;
+	m_size = drbd_get_lo_capacity(mdev)>>1;
 
-	if( mdev->md_index == -1 && m_size) {// internal metadata
-		m_size = m_size - MD_RESERVED_SIZE;
+	if (mdev->md_index == -1 && m_size) {// internal metadata
+		D_ASSERT(m_size > MD_RESERVED_SIZE);
+		m_size = drbd_md_ss(mdev)>>1;
 	}
 
 	if(p_size && m_size) {
@@ -111,13 +119,13 @@
 		}
 	}
 
-	if( blk_size[MAJOR_NR][minor] != size ) {
+	if( drbd_get_my_capacity(mdev) != size ) {
 		if(bm_resize(mdev->mbds_id,size)) {
-			blk_size[MAJOR_NR][minor] = size;
+			drbd_set_my_capacity(mdev,size<<1);
 			mdev->la_size = size;
 			INFO("size = %lu KB\n",size);
 		}
-		// FIXME else { error handling }
+#warning "FIXME else { error handling }"
 	}
 
 	return rv;
@@ -127,13 +135,15 @@
 int drbd_ioctl_set_disk(struct Drbd_Conf *mdev,
 			struct ioctl_disk_config * arg)
 {
-	int err,i,minor;
+	int err,i; // unused in 26 ?? cannot believe it ...
+	int minor;
 	enum ret_codes retcode;
 	struct disk_config new_conf;
 	struct file *filp = 0;
 	struct file *filp2 = 0;
 	struct inode *inode;
-	kdev_t ll_dev;
+	NOT_IN_26(kdev_t ll_dev);
+	ONLY_IN_26(struct block_device *bdev);
 
 	/*
 	if (!capable(CAP_SYS_ADMIN)) //MAYBE: Move this to the drbd_ioctl()
@@ -156,6 +166,21 @@
 
 	inode = filp->f_dentry->d_inode;
 
+	if (!S_ISBLK(inode->i_mode)) {
+		retcode=LDNoBlockDev;
+		goto fail_ioctl;
+	}
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	bdev = inode->i_bdev;
+	if (bd_claim(bdev, &mdev)) {
+		retcode=LDMounted;
+		goto fail_ioctl;
+	}
+#warning "FIXME sync ll-dev, check size"
+#warning "FIXME meta-device"
+	mdev->backing_device = bdev;
+#else
 	for(i=0;i<minor_count;i++) {
 		if( i != minor &&
 		    inode->i_rdev == drbd_conf[i].lo_device) {
@@ -164,11 +189,6 @@
 		}
 	}
 
-	if (!S_ISBLK(inode->i_mode)) {
-		retcode=LDNoBlockDev;
-		goto fail_ioctl;
-	}
-
 	if (drbd_is_mounted(inode->i_rdev)) {
 		WARN("can not configure %d:%d, has active inodes!\n",
 		     MAJOR(inode->i_rdev), MINOR(inode->i_rdev));
@@ -185,7 +205,7 @@
 
 	ll_dev = inode->i_rdev;
 
-	if (blk_size[MAJOR(ll_dev)][MINOR(ll_dev)] < new_conf.disk_size) {
+	if ((drbd_get_lo_capacity(mdev)>>1) < new_conf.disk_size) {
 		retcode = LDDeviceTooSmall;
 		blkdev_put(filp->f_dentry->d_inode->i_bdev,BDEV_FILE);
 		goto fail_ioctl;
@@ -215,17 +235,19 @@
 	}
 
 	fsync_dev(MKDEV(MAJOR_NR, minor));
+#endif
+
 	drbd_thread_stop(&mdev->worker);
 	drbd_thread_stop(&mdev->asender);
 	drbd_thread_stop(&mdev->receiver);
 	drbd_free_resources(mdev);
 
-	mdev->md_device = inode->i_rdev;
-	mdev->md_file = filp2;
+	NOT_IN_26( mdev->md_device = inode->i_rdev; )
+	mdev->md_file  = filp2;
 	mdev->md_index = new_conf.meta_index;
 
-	mdev->lo_device = ll_dev;
-	mdev->lo_file = filp;
+	NOT_IN_26( mdev->lo_device = ll_dev; )
+	mdev->lo_file  = filp;
 	mdev->lo_usize = new_conf.disk_size;
 	mdev->do_panic = new_conf.do_panic;
 
@@ -258,6 +280,7 @@
 		drbd_al_to_on_disk_bm(mdev);
 	}
 
+#warning "FIXME introduce drbd_set_blocksize"
 	set_blocksize(MKDEV(MAJOR_NR, minor), INITIAL_BLOCK_SIZE);
 	set_blocksize(mdev->lo_device, INITIAL_BLOCK_SIZE);
 
@@ -277,6 +300,7 @@
 {
 	struct ioctl_get_config cn;
 
+#warning "FIXME make 26 clean, maybe move to compat layer?"
 	cn.cstate=mdev->cstate;
 	cn.lower_device_major=MAJOR(mdev->lo_device);
 	cn.lower_device_minor=MINOR(mdev->lo_device);
@@ -354,8 +378,9 @@
 	 * XXX maybe rather store the value scaled to jiffies?
 	 * Note: MAX_SCHEDULE_TIMEOUT/HZ*HZ != MAX_SCHEDULE_TIMEOUT
 	 *       and HZ > 10; which is unlikely to change...
-	 *       Thus, if interrupted by a signal, or the timeout,
-	 *       sock_{send,recv}msg returns -EINTR.
+	 *       Thus, if interrupted by a signal,
+	 *       sock_{send,recv}msg returns -EINTR,
+	 *       if the timeout expires, -EAGAIN.
 	 */
 	// unlikely: someone disabled the timeouts ...
 	// just put some huge values in there.
@@ -521,12 +546,12 @@
 	 */
 	switch (cmd) {
 	case BLKGETSIZE:
-		err = put_user(blk_size[MAJOR_NR][minor]<<1, (long *)arg);
+		err = put_user(drbd_get_my_capacity(mdev), (long *)arg);
 		break;
 
 #ifdef BLKGETSIZE64
 	case BLKGETSIZE64: /* see ./drivers/block/loop.c */
-		err = put_user((u64)blk_size[MAJOR_NR][minor]<<10, (u64*)arg);
+		err = put_user((u64)drbd_get_my_capacity(mdev)<<9, (u64*)arg);
 		break;
 #endif
 
@@ -608,7 +633,7 @@
 		drbd_free_resources(mdev);
 		if (mdev->mbds_id) {
 			bm_resize(mdev->mbds_id,0);
-			blk_size[MAJOR_NR][minor] = 0;
+			drbd_set_my_capacity(mdev,0);
 		}
 
 		set_cstate(mdev,Unconfigured);
@@ -667,7 +692,7 @@
 		}
 
 		bm_fill_bm(mdev->mbds_id,-1);
-		mdev->rs_total=blk_size[MAJOR_NR][minor]<<1;
+		mdev->rs_total = drbd_get_my_capacity(mdev);
 		drbd_write_bm(mdev);
 		drbd_send_short_cmd(mdev,BecomeSyncSource);
 		drbd_start_resync(mdev,SyncTarget);
@@ -680,7 +705,7 @@
 		}
 
 		bm_fill_bm(mdev->mbds_id,-1);
-		mdev->rs_total=blk_size[MAJOR_NR][minor]<<1;
+		mdev->rs_total = drbd_get_my_capacity(mdev);
 		drbd_write_bm(mdev);
 		drbd_send_short_cmd(mdev,BecomeSyncTarget);
 		drbd_start_resync(mdev,SyncSource);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.100
retrieving revision 1.58.2.101
diff -u -3 -r1.58.2.100 -r1.58.2.101
--- drbd_int.h	21 Jan 2004 14:59:05 -0000	1.58.2.100
+++ drbd_int.h	21 Jan 2004 15:40:09 -0000	1.58.2.101
@@ -42,7 +42,7 @@
    left on all_requests...
    look out for NBD_MAJOR in ll_rw_blk.c */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 /*lge: this hack is to get rid of the compiler warnings about
  * 'do_nbd_request declared static but never defined'
  * whilst forcing blk.h defines on
@@ -102,11 +102,11 @@
  *************************/
 
 // handy macro: DUMPP(somepointer)
-#define DUMPP(A) ERR( #A " = %p in %s:%d\n",(A),__FILE__,__LINE__);
-#define DUMPLU(A) ERR( #A " = %lu in %s:%d\n",(A),__FILE__,__LINE__);
+#define DUMPP(A)   ERR( #A " = %p in %s:%d\n",  (A),__FILE__,__LINE__);
+#define DUMPLU(A)  ERR( #A " = %lu in %s:%d\n", (A),__FILE__,__LINE__);
 #define DUMPLLU(A) ERR( #A " = %llu in %s:%d\n",(A),__FILE__,__LINE__);
-#define DUMPLX(A) ERR( #A " = %lx in %s:%d\n",(A),__FILE__,__LINE__);
-#define DUMPI(A) ERR( #A " = %d in %s:%d\n",(A),__FILE__,__LINE__);
+#define DUMPLX(A)  ERR( #A " = %lx in %s:%d\n", (A),__FILE__,__LINE__);
+#define DUMPI(A)   ERR( #A " = %d in %s:%d\n",  (A),__FILE__,__LINE__);
 
 
 // Info: do not remove the spaces around the "," before ##
@@ -160,46 +160,7 @@
  * Compatibility Section
  *************************/
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
-typedef unsigned long sector_t;
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,7)
-#define completion semaphore
-#define init_completion(A) init_MUTEX_LOCKED(A)
-#define wait_for_completion(A) down(A)
-#define complete(A) up(A)
-#else
-#include <linux/completion.h>
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10)
-#define min_t(type,x,y) \
-	({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
-#define max_t(type,x,y) \
-	({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
-#define MODULE_LICENSE(L)
-#endif
-
-#if !defined(CONFIG_HIGHMEM) && !defined(bh_kmap)
-#define bh_kmap(bh)	((bh)->b_data)
-#define bh_kunmap(bh)	do { } while (0)
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19)
-#define BH_Launder BH_launder
-#endif
-
-#ifndef list_for_each
-#define list_for_each(pos, head) \
-	for(pos = (head)->next; pos != (head); pos = pos->next)
-#endif
-
-#if defined(DBG_SPINLOCKS) && defined(__SMP__)
-# define MUST_HOLD(lock) if(!spin_is_locked(lock)) { ERR("Not holding lock! in %s\n", __FUNCTION__ ); }
-#else
-# define MUST_HOLD(lock)
-#endif
+#include "drbd_compat_types.h"
 
 #ifdef SIGHAND_HACK
 # define LOCK_SIGMASK(task,flags)   spin_lock_irqsave(&task->sighand->siglock, flags)
@@ -232,14 +193,8 @@
 })
 #endif
 
-// THINK: x->magic = &x; ??
-#define SET_MAGIC(x) ((x)->magic = (int)(x) ^ DRBD_MAGIC)
-// For some optimization crap, please test for NULL explicitly,
-//	and not in this macro!
-// #define VALID_POINTER(x) ((x) && (x)->magic == DRBD_MAGIC)
-// #define VALID_POINTER(x) ((x)->magic == DRBD_MAGIC)
-// hopefully this works:
-#define VALID_POINTER(x) ((x) ? (((x)->magic ^ DRBD_MAGIC) == (int)(x)):0)
+#define SET_MAGIC(x)       ((x)->magic = (int)(x) ^ DRBD_MAGIC)
+#define VALID_POINTER(x)   ((x) ? (((x)->magic ^ DRBD_MAGIC) == (int)(x)):0)
 #define INVALIDATE_MAGIC(x) (x->magic--)
 
 #define SET_MDEV_MAGIC(x) \
@@ -251,18 +206,15 @@
 
 
 /*
- * GFP_DRBD is used for allocations inside drbd_do_request.
+ * GFP_DRBD is used for allocations inside drbd_make_request,
+ * and for the sk->allocation scheme.
  *
- * 2.4 kernels will probably remove the __GFP_IO check in the VM code,
- * so lets use GFP_ATOMIC for allocations.  For 2.2, we abuse the GFP_BUFFER
- * flag to avoid __GFP_IO, thus avoiding the use of the atomic queue and
- *  avoiding the deadlock.
+ * Try to get away with GFP_NOIO, which is
+ * in 2.4.x:	(__GFP_HIGH | __GFP_WAIT) // HIGH == EMERGENCY, not HIGHMEM!
+ * in 2.6.x:	             (__GFP_WAIT)
  *
- * - marcelo
- *
- * try to get away with GFP_NOIO, which is (GFP_ATOMIC | __GFP_WAIT)
- * as far as i can see we do not allocate from interrupt context...
- * needs to be tested under memory pressure, though.
+ * As far as i can see we do not allocate from interrupt context...
+ * if we do, we certainly should fix that.
  * - lge
  */
 #define GFP_DRBD GFP_NOIO
@@ -383,6 +335,7 @@
 	}
 }
 
+
 /* This is the layout for a packet on the wire.
  * The byteorder is the network byte order.
  *     (except block_id and barrier fields.
@@ -538,8 +491,8 @@
 	int magic;
 	int rq_status;
 	struct drbd_barrier *barrier; // The next barrier.
-	struct buffer_head *bh;       // master buffer head pointer
-	struct buffer_head  pbh;      // private buffer head struct
+	drbd_bio_t *master_bio;       // master bio pointer
+	drbd_bio_t private_bio;       // private bio struct
 };
 
 struct drbd_barrier {
@@ -570,12 +523,12 @@
  * TODO
  * I'd like to "drop" the free list altogether, since we use mempools, which
  * are designed for this. We probably would still need a private "page pool"
- * to set the bh.b_page from.
+ * to do the "bio_add_page" from.
  *	-lge
  */
 struct Tl_epoch_entry {
 	struct drbd_work    w;
-	struct buffer_head  pbh; // private buffer head struct, NOT a pointer
+	drbd_bio_t private_bio; // private bio struct, NOT a pointer
 	u64    block_id;
 	int magic;
 };
@@ -584,7 +537,7 @@
 	struct drbd_work w;
 	int magic;
 	union {
-		struct buffer_head* bh;
+		drbd_bio_t *master_bio;
 		sector_t sector;
 	} d;
 	enum {
@@ -671,9 +624,17 @@
 	volatile unsigned long last_received; // in jiffies, either socket
 	struct drbd_work  resync_work;
 	struct timer_list resync_timer;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 	kdev_t lo_device;         // backing device
-	struct file *lo_file;
 	kdev_t md_device;         // device for meta-data.
+#else
+	struct block_device *backing_bdev;
+	struct block_device *md_bdev;
+	struct gendisk      *vdisk;
+	request_queue_t     *rq_queue;
+#endif
+	// THINK is this the same in 2.6.x ??
+	struct file *lo_file;
 	struct file *md_file;
 	int md_index;
 	unsigned long lo_usize;   /* user provided size */
@@ -727,8 +688,8 @@
 	int ee_in_use;
 	wait_queue_head_t ee_wait;
 	struct list_head busy_blocks;
-	struct tq_struct write_hint_tq;
-	struct buffer_head md_io_bh; // a (one page) Byte buffer for md_io
+	NOT_IN_26(struct tq_struct write_hint_tq);
+	drbd_bio_t md_io_bio; // a (one page) Byte buffer for md_io
 	struct semaphore md_io_mutex; // protects the md_io_buffer
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
@@ -764,6 +725,8 @@
 			   u32 set_size);
 extern int drbd_send_ack(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
 			 struct Tl_epoch_entry *e);
+extern int _drbd_send_page(drbd_dev *mdev, struct page *page,
+			   int offset, size_t size);
 extern int drbd_send_block(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
 			   struct Tl_epoch_entry *e);
 extern int drbd_send_dblock(drbd_dev *mdev, drbd_request_t *req);
@@ -775,7 +738,6 @@
 extern int drbd_send_bitmap(drbd_dev *mdev);
 
 // drbd_meta-data.c (still in drbd_main.c)
-extern void drbd_generic_end_io(struct buffer_head *bh, int uptodate);
 extern void drbd_md_write(drbd_dev *mdev);
 extern void drbd_md_read(drbd_dev *mdev);
 extern void drbd_md_inc(drbd_dev *mdev, enum MetaDataIndex order);
@@ -808,21 +770,6 @@
 #define MD_AL_MAX_SIZE 64   // = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage
 #define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) //Allows up to about 3.8TB
 
-/* Returns the number of kb of a block device. */
-static inline unsigned long capacity(kdev_t dev) {
-	return blk_size[MAJOR(dev)][MINOR(dev)];
-}
-
-/* Returns the start sector for metadata */
-static inline sector_t drbd_md_ss(drbd_dev *mdev) {
-	if( mdev->md_index == -1 ) {
-		return ( (capacity(mdev->lo_device) & ~3L) -
-			 MD_RESERVED_SIZE ) * 2;
-	} else {
-		return 2 * MD_RESERVED_SIZE * mdev->md_index;
-	}
-}
-
 #if BITS_PER_LONG == 32
 #define LN2_BPL 5
 #define cpu_to_lel(A) cpu_to_le32(A)
@@ -872,7 +819,6 @@
 
 // drbd_dsender.c
 extern int drbd_worker(struct Drbd_thread *thi);
-extern void enslaved_read_bh_end_io(struct buffer_head *bh, int uptodate);
 extern void drbd_alter_sg(drbd_dev *mdev, int ng);
 extern void drbd_start_resync(drbd_dev *mdev, Drbd_CState side);
 // worker callbacks
@@ -997,6 +943,8 @@
  * inline helper functions
  *************************/
 
+#include "drbd_compat_wrappers.h"
+
 static inline void set_cstate(drbd_dev* mdev,Drbd_CState ns)
 {
 	unsigned long flags;
@@ -1005,7 +953,7 @@
 	spin_unlock_irqrestore(&mdev->req_lock,flags);
 }
 
-static inline void 
+static inline void
 _drbd_dequeue_work(struct drbd_work_queue *q, struct drbd_work *w)
 {
 	if(!list_empty(&w->list)) {
@@ -1128,73 +1076,6 @@
 	mdev->rs_total +=
 		bm_set_bit(mdev, sector, blk_size, SS_OUT_OF_SYNC);
 }
-
-static inline void drbd_init_bh(struct buffer_head *bh,
-				int size)
-{
-	memset(bh, 0, sizeof(struct buffer_head));
-
-	bh->b_list = BUF_LOCKED;
-	init_waitqueue_head(&bh->b_wait);
-	bh->b_size = size;
-	atomic_set(&bh->b_count, 1);
-	bh->b_state = (1 << BH_Mapped ); //has a disk mapping = dev & blocknr
-}
-
-
-static inline void drbd_set_md_bh(drbd_dev *mdev,
-				  struct buffer_head *bh,
-				  sector_t sector,
-				  int size)
-{
-	bh->b_blocknr = sector;  // We abuse b_blocknr here.
-	bh->b_size = size;
-	// bh->b_dev = 0xab00 | (int)(mdev-drbd_conf);  // DRBD's magic mark
-	bh->b_private = mdev;
-
-	// we skip submit_bh, but use generic_make_request.
-	set_bit(BH_Req, &bh->b_state);
-	set_bit(BH_Launder, &bh->b_state);
-	bh->b_rdev = mdev->md_device;
-	bh->b_rsector = sector;
-}
-
-static inline void drbd_set_bh(drbd_dev *mdev,
-			       struct buffer_head *bh,
-			       sector_t sector,
-			       int size)
-{
-	bh->b_blocknr = sector;  // We abuse b_blocknr here.
-	bh->b_size = size;
-	// bh->b_dev = 0xab00 | (int)(mdev-drbd_conf);  // DRBD's magic mark
-	bh->b_private = mdev;
-
-	// we skip submit_bh, but use generic_make_request.
-	set_bit(BH_Req, &bh->b_state);
-	set_bit(BH_Launder, &bh->b_state);
-	bh->b_rdev = mdev->lo_device;
-	bh->b_rsector = sector;
-}
-
-#ifdef DBG_BH_SECTOR
-static inline sector_t DRBD_BH_SECTOR(struct buffer_head *bh)
-{
-	if(!IS_VALID_MDEV(bh->b_private)) {
-		printk(KERN_ERR DEVICE_NAME" !IS_VALID_MDEV(bh->b_private)\n");
-	}
-	return bh->b_blocknr;
-}
-static inline sector_t APP_BH_SECTOR(struct buffer_head *bh)
-{
-	if(IS_VALID_MDEV(bh->b_private)) {
-		printk(KERN_ERR DEVICE_NAME" IS_VALID_MDEV(bh->b_private)\n");
-	}
-	return bh->b_blocknr * (bh->b_size>>9) ;
-}
-#else
-# define DRBD_BH_SECTOR(BH) ( (BH)->b_blocknr )
-# define APP_BH_SECTOR(BH)  ( (BH)->b_blocknr * ((BH)->b_size>>9) )
-#endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 # if (BITS_PER_LONG > 32)
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.102
retrieving revision 1.73.2.103
diff -u -3 -r1.73.2.102 -r1.73.2.103
--- drbd_main.c	21 Jan 2004 14:59:05 -0000	1.73.2.102
+++ drbd_main.c	21 Jan 2004 15:40:09 -0000	1.73.2.103
@@ -75,7 +75,6 @@
 #endif
 
 int drbdd_init(struct Drbd_thread*);
-//int drbd_dsender(struct Drbd_thread*);
 int drbd_worker(struct Drbd_thread*);
 int drbd_asender(struct Drbd_thread*);
 
@@ -105,8 +104,13 @@
 // global panic flag
 volatile int drbd_did_panic = 0;
 
+/* in 2.6.x, our device mapping and config info contains our virtual gendisks
+ * as member struct gendisk vdisk;
+ */
+NOT_IN_26(
 STATIC int *drbd_blocksizes;
 STATIC int *drbd_sizes;
+);
 struct Drbd_Conf *drbd_conf;
 kmem_cache_t *drbd_request_cache;
 kmem_cache_t *drbd_pr_cache;
@@ -133,6 +137,7 @@
 	struct drbd_barrier *b;
 
 	b=kmalloc(sizeof(struct drbd_barrier),GFP_KERNEL);
+	// FIXME no mem ;-)
 	INIT_LIST_HEAD(&b->requests);
 	b->next=0;
 	b->br_number=4711;
@@ -259,12 +264,14 @@
 			r = list_entry(le, struct drbd_request,w.list);
 			if( (r->rq_status&0xfffe) != RQ_DRBD_SENT ) {
 				drbd_end_req(r,RQ_DRBD_SENT,ERF_NOTLD|1,
-					     r->pbh.b_blocknr);
+					     drbd_req_get_sector(r));
 				goto mark;
 			}
 			if(mdev->conf.wire_protocol != DRBD_PROT_C ) {
 			mark:
-				drbd_set_out_of_sync(mdev,r->pbh.b_blocknr,r->pbh.b_size);
+				drbd_set_out_of_sync(mdev
+				,	drbd_req_get_sector(r)
+				,	drbd_req_get_size(r));
 			}
 		}
 		f=b;
@@ -281,6 +288,11 @@
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,14)
 // Check when daemonize was introduced.
+/* NOTE: seems like all 2.4.X have it, so it should be 2,4,0 above.
+ * in 2.4.6 is is prototyped as
+ * void daemonize(const char *name, ...)
+ * though, so maybe we want to do this for 2.4.x already, too.
+ */
 void daemonize(void)
 {
 	struct fs_struct *fs;
@@ -301,6 +313,14 @@
 }
 #endif
 
+void drbd_daemonize(void) {
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+	daemonize("drbd_thread");
+#else
+	daemonize();
+#endif
+}
+
 void _set_cstate(drbd_dev* mdev,Drbd_CState ns)
 {
 	Drbd_CState os;
@@ -320,7 +340,7 @@
 	struct Drbd_thread *thi = (struct Drbd_thread *) arg;
 	int retval;
 
-	daemonize();
+	drbd_daemonize();
 
 	down(&thi->mutex); //ensures that thi->task is set.
 
@@ -520,14 +540,12 @@
 {
 	Drbd_Parameter_Packet p;
 	int ok,i;
-	kdev_t ll_dev = mdev->lo_device;
-	unsigned long m_size=0; // sector_t ??
+	unsigned long m_size; // sector_t ??
 
-	if(ll_dev) {
-		m_size = blk_size[MAJOR(ll_dev)][MINOR(ll_dev)];
-		if( mdev->md_index == -1 ) {// internal metadata
-			m_size = m_size - MD_RESERVED_SIZE;
-		}
+	m_size = drbd_get_lo_capacity(mdev)>>1;
+	if (mdev->md_index == -1 ) {// internal metadata
+		D_ASSERT(m_size > MD_RESERVED_SIZE);
+		m_size = drbd_md_ss(mdev)>>1;
 	}
 
 	p.u_size = cpu_to_be64(mdev->lo_usize);
@@ -613,9 +631,9 @@
 	int ok;
 	Drbd_BlockAck_Packet p;
 
-	p.sector   = cpu_to_be64(e->pbh.b_blocknr);
+	p.sector   = cpu_to_be64(drbd_ee_get_sector(e));
 	p.block_id = e->block_id;
-	p.blksize  = cpu_to_be32(e->pbh.b_size);
+	p.blksize  = cpu_to_be32(drbd_ee_get_size(e));
 
 	// YES, this happens. There is some race with the syncer!
 	if ((unsigned long)e->block_id <= 1) {
@@ -665,28 +683,19 @@
 	return FALSE;
 }
 
-int _drbd_send_zc_bh(drbd_dev *mdev, struct buffer_head *bh)
+int _drbd_send_page(drbd_dev *mdev, struct page *page,
+		    int offset, size_t size)
 {
 	int sent,ok;
-	struct page *page = bh->b_page;
-	size_t size = bh->b_size;
-	int offset;
+	int len   = size;
 	int retry = 10;
 
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=current;
 	spin_unlock(&mdev->send_task_lock);
 
-	/*
-	 * CAUTION I do not yet understand this completely.
-	 * I thought I have to kmap the page first... ?
-	 */
-	if (PageHighMem(page))
-		offset = (int)bh->b_data;
-	else
-		offset = (int)bh->b_data - (int)page_address(page);
 	do {
-		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, offset, size, MSG_NOSIGNAL);
+		sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, offset, len, MSG_NOSIGNAL);
 		if (sent == -EAGAIN) {
 			// FIXME move "retry--" into drbd_retry_send()
 			if (drbd_retry_send(mdev,mdev->data.socket) && retry--)
@@ -694,22 +703,22 @@
 			else
 				break;
 		}
-		if (sent <= 0) break;
-		size   -= sent;
+		if (sent <= 0) {
+			WARN("%s: size=%d len=%d sent=%d\n",__func__,size,len,sent);
+			break;
+		}
+		len    -= sent;
 		offset += sent;
 		// FIXME test "last_received" ...
-	} while(size > 0 /* THINK && mdev->cstate >= Connected*/);
+	} while(len > 0 /* THINK && mdev->cstate >= Connected*/);
 
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=NULL;
 	spin_unlock(&mdev->send_task_lock);
 
-	if (sent < 0)
-		WARN("%s: size=%d sent==%d\n",__func__,size,sent);
-
-	ok = (size == 0);
-	if(likely(ok))
-		mdev->send_cnt+=bh->b_size>>9;
+	ok = (len == 0);
+	if (likely(ok))
+		mdev->send_cnt += size>>9;
 	return ok;
 }
 
@@ -720,15 +729,14 @@
 	sigset_t old_blocked;
 	Drbd_Data_Packet p;
 
-	ERR_IF(!req || !req->bh) return FALSE;
-	ERR_IF(req->bh->b_reqnext != NULL) return FALSE;
+	ERR_IF(!req || !req->master_bio) return FALSE;
 
 	p.head.magic   = BE_DRBD_MAGIC;
 	p.head.command = cpu_to_be16(Data);
 	p.head.length  = cpu_to_be16( sizeof(p)-sizeof(Drbd_Header)
-				     + req->bh->b_size );
+				     + drbd_req_get_size(req) );
 
-	p.sector   = cpu_to_be64(req->bh->b_rsector);
+	p.sector   = cpu_to_be64(drbd_req_get_sector(req));
 	p.block_id = (unsigned long)req;
 
 	/* About tl_add():
@@ -756,7 +764,7 @@
 		_drbd_send_barrier(mdev);
 	tl_add(mdev,req);
 	ok =  (drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE) == sizeof(p))
-	   && _drbd_send_zc_bh(mdev,req->bh);
+	   && _drbd_send_zc_bio(mdev,&req->private_bio);
 
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=NULL;
@@ -766,7 +774,6 @@
 	return ok;
 }
 
-// Used to send answer to read requests, DRBD_BH_SECTOR(bh) !!
 int drbd_send_block(drbd_dev *mdev, Drbd_Packet_Cmd cmd,
 		    struct Tl_epoch_entry *e)
 {
@@ -779,9 +786,9 @@
 	p.head.magic   = BE_DRBD_MAGIC;
 	p.head.command = cpu_to_be16(cmd);
 	p.head.length  = cpu_to_be16( sizeof(p)-sizeof(Drbd_Header)
-				     + e->pbh.b_size );
+				     + drbd_ee_get_size(e) );
 
-	p.sector   = cpu_to_be64(e->pbh.b_blocknr);
+	p.sector   = cpu_to_be64(drbd_ee_get_sector(e));
 	p.block_id = e->block_id;
 
 	/* only called by our kernel thread.
@@ -795,7 +802,7 @@
 	spin_unlock(&mdev->send_task_lock);
 
 	ok =  (drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE) == sizeof(p))
-	   && _drbd_send_zc_bh(mdev,&e->pbh);
+	   && _drbd_send_zc_bio(mdev,&e->private_bio);
 
 	spin_lock(&mdev->send_task_lock);
 	mdev->send_task=NULL;
@@ -1115,14 +1122,28 @@
 void drbd_cleanup(void)
 {
 	int i, rr;
+	struct page *page;
 	if (drbd_conf) {
 		if (drbd_proc)
 			remove_proc_entry("drbd",&proc_root);
 		i=minor_count;
 		while (i--) {
 			drbd_dev        *mdev  = &drbd_conf[i];
+			ONLY_IN_26(
+			struct gendisk  **disk = &mdev->vdisk;
+			request_queue_t **q    = &mdev->rq_queue;
+			)
 
 			drbd_free_resources(mdev);
+
+			ONLY_IN_26(
+			if (*disk)
+				put_disk(*disk);
+			*disk = NULL;
+			if (*q) blk_put_queue(*q);
+			*q = NULL;
+			)
+
 			tl_cleanup(mdev);
 			if (mdev->mbds_id) bm_cleanup(mdev->mbds_id);
 			if (mdev->resync) lc_free(mdev->resync);
@@ -1144,17 +1165,21 @@
 			if(rr) printk(KERN_ERR DEVICE_NAME
 				       "%d: %d EEs in read list found!\n",i,rr);
 
-			if (mdev->md_io_bh.b_page)
-				__free_page(mdev->md_io_bh.b_page);
+			page = drbd_bio_get_page(&mdev->md_io_bio);
+			if (page)
+				__free_page(page);
 
 			if (mdev->act_log) lc_free(mdev->act_log);
 		}
 		drbd_destroy_mempools();
 	}
 
-	// kfree(NULL) is noop
+
+	NOT_IN_26(
 	blksize_size[MAJOR_NR] = NULL;
 	blk_size[MAJOR_NR]     = NULL;
+	)
+	// kfree(NULL) is noop
 	kfree(drbd_conf);
 	kfree(drbd_blocksizes);
 	kfree(drbd_sizes);
@@ -1164,7 +1189,7 @@
 
 }
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 void *
 kcalloc(size_t size, int type)
 {
@@ -1191,10 +1216,22 @@
 
 	drbd_proc       = NULL; // play safe for drbd_cleanup
 	drbd_conf       = kcalloc(sizeof(drbd_dev)*minor_count,GFP_KERNEL);
+	if (!drbd_conf)
+		goto Enomem;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 	drbd_sizes      = kcalloc(sizeof(int)*minor_count,GFP_KERNEL);
 	drbd_blocksizes = kmalloc(sizeof(int)*minor_count,GFP_KERNEL);
-	if (!drbd_conf || !drbd_blocksizes || !drbd_sizes)
+	if (!drbd_blocksizes || !drbd_sizes)
 		goto Enomem;
+#else
+	for (i = 0; i < minor_count; i++) {
+		drbd_conf[i].vdisk = alloc_disk(1);
+		if (!drbd_conf[i].vdisk) goto Enomem;
+	}
+	/* thanks to alloc_disk, we now have minor_count gendisks with
+	 * capacity == 0, waiting to be configured.  */
+#endif
 
 	if ((err = drbd_create_mempools()))
 		goto Enomem;
@@ -1202,13 +1239,25 @@
 	for (i = 0; i < minor_count; i++) {
 		drbd_dev    *mdev = &drbd_conf[i];
 		struct page *page = alloc_page(GFP_KERNEL);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+		struct gendisk         **disk = &mdev->vdisk;
+		request_queue_t        **q    = &mdev->rq_queue;
 
+		*disk = alloc_disk(1);
+		if (!*disk) goto Enomem;
+
+		*q = blk_alloc_queue(GFP_KERNEL);
+		if (!*q) goto Enomem;
+
+		set_disk_ro( mdev->this_bdev, TRUE );
+#else
 		drbd_blocksizes[i] = INITIAL_BLOCK_SIZE;
 		set_device_ro( MKDEV(MAJOR_NR, i), TRUE );
+#endif
 
 		if(!page) goto Enomem;
-		drbd_init_bh(&mdev->md_io_bh,512);
-		set_bh_page(&mdev->md_io_bh,page,0);
+		drbd_init_bio(&mdev->md_io_bio,512);
+		drbd_bio_add_page(&mdev->md_io_bio,page,0);
 
 		mdev->mbds_id = bm_init(0);
 		if (!mdev->mbds_id) goto Enomem;
@@ -1239,8 +1288,10 @@
 #else
 # error "Currently drbd depends on the proc file system (CONFIG_PROC_FS)"
 #endif
+	NOT_IN_26(
 	blksize_size[MAJOR_NR] = drbd_blocksizes;
 	blk_size[MAJOR_NR] = drbd_sizes;
+	)
 
 #ifdef CONFIG_DEVFS_FS
 	devfs_handle = devfs_mk_dir (NULL, "nbd", NULL);
@@ -1360,6 +1411,8 @@
 		mdev->lo_device = 0;
 		mdev->md_file = 0;
 		mdev->md_device = 0;
+#warning FIXME
+		ONLY_IN_26(del_gendisk(&mdev->vdisk));
 	}
 }
 
@@ -1793,12 +1846,6 @@
 	u32 bm_offset;         // offset to the bitmap, from here
 };
 
-void drbd_generic_end_io(struct buffer_head *bh, int uptodate)
-{ // This is a rough copy of end_buffer_io_sync
-	mark_buffer_uptodate(bh, uptodate);
-	unlock_buffer(bh);
-}
-
 void drbd_md_write(drbd_dev *mdev)
 {
 	struct meta_data_on_disk * buffer;
@@ -1809,7 +1856,7 @@
 	if( mdev->lo_device == 0) return;
 
 	down(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)bh_kmap(&mdev->md_io_bh);
+	buffer = (struct meta_data_on_disk *)drbd_bio_kmap(&mdev->md_io_bio);
 
 	flags=mdev->gen_cnt[Flags] & ~(MDF_PrimaryInd|MDF_ConnectedInd);
 	if(mdev->state==Primary) flags |= MDF_PrimaryInd;
@@ -1818,7 +1865,7 @@
 
 	for(i=Flags;i<=ArbitraryCnt;i++)
 		buffer->gc[i]=cpu_to_be32(mdev->gen_cnt[i]);
-	buffer->la_size=cpu_to_be64(blk_size[MAJOR_NR][(int)(mdev-drbd_conf)]);
+	buffer->la_size=cpu_to_be64(drbd_get_my_capacity(mdev)>>1);
 	buffer->magic=cpu_to_be32(DRBD_MD_MAGIC);
 
 	buffer->md_size = __constant_cpu_to_be32(MD_RESERVED_SIZE);
@@ -1827,14 +1874,11 @@
 
 	buffer->bm_offset = __constant_cpu_to_be32(MD_BM_OFFSET);
 
-	bh_kunmap(&mdev->md_io_bh);
+	drbd_bio_kunmap(&mdev->md_io_bio);
 	sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
-	drbd_set_bh(mdev, &mdev->md_io_bh, sector, 512);
-	set_bit(BH_Dirty, &mdev->md_io_bh.b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
-	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
-	generic_make_request(WRITE,&mdev->md_io_bh);
-	wait_on_buffer(&mdev->md_io_bh);
+
+	drbd_md_prepare_write(mdev,sector);
+	drbd_generic_make_request_wait(WRITE,&mdev->md_io_bio);
 
 	up(&mdev->md_io_mutex);
 }
@@ -1850,15 +1894,12 @@
 	down(&mdev->md_io_mutex);
 
 	sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
-	drbd_set_bh(mdev, &mdev->md_io_bh, sector, 512);
-	clear_bit(BH_Uptodate, &mdev->md_io_bh.b_state);
-	set_bit(BH_Lock, &mdev->md_io_bh.b_state);
-	mdev->md_io_bh.b_end_io = drbd_generic_end_io;
-	generic_make_request(READ,&mdev->md_io_bh);
-	wait_on_buffer(&mdev->md_io_bh);
-	ERR_IF( ! buffer_uptodate(&mdev->md_io_bh) ) goto err;
 
-	buffer = (struct meta_data_on_disk *)bh_kmap(&mdev->md_io_bh);
+	drbd_md_prepare_read(mdev,sector);
+	drbd_generic_make_request_wait(READ,&mdev->md_io_bio);
+	ERR_IF( ! buffer_uptodate(&mdev->md_io_bio) ) goto err;
+
+	buffer = (struct meta_data_on_disk *)drbd_bio_kmap(&mdev->md_io_bio);
 
 	if(be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) goto err;
 
@@ -1867,12 +1908,12 @@
 	mdev->la_size = be64_to_cpu(buffer->la_size);
 	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
 
-	bh_kunmap(&mdev->md_io_bh);
+	drbd_bio_kunmap(&mdev->md_io_bio);
 	up(&mdev->md_io_mutex);
 	return;
 
  err:
-	bh_kunmap(&mdev->md_io_bh);
+	drbd_bio_kunmap(&mdev->md_io_bio);
 	up(&mdev->md_io_mutex);
 
 	INFO("Creating state block\n");
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.89
retrieving revision 1.97.2.90
diff -u -3 -r1.97.2.89 -r1.97.2.90
--- drbd_receiver.c	21 Jan 2004 14:59:05 -0000	1.97.2.89
+++ drbd_receiver.c	21 Jan 2004 15:40:09 -0000	1.97.2.90
@@ -137,53 +137,6 @@
 }
 #endif //PARANOIA
 
-STATIC void drbd_dio_end_sec(struct buffer_head *bh, int uptodate)
-{
-	/* This callback will be called in irq context by the IDE drivers,
-	   and in Softirqs/Tasklets/BH context by the SCSI drivers.
-	   Try to get the locking right :) */
-  //	int wake_asender=0;
-	unsigned long flags=0;
-	struct Tl_epoch_entry *e=NULL;
-	struct Drbd_Conf* mdev;
-
-	mdev=bh->b_private;
-	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
-
-	e = container_of(bh,struct Tl_epoch_entry,pbh);
-	PARANOIA_BUG_ON(!VALID_POINTER(e));
-	D_ASSERT(e->block_id != ID_VACANT);
-
-	spin_lock_irqsave(&mdev->ee_lock,flags);
-
-	mark_buffer_uptodate(bh, uptodate);
-
-	clear_bit(BH_Dirty, &bh->b_state);
-	clear_bit(BH_Lock, &bh->b_state);
-	smp_mb__after_clear_bit();
-
-	list_del(&e->w.list);
-	list_add(&e->w.list,&mdev->done_ee);
-
-	if (waitqueue_active(&mdev->ee_wait) &&
-	    (list_empty(&mdev->active_ee) ||
-	     list_empty(&mdev->sync_ee)))
-		wake_up(&mdev->ee_wait);
-
-	//	if(mdev->conf.wire_protocol == DRBD_PROT_C ||
-	//	   e->block_id == ID_SYNCER ) wake_asender=1;
-
-	spin_unlock_irqrestore(&mdev->ee_lock,flags);
-
-	if( mdev->do_panic && !uptodate) {
-		drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
-	}
-
-	//	if(wake_asender) {
-	wake_asender(mdev);
-	//	}
-}
-
 /*
 You need to hold the ee_lock:
  drbd_free_ee()
@@ -209,17 +162,16 @@
 	e = kmem_cache_alloc(drbd_ee_cache, mask);
 	if( e == NULL ) return FALSE;
 
-	drbd_init_bh(&e->pbh, BM_BLOCK_SIZE); // BM_BLOCK_SIZE == PAGE_SIZE !
-	set_bh_page(&e->pbh,page,0);          // sets b_data and b_page
+	// BM_BLOCK_SIZE == PAGE_SIZE ! FIXME not necessarily on all arch!!
+	drbd_init_bio(&e->private_bio, BM_BLOCK_SIZE);
+	drbd_bio_add_page(&e->private_bio,page,0);     // sets b_data and b_page
 
 	e->block_id = ID_VACANT;
 	spin_lock_irq(&mdev->ee_lock);
 	list_add(&e->w.list,&mdev->free_ee);
 	mdev->ee_vacant++;
 	spin_unlock_irq(&mdev->ee_lock);
-	
-	e->pbh.b_this_page=&e->pbh;
-	
+
 	return TRUE;
 }
 
@@ -235,7 +187,7 @@
 		__free_page(page);
 		return FALSE;
 	}
-	  
+
 	return TRUE;
 }
 
@@ -252,12 +204,12 @@
 	e = list_entry(le, struct Tl_epoch_entry, w.list);
 	list_del(le);
 
-	page = e->pbh.b_page;
+	page = drbd_bio_get_page(&e->private_bio);
 	kmem_cache_free(drbd_ee_cache, e);
 	mdev->ee_vacant--;
-	
+
 	return page;
-}	
+}
 
 void drbd_init_ee(drbd_dev *mdev)
 {
@@ -813,39 +765,28 @@
 }
 
 STATIC struct Tl_epoch_entry *
-read_in_block(drbd_dev *mdev,int data_size)
+read_in_block(drbd_dev *mdev, int data_size)
 {
 	struct Tl_epoch_entry *e;
-	struct buffer_head *bh;
+	drbd_bio_t *bio;
 	int rr;
 
 	spin_lock_irq(&mdev->ee_lock);
 	e=drbd_get_ee(mdev);
 	spin_unlock_irq(&mdev->ee_lock);
-	bh=&e->pbh;
 
-	rr=drbd_recv(mdev,mdev->data.socket,bh_kmap(bh),data_size);
-	bh_kunmap(bh);
+	bio = &e->private_bio;
+
+	rr=drbd_recv(mdev,mdev->data.socket, drbd_bio_kmap(bio), data_size);
+	drbd_bio_kunmap(bio);
 
 	if ( rr != data_size) {
-		clear_bit(BH_Lock, &bh->b_state);
+		clear_bit(BH_Lock, &bio->b_state);
 		spin_lock_irq(&mdev->ee_lock);
 		drbd_put_ee(mdev,e);
 		spin_unlock_irq(&mdev->ee_lock);
 		return 0;
 	}
-
-	/* do not use mark_buffer_dirty() since it would call refile_buffer()*/
-	set_bit(BH_Dirty, &bh->b_state);
-	set_bit(BH_Lock, &bh->b_state); // since using generic_make_request()
-
-	/* MAYBE: set_bit(BH_Sync, &bh->b_state);
-	 * at least for A&B
-	 * see drivers/block/ll_rw_blk.c, __make_request:
-	 * would unplug the request queue for every single request, so
-	 * we won't need to run_task_queue(&tq_disk) ...
-	 */
-	bh->b_end_io = drbd_dio_end_sec;
 	mdev->recv_cnt+=data_size>>9;
 
 	return e;
@@ -870,20 +811,20 @@
 int recv_dless_read(drbd_dev *mdev, struct Pending_read *pr,
 		    sector_t sector, int data_size)
 {
-	struct buffer_head *bh;
+	drbd_bio_t *bio;
 	int ok,rr;
 
 	// DBG("%s\n", __func__);
 
-	bh = pr->d.bh;
+	bio = pr->d.master_bio;
 
-	D_ASSERT( sector == APP_BH_SECTOR(bh) );
+	D_ASSERT( sector == APP_BH_SECTOR(bio) );
 
-	rr=drbd_recv(mdev,mdev->data.socket,bh_kmap(bh),data_size);
-	bh_kunmap(bh);
+	rr=drbd_recv(mdev,mdev->data.socket,drbd_bio_kmap(bio),data_size);
+	drbd_bio_kunmap(bio);
 
 	ok=(rr==data_size);
-	bh->b_end_io(bh,ok);
+	drbd_bio_endio(bio,ok);
 
 	dec_ap_pending(mdev,HERE);
 	return ok;
@@ -892,7 +833,9 @@
 STATIC int e_end_resync_block(drbd_dev *mdev, struct drbd_work *w)
 {
 	struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
-	drbd_set_in_sync(mdev,e->pbh.b_blocknr,e->pbh.b_size);
+	drbd_set_in_sync(mdev,
+			 drbd_ee_get_sector(e),
+			 drbd_ee_get_size(e));
 	drbd_send_ack(mdev,WriteAck,e);
 	dec_unacked(mdev,HERE); // FIXME unconditional ??
 	return TRUE;
@@ -909,7 +852,8 @@
 
 	e = read_in_block(mdev,data_size);
 	ERR_IF(!e) return FALSE;
-	drbd_set_bh(mdev, &e->pbh, sector ,data_size);
+
+	drbd_ee_prepare_write(mdev,e,sector,data_size);
 	e->block_id = ID_SYNCER;
 	e->w.cb     = e_end_resync_block;
 
@@ -920,7 +864,7 @@
 	dec_rs_pending(mdev,HERE);
 	inc_unacked(mdev);
 
-	generic_make_request(WRITE,&e->pbh);
+	drbd_generic_make_request(WRITE,&e->private_bio);
 
 	receive_data_tail(mdev,data_size);
 	return TRUE;
@@ -932,35 +876,35 @@
  * serialize app and resync requests.
  * yes, I think even app READS should be serialized, or made independent of,
  * resync requests
+ * at least they only make sense for aligned (size == BM_BLOCK_SIZE)
  */
-
 int recv_both_read(drbd_dev *mdev, struct Pending_read *pr,
 		   sector_t sector, int data_size)
 {
 	struct Tl_epoch_entry *e;
-	struct buffer_head *bh;
+	drbd_bio_t *bio;
 
 	ERR("should not happen anymore%s\n", __func__);
 
-	bh = pr->d.bh;
+	bio = pr->d.master_bio;
 
-	D_ASSERT( sector == bh->b_blocknr * (bh->b_size >> 9) );
+	D_ASSERT( sector == APP_BH_SECTOR(bio) );
 
 	e = read_in_block(mdev,data_size);
 
-	if(!e) {
-		bh->b_end_io(bh,0);
+	if (!e) {
+		drbd_bio_IO_error(bio);
 		return FALSE;
 	}
 
 	// XXX can't we share it somehow?
-	memcpy(bh_kmap(bh),bh_kmap(&e->pbh),data_size);
-	bh_kunmap(bh);
-	bh_kunmap(&e->pbh);
+	memcpy(drbd_bio_kmap(bio),drbd_bio_kmap(&e->private_bio),data_size);
+	drbd_bio_kunmap(bio);
+	drbd_bio_kunmap(&e->private_bio);
 
-	bh->b_end_io(bh,1);
+	drbd_bio_endio(bio,1); // propagate success for application read
 
-	drbd_set_bh(mdev, &e->pbh, sector, data_size);
+	drbd_ee_prepare_write(mdev, e, sector, data_size);
 	e->block_id = ID_SYNCER;
 	e->w.cb     = e_end_resync_block;
 
@@ -972,7 +916,7 @@
 	dec_ap_pending(mdev,HERE);
 	inc_unacked(mdev);
 
-	generic_make_request(WRITE,&e->pbh);
+	drbd_generic_make_request(WRITE,&e->private_bio);
 
 	receive_data_tail(mdev,data_size);
 	return TRUE;
@@ -989,7 +933,8 @@
 	e = read_in_block(mdev,data_size);
 	ERR_IF(!e) return FALSE;
 
-	drbd_set_bh(mdev, &e->pbh, sector ,data_size);
+	// needed to correctly set the ee members for drbd_send_ack
+	drbd_ee_prepare_write(mdev, e, sector ,data_size);
 	drbd_send_ack(mdev,WriteAck,e);
 
 	spin_lock_irq(&mdev->ee_lock);
@@ -1061,8 +1006,9 @@
 	mdev->epoch_size++;
 	if(mdev->conf.wire_protocol == DRBD_PROT_C) {
 		if( mdev->cstate > Connected ) {
-			drbd_set_in_sync(mdev,e->pbh.b_blocknr,
-					 e->pbh.b_size);
+			drbd_set_in_sync(mdev
+			,	drbd_ee_get_sector(e)
+			,	drbd_ee_get_size(e));
 		}
 		ok=drbd_send_ack(mdev,WriteAck,e);
 		dec_unacked(mdev,HERE); // FIXME unconditional ??
@@ -1100,7 +1046,7 @@
 	e = read_in_block(mdev,data_size);
 	ERR_IF(!e) return FALSE;
 
-	drbd_set_bh(mdev, &e->pbh, sector, data_size);
+	drbd_ee_prepare_write(mdev, e, sector, data_size);
 	e->block_id = p->block_id; // no meaning on this side, e* on partner
 	e->w.cb     = e_end_block;
 
@@ -1120,7 +1066,7 @@
 		break;
 	}
 
-	generic_make_request(WRITE,&e->pbh);
+	drbd_generic_make_request(WRITE,&e->private_bio);
 
 	receive_data_tail(mdev,data_size);
 	return TRUE;
@@ -1143,7 +1089,8 @@
 
 	spin_lock_irq(&mdev->ee_lock);
 	e=drbd_get_ee(mdev);
-	drbd_set_bh(mdev, &e->pbh, sector, data_size);
+	// can we move it outside the lock?
+	drbd_ee_prepare_read(mdev,e,sector,data_size);
 	e->block_id = p->block_id; // no meaning on this side, pr* on partner
 	list_add(&e->w.list,&mdev->read_ee);
 	spin_unlock_irq(&mdev->ee_lock);
@@ -1165,13 +1112,11 @@
 		D_ASSERT(0);
 	}
 
-	clear_bit(BH_Uptodate, &e->pbh.b_state);
-	set_bit(BH_Lock, &e->pbh.b_state);
-	e->pbh.b_end_io = enslaved_read_bh_end_io;
-
-	mdev->read_cnt += e->pbh.b_size >> 9;
+	// FIXME do statistics here, or better within the end_io handler ?
+	// what about concurrent access to *_cnt ?
+	mdev->read_cnt += data_size >> 9;
 	inc_unacked(mdev);
-	generic_make_request(READ,&e->pbh);
+	drbd_generic_make_request(READ,&e->private_bio);
 
 	return TRUE;
 }
@@ -1206,7 +1151,6 @@
 STATIC int receive_param(drbd_dev *mdev, Drbd_Header *h)
 {
 	Drbd_Parameter_Packet *p = (Drbd_Parameter_Packet*)h;
-	int minor=(int)(mdev-drbd_conf);
 	int no_sync=0;
 	int oo_state;
 	unsigned long p_size;
@@ -1271,7 +1215,7 @@
 
 	no_sync=drbd_determin_dev_size(mdev);
 
-	if( blk_size[MAJOR_NR][minor] == 0) {
+	if( drbd_get_my_capacity(mdev) == 0) {
 		set_cstate(mdev,StandAlone);
 		mdev->receiver.t_state = Exiting;
 		return FALSE;
@@ -1393,7 +1337,7 @@
 {
 	struct Pending_read *pr;
 	struct list_head workset,*le;
-	struct buffer_head *bh;
+	drbd_bio_t *bio;
 
 	spin_lock(&mdev->pr_lock);
 	list_add(&workset,&mdev->app_reads);
@@ -1404,10 +1348,11 @@
 	while(!list_empty(&workset)) {
 		le = workset.next;
 		pr = list_entry(le, struct Pending_read, w.list);
-		bh = pr->d.bh;
 		list_del(le);
 
-		bh->b_end_io(bh,0);
+		bio = pr->d.master_bio;
+		drbd_bio_IO_error(bio);
+
 		switch(pr->cause) {
 		case Application:
 			dec_ap_pending(mdev,HERE);
@@ -1463,7 +1408,7 @@
 	ERR_IF(!mdev->mbds_id)
 		return FALSE;
 	bm_fill_bm(mdev->mbds_id,-1);
-	mdev->rs_total = blk_size[MAJOR_NR][(int)(mdev-drbd_conf)]<<1;
+	mdev->rs_total = drbd_get_my_capacity(mdev);
 	drbd_write_bm(mdev);
 	drbd_start_resync(mdev,SyncTarget);
 	return TRUE; // cannot fail ?
@@ -1472,7 +1417,7 @@
 STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
 {
 	bm_fill_bm(mdev->mbds_id,-1);
-	mdev->rs_total = blk_size[MAJOR_NR][(int)(mdev-drbd_conf)]<<1;
+	mdev->rs_total = drbd_get_my_capacity(mdev);
 	drbd_write_bm(mdev);
 	drbd_start_resync(mdev,SyncSource);
 	return TRUE; // cannot fail ?
@@ -1679,7 +1624,7 @@
 	Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
 	sector_t sector = be64_to_cpu(p->sector);
 	int blksize = be32_to_cpu(p->blksize);
-	
+
 	if( is_syncer_blk(mdev,p->block_id)) {
 		drbd_set_in_sync(mdev,sector,blksize);
 	} else {
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.39
retrieving revision 1.33.2.40
diff -u -3 -r1.33.2.39 -r1.33.2.40
--- drbd_req-2.4.c	21 Jan 2004 14:59:05 -0000	1.33.2.39
+++ drbd_req-2.4.c	21 Jan 2004 15:40:09 -0000	1.33.2.40
@@ -43,10 +43,11 @@
 	   This function is called by the receiver in kernel-thread context.
 	   Try to get the locking right :) */
 
-	struct Drbd_Conf* mdev = drbd_conf + MINOR(req->bh->b_rdev);
+	struct Drbd_Conf* mdev = drbd_req_get_mdev(req);
 	unsigned long flags=0;
 
-	PARANOIA_BUG_ON(req->pbh.b_blocknr != rsector);
+	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
+	PARANOIA_BUG_ON(drbd_req_get_sector(req) != rsector);
 	spin_lock_irqsave(&mdev->req_lock,flags);
 
 	if(req->rq_status & nextstate) {
@@ -78,10 +79,10 @@
 	}
 
 	if(mdev->conf.wire_protocol==DRBD_PROT_C && mdev->cstate > Connected) {
-		drbd_set_in_sync(mdev,rsector,req->bh->b_size);
+		drbd_set_in_sync(mdev,rsector,drbd_req_get_size(req));
 	}
 
-	req->bh->b_end_io(req->bh,(req->rq_status & 0x0001));
+	drbd_bio_endio(req->master_bio,(req->rq_status & 0x0001));
 
 	if( mdev->do_panic && !(req->rq_status & 0x0001) ) {
 		drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
@@ -94,25 +95,6 @@
 		wake_asender(mdev);
 }
 
-/*
- * b_end_io for writes on Primary comming from drbd_make_request
- */
-void drbd_dio_end(struct buffer_head *bh, int uptodate)
-{
-	struct Drbd_Conf* mdev;
-	drbd_request_t *req;
-
-	// ok, now we have the b_private available for other use
-	req = container_of(bh,struct drbd_request,pbh);
-	PARANOIA_BUG_ON(!VALID_POINTER(req));
-	mdev = drbd_conf+MINOR(req->bh->b_rdev);
-	PARANOIA_BUG_ON(!IS_VALID_MDEV(mdev));
-
-	// NOT bh->b_rsector, may have been remapped!
-	drbd_end_req(req, RQ_DRBD_WRITTEN, uptodate, req->bh->b_rsector);
-	drbd_al_complete_io(mdev,req->bh->b_rsector);
-}
-
 STATIC struct Pending_read*
 drbd_find_read(sector_t sector, struct list_head *in)
 {
@@ -127,19 +109,20 @@
 	return NULL;
 }
 
-STATIC void drbd_issue_drequest(struct Drbd_Conf* mdev,struct buffer_head *bh)
+#warning "FIXME make 2.6.x clean"
+STATIC void drbd_issue_drequest(struct Drbd_Conf* mdev,drbd_bio_t *bio)
 {
 	struct Pending_read *pr;
 	pr = mempool_alloc(drbd_pr_mempool, GFP_DRBD);
 
 	if (!pr) {
 		ERR("could not kmalloc() pr\n");
-		bh->b_end_io(bh,0);
+		drbd_bio_IO_error(bio);
 		return;
 	}
 	SET_MAGIC(pr);
 
-	pr->d.bh = bh;
+	pr->d.master_bio = bio;
 	// TODO: should only issue AppAndResnc if it is out of sync!
 	pr->cause = mdev->cstate == SyncTarget ? AppAndResync : Application;
 	spin_lock(&mdev->pr_lock);
@@ -149,18 +132,23 @@
 	if(pr->cause == AppAndResync) inc_rs_pending(mdev);
 	drbd_send_drequest(mdev, 
 			   pr->cause==AppAndResync ? RSDataRequest:DataRequest,
-			   bh->b_rsector, bh->b_size,
+			   bio->b_rsector, bio->b_size,
 			   (unsigned long)pr);
 }
 
-
+// in 2.6 this is of the form
+// static int __make_request(request_queue_t *q, struct bio *bio)
 int drbd_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
 {
 	struct Drbd_Conf* mdev = drbd_conf + MINOR(bh->b_rdev);
-	struct buffer_head *nbh;
 	drbd_request_t *req;
 	int send_ok;
 
+	if (MINOR(bh->b_rdev) >= minor_count || mdev->cstate < StandAlone) {
+		buffer_IO_error(bh);
+		return 0;
+	}
+
 	if( mdev->lo_device == 0 ) {
 		if( mdev->cstate < Connected ) {
 			bh->b_end_io(bh,0);
@@ -183,11 +171,18 @@
 			SET_MAGIC(req);
 
 			req->rq_status = RQ_DRBD_WRITTEN | 1;
-			req->bh=bh;
+			req->master_bio=bh;
 
 			if(mdev->conf.wire_protocol != DRBD_PROT_A) {
 				inc_ap_pending(mdev);
 			}
+			/* FIXME the drbd_make_request function will be
+			 * restructured soon.
+			 * until that is the case,
+			 * at least put the mdev and sector number into the
+			 * private bh!
+			 */
+			drbd_req_prepare_write(mdev,req);
 			drbd_send_dblock(mdev,req); // FIXME error check?
 		} else { // rw == READ || rw == READA
 			drbd_issue_drequest(mdev,bh);
@@ -221,7 +216,7 @@
 
 				pr->cause |= Application;
 				inc_ap_pending(mdev);
-				pr->d.bh=bh;
+				pr->d.master_bio=bh;
 				list_del(&pr->w.list);
 				list_add(&pr->w.list,&mdev->app_reads);
 				spin_unlock(&mdev->pr_lock);
@@ -263,34 +258,17 @@
 	}
 	SET_MAGIC(req);
 
-	nbh = &req->pbh;
-
-	drbd_init_bh(nbh, bh->b_size);
-
-	nbh->b_page=bh->b_page; // instead of set_bh_page()
-	nbh->b_data=bh->b_data; // instead of set_bh_page()
-
-	drbd_set_bh(mdev, nbh, bh->b_rsector, bh->b_size);
-
-	if(mdev->cstate < StandAlone || MINOR(bh->b_rdev) >= minor_count) {
-		buffer_IO_error(bh);
-		return 0;
-	}
-
-	nbh->b_private = req;
-	nbh->b_state = (1 << BH_Dirty) | ( 1 << BH_Mapped) | (1 << BH_Lock);
-
-	req->bh=bh;
-
-	req->rq_status = RQ_DRBD_NOTHING;
+	req->master_bio = bh;
+	drbd_req_prepare_write(mdev,req);
 
 	send_ok=drbd_send_dblock(mdev,req);
+
 	// FIXME we could remove the send_ok cases, the are redundant to tl_clear()
 	if(send_ok && mdev->conf.wire_protocol!=DRBD_PROT_A) inc_ap_pending(mdev);
 	if(mdev->conf.wire_protocol==DRBD_PROT_A || (!send_ok) ) {
 				/* If sending failed, we can not expect
 				   an ack packet. */
-		drbd_end_req(req, RQ_DRBD_SENT, 1, bh->b_rsector);
+		drbd_end_req(req, RQ_DRBD_SENT, 1, drbd_req_get_sector(req));
 	}
 	if(!send_ok) drbd_set_out_of_sync(mdev,bh->b_rsector,bh->b_size);
 
@@ -298,10 +276,9 @@
 		queue_task(&mdev->write_hint_tq, &tq_disk);
 	}
 
-	drbd_al_begin_io(mdev, nbh->b_rsector);
+	drbd_al_begin_io(mdev, drbd_req_get_sector(req));
 
-	nbh->b_end_io = drbd_dio_end;
-	generic_make_request(rw,nbh);
+	drbd_generic_make_request(rw,&req->private_bio);
 
 	return 0; /* Ok, bh arranged for transfer */