[DRBD-cvs] svn commit by phil - r2004 - in trunk: . drbd drbd/linux
- * Made changing of disk config and net config sane
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Tue Nov 15 16:38:42 CET 2005
* added inc_net() and dec_net()
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Author: phil
Date: 2005-11-15 16:38:40 +0100 (Tue, 15 Nov 2005)
New Revision: 2004
Modified:
trunk/ROADMAP
trunk/drbd/drbd_actlog.c
trunk/drbd/drbd_bitmap.c
trunk/drbd/drbd_compat_wrappers.h
trunk/drbd/drbd_fs.c
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
trunk/drbd/drbd_req.c
trunk/drbd/drbd_strings.c
trunk/drbd/drbd_worker.c
trunk/drbd/linux/drbd.h
Log:
* Made changing of disk config and net config sane
* added inc_net() and dec_net()
The module basically works, but this massive change has surely
introduced some bugs.
Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/ROADMAP 2005-11-15 15:38:40 UTC (rev 2004)
@@ -358,13 +358,9 @@
Remove config options syncer { group <number>; }
Introduce config options syncer { after <resource>; }
- 50% DONE
+ 99% DONE
Finished the implementation. Tested.
- TOOD: Rethink the whole thing and consider:
- * 3 node setups.
- * Coordinate with the resync of raid controllers.
-
11 Take into account that the two systems could have different
PAGE_SIZE.
@@ -646,8 +642,17 @@
than two nodes!
99% DONE
-
+24 Make it possible to hot-add disk drives == Atomic configuration changes.
+ 80% DONE [ The net and disk config changes are atomic by now ]
+
+25 Add reserved fields to DRBD-meta-data, add a bytes per bit field to
+ metadata.
+
+26 Implement a kind of "dstate" command to make integration with
+ Heartbeat-2.0's master/slave-support possible.
+
+
plus-banches:
----------------------
Modified: trunk/drbd/drbd_actlog.c
===================================================================
--- trunk/drbd/drbd_actlog.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_actlog.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -34,14 +34,15 @@
* ;)
* this is mostly from drivers/md/md.c
*/
-STATIC int _drbd_md_sync_page_io(drbd_dev *mdev, struct page *page,
- sector_t sector, int rw, int size)
+STATIC int _drbd_md_sync_page_io(struct drbd_backing_dev *bdev,
+ struct page *page, sector_t sector,
+ int rw, int size)
{
struct bio *bio = bio_alloc(GFP_KERNEL, 1);
struct completion event;
int ok;
- bio->bi_bdev = mdev->md_bdev;
+ bio->bi_bdev = bdev->md_bdev;
bio->bi_sector = sector;
bio_add_page(bio, page, size, 0);
init_completion(&event);
@@ -52,7 +53,7 @@
submit_bio(rw | (1 << BIO_RW_SYNC), bio);
#else
submit_bio(rw, bio);
- drbd_blk_run_queue(bdev_get_queue(mdev->md_bdev));
+ drbd_blk_run_queue(bdev_get_queue(bdev->md_bdev));
#endif
wait_for_completion(&event);
@@ -61,23 +62,24 @@
return ok;
}
-int drbd_md_sync_page_io(drbd_dev *mdev, sector_t sector, int rw)
+int drbd_md_sync_page_io(drbd_dev *mdev, struct drbd_backing_dev *bdev,
+ sector_t sector, int rw)
{
int hardsect,mask,ok,offset=0;
struct page *iop = mdev->md_io_page;
D_ASSERT(semaphore_is_locked(&mdev->md_io_mutex));
- if (!mdev->md_bdev) {
+ if (!bdev->md_bdev) {
if (DRBD_ratelimit(5*HZ,5)) {
- ERR("mdev->md_bdev==NULL\n");
+ ERR("bdev->md_bdev==NULL\n");
dump_stack();
}
return 0;
}
- hardsect = drbd_get_hardsect(mdev->md_bdev);
+ hardsect = drbd_get_hardsect(bdev->md_bdev);
// in case hardsect != 512 [ s390 only? ]
if( hardsect != MD_HARDSECT ) {
@@ -103,7 +105,7 @@
void *p = page_address(mdev->md_io_page);
void *hp = page_address(mdev->md_io_tmpp);
- ok = _drbd_md_sync_page_io(mdev,iop,
+ ok = _drbd_md_sync_page_io(bdev,iop,
sector,READ,hardsect);
if (unlikely(!ok)) return 0;
@@ -118,13 +120,13 @@
sector, rw ? "WRITE" : "READ");
#endif
- if (sector < drbd_md_first_sector(mdev) || sector > drbd_md_last_sector(mdev)) {
+ if (sector < drbd_md_first_sector(bdev) || sector > drbd_md_last_sector(bdev)) {
ALERT("%s [%d]:%s(,%llu,%s) out of range md access!\n",
current->comm, current->pid, __func__,
(unsigned long long)sector, rw ? "WRITE" : "READ");
}
- ok = _drbd_md_sync_page_io(mdev,iop,sector,rw,hardsect);
+ ok = _drbd_md_sync_page_io(bdev,iop,sector,rw,hardsect);
if (unlikely(!ok)) {
ERR("drbd_md_sync_page_io(,%llu,%s) failed!\n",
(unsigned long long)sector,rw ? "WRITE" : "READ");
@@ -295,9 +297,9 @@
buffer->xor_sum = cpu_to_be32(xor_sum);
#warning check outcome of addition u64/sector_t/s32
- sector = mdev->md.md_offset + mdev->md.al_offset + mdev->al_tr_pos;
+ sector = mdev->bc->md.md_offset + mdev->bc->md.al_offset + mdev->al_tr_pos;
- if(!drbd_md_sync_page_io(mdev,sector,WRITE)) {
+ if(!drbd_md_sync_page_io(mdev,mdev->bc,sector,WRITE)) {
drbd_chk_io_error(mdev, 1);
drbd_io_error(mdev);
}
@@ -319,9 +321,9 @@
int rv,i;
u32 xor_sum=0;
- sector = mdev->md.md_offset + mdev->md.al_offset + index;
+ sector = mdev->bc->md.md_offset + mdev->bc->md.al_offset + index;
- if(!drbd_md_sync_page_io(mdev,sector,READ)) {
+ if(!drbd_md_sync_page_io(mdev,mdev->bc,sector,READ)) {
drbd_chk_io_error(mdev, 1);
drbd_io_error(mdev);
return 0;
@@ -442,9 +444,9 @@
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
- i=inc_local_md_only(mdev);
+ i=inc_md_only(mdev,Attaching);
D_ASSERT( i ); // Assertions should not have side effects.
- // I do not want to have D_ASSERT( inc_local_md_only(mdev) );
+ // I do not want to have D_ASSERT( inc_md_only(mdev,Attaching) );
for(i=0;i<mdev->act_log->nr_elements;i++) {
enr = lc_entry(mdev->act_log,i)->lc_number;
@@ -525,7 +527,7 @@
{
struct update_odbm_work *udw = (struct update_odbm_work*)w;
- if( !inc_local_md_only(mdev) ) {
+ if( !inc_md_only(mdev,Attaching) ) {
if (DRBD_ratelimit(5*HZ,5))
WARN("Can not update on disk bitmap, local IO disabled.\n");
return 1;
Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_bitmap.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -357,7 +357,7 @@
*/
words = ALIGN(bits,64) >> LN2_BPL;
- D_ASSERT((u64)bits <= (((u64)mdev->md.md_size_sect-MD_BM_OFFSET) << 12));
+ D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12));
if ( words == b->bm_words ) {
/* optimize: capacity has changed,
@@ -596,13 +596,13 @@
int drbd_bm_read_sect(drbd_dev *mdev,unsigned long enr)
{
#warning check outcome of addition of sector_t/u64/s32
- sector_t on_disk_sector = mdev->md.md_offset + mdev->md.bm_offset + enr;
+ sector_t on_disk_sector = mdev->bc->md.md_offset + mdev->bc->md.bm_offset + enr;
int bm_words, num_words, offset, err = 0;
// MUST_BE_LOCKED(); not neccessarily global ...
down(&mdev->md_io_mutex);
- if(drbd_md_sync_page_io(mdev,on_disk_sector,READ)) {
+ if(drbd_md_sync_page_io(mdev,mdev->bc,on_disk_sector,READ)) {
bm_words = drbd_bm_words(mdev);
offset = S2W(enr); // word offset into bitmap
num_words = min(S2W(1), bm_words - offset);
@@ -660,7 +660,7 @@
*/
int drbd_bm_write_sect(struct Drbd_Conf *mdev,unsigned long enr)
{
- sector_t on_disk_sector = enr + mdev->md.md_offset + mdev->md.bm_offset;
+ sector_t on_disk_sector = enr + mdev->bc->md.md_offset + mdev->bc->md.bm_offset;
int bm_words, num_words, offset, err = 0;
// MUST_BE_LOCKED(); not neccessarily global...
@@ -678,7 +678,7 @@
}
drbd_bm_get_lel( mdev, offset, num_words,
page_address(mdev->md_io_page) );
- if (!drbd_md_sync_page_io(mdev,on_disk_sector,WRITE)) {
+ if (!drbd_md_sync_page_io(mdev,mdev->bc,on_disk_sector,WRITE)) {
int i;
err = -EIO;
ERR( "IO ERROR writing bitmap sector %lu "
Modified: trunk/drbd/drbd_compat_wrappers.h
===================================================================
--- trunk/drbd/drbd_compat_wrappers.h 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_compat_wrappers.h 2005-11-15 15:38:40 UTC (rev 2004)
@@ -29,31 +29,17 @@
/* Returns the number of 512 byte sectors of the device */
static inline sector_t drbd_get_capacity(struct block_device *bdev)
{
- loff_t capacity = bdev ? bdev->bd_inode->i_size >> 9 : 0;
- if ((sector_t)capacity != capacity)
- printk(KERN_ERR "drbd: overflow in drbd_get_capacity\n");
- return (sector_t)capacity;
+ return bdev ? bdev->bd_inode->i_size >> 9 : 0;
}
/* sets the number of 512 byte sectors of our virtual device */
-static inline void drbd_set_my_capacity(drbd_dev *mdev, sector_t size)
+static inline void drbd_set_my_capacity(drbd_dev *mdev,
+ sector_t size)
{
set_capacity(mdev->vdisk,size);
mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
-//#warning "FIXME why don't we care for the return value?"
-static inline void drbd_set_blocksize(drbd_dev *mdev, int blksize)
-{
- set_blocksize(mdev->this_bdev,blksize);
- if (mdev->backing_bdev) {
- set_blocksize(mdev->backing_bdev, blksize);
- } else {
- D_ASSERT(mdev->backing_bdev);
- // FIXME send some package over to the peer?
- }
-}
-
static inline int drbd_sync_me(drbd_dev *mdev)
{
return fsync_bdev(mdev->this_bdev);
@@ -196,17 +182,18 @@
static inline void drbd_kick_lo(drbd_dev *mdev)
{
- if (!mdev->backing_bdev) {
+ if (!mdev->bc->backing_bdev) {
if (DRBD_ratelimit(5*HZ,5)) {
ERR("backing_bdev==NULL in drbd_kick_lo! The following call trace is for debuggin purposes only. Don't worry.\n");
dump_stack();
}
} else {
- request_queue_t *q = bdev_get_queue(mdev->backing_bdev);
+ request_queue_t *q;
+ q = bdev_get_queue(mdev->bc->backing_bdev);
/*
* FIXME investigate what makes most sense:
* struct backing_dev_info *bdi;
- * bdi = mdev->backing_bdev->bd_inode->i_mapping->backing_dev_info;
+ * bdi = mdev->bc->backing_bdev->bd_inode->i_mapping->backing_dev_info;
* bdi = &q->backing_dev_info;
* blk_run_queue(q);
*
@@ -220,7 +207,8 @@
static inline void drbd_plug_device(drbd_dev *mdev)
{
- request_queue_t *q = bdev_get_queue(mdev->this_bdev);
+ request_queue_t *q;
+ q = bdev_get_queue(mdev->this_bdev);
spin_lock_irq(q->queue_lock);
Modified: trunk/drbd/drbd_fs.c
===================================================================
--- trunk/drbd/drbd_fs.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_fs.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -58,19 +58,19 @@
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
- prev_first_sect = drbd_md_first_sector(mdev);
- prev_size = mdev->md.md_size_sect;
- la_size = mdev->md.la_size_sect;
+ prev_first_sect = drbd_md_first_sector(mdev->bc);
+ prev_size = mdev->bc->md.md_size_sect;
+ la_size = mdev->bc->md.la_size_sect;
// TODO: should only be some assert here, not (re)init...
- drbd_md_set_sector_offsets(mdev);
+ drbd_md_set_sector_offsets(mdev,mdev->bc);
rv = do_determin_dev_size(mdev);
- la_size_changed = (la_size != mdev->md.la_size_sect);
+ la_size_changed = (la_size != mdev->bc->md.la_size_sect);
#warning flexible device size!! is this the right thing to test?
- md_moved = prev_first_sect != drbd_md_first_sector(mdev)
- || prev_size != mdev->md.md_size_sect;
+ md_moved = prev_first_sect != drbd_md_first_sector(mdev->bc)
+ || prev_size != mdev->bc->md.md_size_sect;
if ( md_moved ) {
WARN("Moving meta-data.\n");
@@ -80,7 +80,7 @@
if ( la_size_changed || md_moved ) {
drbd_al_shrink(mdev); // All extents inactive.
drbd_bm_write(mdev); // write bitmap
- // Write mdev->md.la_size_sect to [possibly new position on] disk.
+ // Write mdev->bc->md.la_size_sect to [possibly new position on] disk.
drbd_md_write(mdev);
}
lc_unlock(mdev->act_log);
@@ -117,9 +117,9 @@
STATIC int do_determin_dev_size(struct Drbd_Conf* mdev)
{
sector_t p_size = mdev->p_size; // partner's disk size.
- sector_t la_size = mdev->md.la_size_sect; // last agreed size.
+ sector_t la_size = mdev->bc->md.la_size_sect; // last agreed size.
sector_t m_size; // my size
- sector_t u_size = mdev->lo_usize; // size requested by user.
+ sector_t u_size = mdev->bc->u_size; // size requested by user.
sector_t size=0;
int rv;
char ppb[10];
@@ -173,7 +173,7 @@
}
// racy, see comments above.
drbd_set_my_capacity(mdev,size);
- mdev->md.la_size_sect = size;
+ mdev->bc->md.la_size_sect = size;
INFO("size = %s (%lu KB)\n",ppsize(ppb,size>>1),
(unsigned long)size>>1);
}
@@ -181,7 +181,9 @@
return rv;
}
-/* checks that the al lru is of requested size, and if neccessary tries to
+/**
+ * drbd_check_al_size:
+ * checks that the al lru is of requested size, and if necessary tries to
* allocate a new one. returns -EBUSY if current al lru is still used,
* -ENOMEM when allocation failed, and 0 on success.
*/
@@ -235,7 +237,7 @@
void drbd_setup_queue_param(drbd_dev *mdev, unsigned int max_seg_s)
{
request_queue_t * const q = mdev->rq_queue;
- request_queue_t * const b = mdev->backing_bdev->bd_disk->queue;
+ request_queue_t * const b = mdev->bc->backing_bdev->bd_disk->queue;
unsigned int old_max_seg_s = q->max_segment_size;
@@ -265,121 +267,93 @@
{
int i, md_gc_valid, minor;
enum ret_codes retcode;
- struct disk_config new_conf;
- struct file *filp = 0;
- struct file *filp2 = 0;
+ struct disk_config new_conf; // local copy of ioctl() args.
+ struct drbd_backing_dev* nbc; // new_backing_conf
struct inode *inode, *inode2;
- struct block_device *bdev, *bdev2;
drbd_disks_t nds;
minor=(int)(mdev-drbd_conf);
/* if you want to reconfigure, please tear down first */
- smp_rmb();
if (mdev->state.disk > Diskless)
return -EBUSY;
- /* if this was "adding" a lo dev to a previously "diskless" node,
- * there still could be requests comming in right now. brrks.
- * if it was mounted, we had an open_cnt > 1,
- * so it would be BUSY anyways...
- */
- ERR_IF (mdev->state.role != Secondary)
- return -EBUSY;
-
- if (mdev->open_cnt > 1)
- return -EBUSY;
-
if (copy_from_user(&new_conf, &arg->config,sizeof(struct disk_config)))
return -EFAULT;
- /* FIXME
- * I'd like to do it here, so I can just fail this ioctl with ENOMEM.
- * but drbd_md_read below might change the al_nr_extens again, so need
- * to do it there again anyways...
- * but then I already changed it all and cannot easily undo it..
- * for now, do it there, but then if it fails, rather panic than later
- * have a NULL pointer dereference.
- *
- i = drbd_check_al_size(mdev);
- if (i) return i;
- *
- */
-
-
- /* FIXME allow reattach while connected,
- * and allow it in Primary/Diskless state...
- * currently there are strange races leading to a distributed
- * deadlock in that case...
- */
- if ( mdev->state.conn > StandAlone ) {
- return -EBUSY;
+ nbc = kmalloc(sizeof(struct drbd_backing_dev),GFP_KERNEL);
+ if(!nbc) {
+ retcode=KMallocFailed;
+ goto fail_ioctl;
}
+ nbc->lo_file = NULL;
+ nbc->md_file = NULL;
-#warning "FIXME hardcoded"
- if ( new_conf.meta_index < -3) {
+ if ( new_conf.meta_index < DRBD_MD_INDEX_FLEX_INT) {
retcode=LDMDInvalid;
goto fail_ioctl;
}
- filp = fget(new_conf.lower_device);
- if (!filp) {
+ nbc->lo_file = fget(new_conf.lower_device);
+ if (!nbc->lo_file) {
retcode=LDFDInvalid;
goto fail_ioctl;
}
- inode = filp->f_dentry->d_inode;
+ inode = nbc->lo_file->f_dentry->d_inode;
if (!S_ISBLK(inode->i_mode)) {
retcode=LDNoBlockDev;
goto fail_ioctl;
}
- filp2 = fget(new_conf.meta_device);
+ nbc->md_file = fget(new_conf.meta_device);
- if (!filp2) {
+ if (!nbc->md_file) {
retcode=MDFDInvalid;
goto fail_ioctl;
}
- inode2 = filp2->f_dentry->d_inode;
+ inode2 = nbc->md_file->f_dentry->d_inode;
if (!S_ISBLK(inode2->i_mode)) {
retcode=MDNoBlockDev;
goto fail_ioctl;
}
- bdev = inode->i_bdev;
- if (bd_claim(bdev, mdev)) {
+ nbc->backing_bdev = inode->i_bdev;
+ if (bd_claim(nbc->backing_bdev, mdev)) {
retcode=LDMounted;
goto fail_ioctl;
}
- bdev2 = inode2->i_bdev;
-
-
-#warning checks below no longer valid
-// --- rewrite
- if (bd_claim(bdev2, new_conf.meta_index == -1 ?
+ nbc->md_bdev = inode2->i_bdev;
+ if (bd_claim(nbc->md_bdev,
+ (new_conf.meta_index==DRBD_MD_INDEX_INTERNAL ||
+ new_conf.meta_index==DRBD_MD_INDEX_FLEX_INT) ?
(void *)mdev : (void*) drbd_m_holder )) {
retcode=MDMounted;
goto release_bdev_fail_ioctl;
}
- if ( (bdev == bdev2) != (new_conf.meta_index == -1) ) {
+ if ( (nbc->backing_bdev==nbc->md_bdev) !=
+ (new_conf.meta_index==DRBD_MD_INDEX_INTERNAL ||
+ new_conf.meta_index==DRBD_MD_INDEX_FLEX_INT) ) {
retcode=LDMDInvalid;
goto release_bdev2_fail_ioctl;
}
-#if 0
- if ((drbd_get_capacity(bdev)>>1) < new_conf.disk_size) {
+ if ((drbd_get_capacity(nbc->backing_bdev)>>1) < new_conf.disk_size) {
/* FIXME maybe still allow,
* but leave only DRBD_MAX_SECTORS usable */
retcode = LDDeviceTooSmall;
goto release_bdev2_fail_ioctl;
}
- if (drbd_get_capacity(bdev) >= (sector_t)DRBD_MAX_SECTORS) {
+#warning checks below no longer valid
+// --- rewrite
+#if 0
+ if (drbd_get_capacity(nbc->backing_bdev) >= (sector_t)DRBD_MAX_SECTORS) {
retcode = LDDeviceTooLarge;
goto release_bdev2_fail_ioctl;
}
@@ -396,29 +370,45 @@
* FIXME this is arbitrary and needs to be reconsidered as soon as we
* move to flexible size meta data.
*/
- if( drbd_get_capacity(bdev2) < MD_RESERVED_SECT*i
+ if( drbd_get_capacity(nbc->md_bdev) < 2*MD_RESERVED_SIZE*i
+ (new_conf.meta_index == -1) ? (1<<16) : 0 )
{
retcode = MDDeviceTooSmall;
goto release_bdev2_fail_ioctl;
}
#endif
-// --- up to here
+// -- up to here
- drbd_free_ll_dev(mdev);
+ if(drbd_request_state(mdev,NS(disk,Attaching)) <= 0 ) {
+ retcode = StateNotAllowed;
+ goto release_bdev2_fail_ioctl;
+ }
- if (new_conf.split_brain_fix) set_bit(SPLIT_BRAIN_FIX,&mdev->flags);
- else clear_bit(SPLIT_BRAIN_FIX,&mdev->flags);
+ nbc->md_index = new_conf.meta_index;
+ nbc->u_size = new_conf.disk_size;
+ nbc->on_io_error = new_conf.on_io_error;
+ drbd_md_set_sector_offsets(mdev,nbc);
- mdev->md_bdev = bdev2;
- mdev->md_file = filp2;
- mdev->md_index = new_conf.meta_index;
+ md_gc_valid = drbd_md_read(mdev,nbc);
+ if ( md_gc_valid != NoError ) {
+ retcode = md_gc_valid;
+ goto release_bdev3_fail_ioctl;
+ }
- mdev->backing_bdev = bdev;
- mdev->lo_file = filp;
- mdev->lo_usize = new_conf.disk_size;
- mdev->on_io_error = new_conf.on_io_error;
+ // Since we are diskless, fix the AL first...
+ if (drbd_check_al_size(mdev)) {
+ retcode = KMallocFailed;
+ goto release_bdev3_fail_ioctl;
+ }
+ // Point of no return reached.
+
+ D_ASSERT(mdev->bc == NULL);
+ mdev->bc = nbc;
+
+ if (new_conf.split_brain_fix) set_bit(SPLIT_BRAIN_FIX,&mdev->flags);
+ else clear_bit(SPLIT_BRAIN_FIX,&mdev->flags);
+
mdev->send_cnt = 0;
mdev->recv_cnt = 0;
mdev->read_cnt = 0;
@@ -430,21 +420,6 @@
* drbd_set_recv_tcq(mdev,drbd_queue_order_type(mdev)==QUEUE_ORDERED_TAG);
*/
- set_bit(MD_IO_ALLOWED,&mdev->flags);
- drbd_md_set_sector_offsets(mdev);
- md_gc_valid = drbd_md_read(mdev);
-
- if (md_gc_valid != NoError) {
- retcode = md_gc_valid;
- goto unset_fail_ioctl;
- }
-
- /* We reach this only if we have a valid meta data block,
- * so we no longer create one here, if we don't find one.
- * That means that initially, you HAVE to use the drbdmeta
- * command to create one in user space.
- */
-
drbd_bm_lock(mdev); // racy...
drbd_determin_dev_size(mdev);
/* FIXME
@@ -486,8 +461,6 @@
FIXME wipe out on disk al!
} */
- drbd_set_blocksize(mdev,INITIAL_BLOCK_SIZE);
-
/* If MDF_Consistent is not set go into inconsistent state, otherwise
investige MDF_WasUpToDate...
If MDF_WasUpToDate is not set go into Outdated disk state, otherwise
@@ -507,29 +480,19 @@
drbd_thread_start(&mdev->worker);
}
-// FIXME EXPLAIN:
- clear_bit(MD_IO_ALLOWED,&mdev->flags);
-
drbd_bm_unlock(mdev);
return 0;
-
- unset_fail_ioctl:
- mdev->md_bdev = 0;
- mdev->md_file = 0;
- mdev->md_index = 0;
-
- mdev->backing_bdev = 0;
- mdev->lo_file = 0;
- mdev->lo_usize = 0;
- mdev->on_io_error = 0;
+ release_bdev3_fail_ioctl:
+ drbd_force_state(mdev,NS(disk,Diskless));
release_bdev2_fail_ioctl:
- bd_release(bdev2);
+ bd_release(nbc->md_bdev);
release_bdev_fail_ioctl:
- bd_release(bdev);
+ bd_release(nbc->backing_bdev);
fail_ioctl:
- if (filp) fput(filp);
- if (filp2) fput(filp2);
+ if (nbc->lo_file) fput(nbc->lo_file);
+ if (nbc->md_file) fput(nbc->md_file);
+ if (nbc) kfree(nbc);
if (put_user(retcode, &arg->ret_code)) return -EFAULT;
return -EINVAL;
}
@@ -540,20 +503,22 @@
struct ioctl_get_config cn;
memset(&cn,0,sizeof(cn));
- if (mdev->backing_bdev) {
- cn.lower_device_major = MAJOR(mdev->backing_bdev->bd_dev);
- cn.lower_device_minor = MINOR(mdev->backing_bdev->bd_dev);
- bdevname(mdev->backing_bdev,cn.lower_device_name);
+ if(inc_local(mdev)) {
+ cn.lower_device_major = MAJOR(mdev->bc->backing_bdev->bd_dev);
+ cn.lower_device_minor = MINOR(mdev->bc->backing_bdev->bd_dev);
+ bdevname(mdev->bc->backing_bdev,cn.lower_device_name);
+ cn.meta_device_major = MAJOR(mdev->bc->md_bdev->bd_dev);
+ cn.meta_device_minor = MINOR(mdev->bc->md_bdev->bd_dev);
+ bdevname(mdev->bc->md_bdev,cn.meta_device_name);
+ cn.meta_index=mdev->bc->md_index;
+ cn.on_io_error=mdev->bc->on_io_error;
+ dec_local(mdev);
}
- if (mdev->md_bdev) {
- cn.meta_device_major = MAJOR(mdev->md_bdev->bd_dev);
- cn.meta_device_minor = MINOR(mdev->md_bdev->bd_dev);
- bdevname(mdev->md_bdev,cn.meta_device_name);
- }
cn.state=mdev->state;
- cn.meta_index=mdev->md_index;
- cn.on_io_error=mdev->on_io_error;
- memcpy(&cn.nconf, &mdev->conf, sizeof(struct net_config));
+ if(inc_net(mdev)) {
+ memcpy(&cn.nconf, mdev->net_conf, sizeof(struct net_config));
+ dec_net(mdev);
+ }
memcpy(&cn.sconf, &mdev->sync_conf, sizeof(struct syncer_config));
if (copy_to_user(arg,&cn,sizeof(struct ioctl_get_config)))
@@ -568,36 +533,41 @@
{
int i,minor;
enum ret_codes retcode;
- struct net_config new_conf;
+ struct net_config *new_conf = NULL;
struct crypto_tfm* tfm = NULL;
struct hlist_head *new_tl_hash = NULL;
struct hlist_head *new_ee_hash = NULL;
minor=(int)(mdev-drbd_conf);
- if( mdev->state.role == Primary && mdev->conf.want_lose ) {
- retcode=DiscardNotAllowed;
+ new_conf = kmalloc(sizeof(struct net_config),GFP_KERNEL);
+ if(!new_conf) {
+ retcode=KMallocFailed;
goto fail_ioctl;
}
- // FIXME plausibility check
- if (copy_from_user(&new_conf, &arg->config,sizeof(struct net_config)))
+ if (copy_from_user(new_conf, &arg->config,sizeof(struct net_config)))
return -EFAULT;
-#define M_ADDR(A) (((struct sockaddr_in *)&A.my_addr)->sin_addr.s_addr)
-#define M_PORT(A) (((struct sockaddr_in *)&A.my_addr)->sin_port)
-#define O_ADDR(A) (((struct sockaddr_in *)&A.other_addr)->sin_addr.s_addr)
-#define O_PORT(A) (((struct sockaddr_in *)&A.other_addr)->sin_port)
+ if( mdev->state.role == Primary && new_conf->want_lose ) {
+ retcode=DiscardNotAllowed;
+ goto fail_ioctl;
+ }
+
+#define M_ADDR(A) (((struct sockaddr_in *)&A->my_addr)->sin_addr.s_addr)
+#define M_PORT(A) (((struct sockaddr_in *)&A->my_addr)->sin_port)
+#define O_ADDR(A) (((struct sockaddr_in *)&A->other_addr)->sin_addr.s_addr)
+#define O_PORT(A) (((struct sockaddr_in *)&A->other_addr)->sin_port)
for(i=0;i<minor_count;i++) {
if( i!=minor && drbd_conf[i].state.conn > StandAlone &&
- M_ADDR(new_conf) == M_ADDR(drbd_conf[i].conf) &&
- M_PORT(new_conf) == M_PORT(drbd_conf[i].conf) ) {
+ M_ADDR(new_conf) == M_ADDR(drbd_conf[i].net_conf) &&
+ M_PORT(new_conf) == M_PORT(drbd_conf[i].net_conf) ) {
retcode=LAAlreadyInUse;
goto fail_ioctl;
}
if( i!=minor && drbd_conf[i].state.conn > StandAlone &&
- O_ADDR(new_conf) == O_ADDR(drbd_conf[i].conf) &&
- O_PORT(new_conf) == O_PORT(drbd_conf[i].conf) ) {
+ O_ADDR(new_conf) == O_ADDR(drbd_conf[i].net_conf) &&
+ O_PORT(new_conf) == O_PORT(drbd_conf[i].net_conf) ) {
retcode=OAAlreadyInUse;
goto fail_ioctl;
}
@@ -607,8 +577,8 @@
#undef O_ADDR
#undef O_PORT
- if( new_conf.cram_hmac_alg[0] != 0) {
- tfm = crypto_alloc_tfm(new_conf.cram_hmac_alg, 0);
+ if( new_conf->cram_hmac_alg[0] != 0) {
+ tfm = crypto_alloc_tfm(new_conf->cram_hmac_alg, 0);
if (tfm == NULL) {
retcode=CRAMAlgNotAvail;
goto fail_ioctl;
@@ -621,23 +591,25 @@
}
- if (mdev->tl_hash_s != new_conf.max_epoch_size/8 ) {
- new_tl_hash=kmalloc((new_conf.max_epoch_size/8)*sizeof(void*),
+ if (mdev->tl_hash_s != new_conf->max_epoch_size/8 ) {
+ new_tl_hash=kmalloc((new_conf->max_epoch_size/8)*sizeof(void*),
GFP_KERNEL);
if(!new_tl_hash) {
retcode=KMallocFailed;
goto fail_ioctl;
}
+ memset(new_tl_hash, 0, mdev->tl_hash_s * sizeof(void*));
}
- if (new_conf.two_primaries &&
- ( mdev->ee_hash_s != new_conf.max_buffers/8 ) ) {
- new_ee_hash=kmalloc((new_conf.max_buffers/8)*sizeof(void*),
+ if (new_conf->two_primaries &&
+ ( mdev->ee_hash_s != new_conf->max_buffers/8 ) ) {
+ new_ee_hash=kmalloc((new_conf->max_buffers/8)*sizeof(void*),
GFP_KERNEL);
if(!new_ee_hash) {
retcode=KMallocFailed;
goto fail_ioctl;
}
+ memset(new_ee_hash, 0, mdev->ee_hash_s * sizeof(void*));
}
/* IMPROVE:
@@ -645,16 +617,10 @@
used already. E.g. some FS mounted on it.
*/
- drbd_sync_me(mdev);
- drbd_thread_stop(&mdev->receiver);
- drbd_free_sock(mdev);
+ ((char*)new_conf->shared_secret)[SHARED_SECRET_MAX-1]=0;
- // TODO plausibility check ...
- memcpy(&mdev->conf,&new_conf,sizeof(struct net_config));
-
- ((char*)mdev->conf.shared_secret)[SHARED_SECRET_MAX-1]=0;
#if 0
-FIXME
+FIXME LGE
/* for the connection loss logic in drbd_recv
* I _need_ the resulting timeo in jiffies to be
* non-zero and different
@@ -668,37 +634,50 @@
*/
// unlikely: someone disabled the timeouts ...
// just put some huge values in there.
- if (!mdev->conf.ping_int)
- mdev->conf.ping_int = MAX_SCHEDULE_TIMEOUT/HZ;
- if (!mdev->conf.timeout)
- mdev->conf.timeout = MAX_SCHEDULE_TIMEOUT/HZ*10;
- if (mdev->conf.ping_int*10 < mdev->conf.timeout)
- mdev->conf.timeout = mdev->conf.ping_int*10/6;
- if (mdev->conf.ping_int*10 == mdev->conf.timeout)
- mdev->conf.ping_int = mdev->conf.ping_int+1;
+ if (!new_conf->ping_int)
+ new_conf->ping_int = MAX_SCHEDULE_TIMEOUT/HZ;
+ if (!new_conf->timeout)
+ new_conf->timeout = MAX_SCHEDULE_TIMEOUT/HZ*10;
+ if (new_conf->ping_int*10 < new_conf->timeout)
+ new_conf->timeout = new_conf->ping_int*10/6;
+ if (new_conf->ping_int*10 == new_conf->timeout)
+ new_conf->ping_int = new_conf->ping_int+1;
#endif
+ drbd_sync_me(mdev);
+ drbd_thread_stop(&mdev->receiver); // conn = StandAlone afterwards
+ drbd_free_sock(mdev);
+
+ /* As soon as mdev->state.conn < Unconnected nobody can increase
+ the net_cnt. Wait until the net_cnt is 0. */
+ if ( wait_event_interruptible( mdev->cstate_wait,
+ atomic_read(&mdev->net_cnt) == 0 ) ) {
+ retcode=GotSignal;
+ goto fail_ioctl;
+ }
+
+ /* Now we may touch net_conf */
+ if (mdev->net_conf) kfree(mdev->net_conf);
+ mdev->net_conf = new_conf;
+
mdev->send_cnt = 0;
mdev->recv_cnt = 0;
if(new_tl_hash) {
if (mdev->tl_hash) kfree(mdev->tl_hash);
- mdev->tl_hash_s = mdev->conf.max_epoch_size/8;
+ mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
mdev->tl_hash = new_tl_hash;
- memset(mdev->tl_hash, 0, mdev->tl_hash_s * sizeof(void*));
}
if(new_ee_hash) {
if (mdev->ee_hash) kfree(mdev->ee_hash);
- mdev->ee_hash_s = mdev->conf.max_buffers/8;
+ mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
mdev->ee_hash = new_ee_hash;
- memset(mdev->ee_hash, 0, mdev->ee_hash_s * sizeof(void*));
}
if ( mdev->cram_hmac_tfm ) {
crypto_free_tfm(mdev->cram_hmac_tfm);
}
-
mdev->cram_hmac_tfm = tfm;
drbd_thread_start(&mdev->worker);
@@ -712,6 +691,7 @@
if (tfm) crypto_free_tfm(tfm);
if (new_tl_hash) kfree(new_tl_hash);
if (new_ee_hash) kfree(new_ee_hash);
+ if (new_conf) kfree(new_conf);
if (put_user(retcode, &arg->ret_code)) return -EFAULT;
return -EINVAL;
}
@@ -852,14 +832,17 @@
if (newstate & Secondary) {
set_disk_ro(mdev->vdisk, TRUE );
} else {
- mdev->conf.want_lose = 0;
+ if(inc_net(mdev)) {
+ mdev->net_conf->want_lose = 0;
+ dec_net(mdev);
+ }
set_disk_ro(mdev->vdisk, FALSE );
D_ASSERT(mdev->this_bdev->bd_holder == drbd_sec_holder);
bd_release(mdev->this_bdev);
mdev->this_bdev->bd_disk = mdev->vdisk;
if ( (mdev->state.conn < WFReportParams &&
- mdev->md.uuid[Bitmap] == 0) || forced ) {
+ mdev->bc->md.uuid[Bitmap] == 0) || forced ) {
drbd_uuid_new_current(mdev);
}
}
@@ -974,7 +957,6 @@
return 0;
}
-/* new */
STATIC int drbd_detach_ioctl(drbd_dev *mdev)
{
int interrupted,r;
@@ -993,14 +975,16 @@
drbd_sync_me(mdev);
+ /* since inc_local() only works as long as disk >= Inconsistent,
+ and it is Diskless here, local_cnt can only go down, it can
+ not increase... It will reach zero */
interrupted = wait_event_interruptible(mdev->cstate_wait,
- atomic_read(&mdev->local_cnt)==0);
+ !atomic_read(&mdev->local_cnt));
if ( interrupted ) {
drbd_force_state(mdev,NS(disk,os.disk));
return -EINTR;
}
- drbd_free_ll_dev(mdev);
after_state_ch(mdev, os, ns);
/* FIXME
@@ -1058,9 +1042,9 @@
memset(&cn,0,sizeof(cn));
for (i = Current; i < UUID_SIZE; i++) {
- cn.uuid[i]=mdev->md.uuid[i];
+ cn.uuid[i]=mdev->bc->md.uuid[i];
}
- cn.flags = mdev->md.flags;
+ cn.flags = mdev->bc->md.flags;
cn.bits_set = drbd_bm_total_weight(mdev);
cn.current_size = drbd_get_capacity(mdev->this_bdev);
@@ -1197,10 +1181,10 @@
break;
}
err=0;
- mdev->lo_usize = (sector_t)(u64)arg;
+ mdev->bc->u_size = (sector_t)(u64)arg;
drbd_bm_lock(mdev);
drbd_determin_dev_size(mdev);
- drbd_md_write(mdev); // Write mdev->md.la_size_sect to disk.
+ drbd_md_write(mdev); // Write mdev->bc->md.la_size_sect to disk.
drbd_bm_unlock(mdev);
if (mdev->state.conn == Connected) {
drbd_send_uuids(mdev); // to start sync...
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_int.h 2005-11-15 15:38:40 UTC (rev 2004)
@@ -65,8 +65,6 @@
#define FALSE 0
#endif
-#define INITIAL_BLOCK_SIZE (1<<12) // 4K
-
/* I don't remember why XCPU ...
* This is used to wake the asender,
* and to interrupt sending the sending task
@@ -636,7 +634,6 @@
UNPLUG_QUEUED, // only relevant with kernel 2.4
UNPLUG_REMOTE, // whether sending a "UnplugRemote" makes sense
PROCESS_EE_RUNNING, // eek!
- MD_IO_ALLOWED, // EXPLAIN
MD_DIRTY, // current gen counts and flags not yet on disk
SYNC_STARTED, // Needed to agree on the exact point in time..
UNIQUE, // Set on one node, cleared on the peer!
@@ -696,6 +693,17 @@
*/
};
+struct drbd_backing_dev { /* per-attach disk configuration; installed as mdev->bc by the attach ioctl */
+ struct block_device *backing_bdev; /* lower-level data device, taken from lo_file's inode */
+ struct block_device *md_bdev; /* meta-data device, taken from md_file's inode */
+ struct file *lo_file; /* fget() reference for the lower device fd */
+ struct file *md_file; /* fget() reference for the meta-data device fd */
+ int md_index; /* meta_index from disk_config; compared against DRBD_MD_INDEX_* */
+ enum io_error_handler on_io_error; /* policy on local IO errors, e.g. PassOn or Detach */
+ sector_t u_size; /* user provided size */
+ struct drbd_md md; /* meta data: offsets, sizes, uuids and flags */
+};
+
struct Drbd_Conf {
#ifdef PARANOIA
long magic;
@@ -703,25 +711,18 @@
/* things that are stored as / read from meta data on disk */
unsigned long flags;
- struct drbd_md md;
-
/* config data protected by: */
struct semaphore device_mutex;
/* configured by drbdsetup */
- struct net_config conf;
+ struct net_config *net_conf; // protected by inc_net() and dec_net()
struct syncer_config sync_conf;
- enum io_error_handler on_io_error;
+ struct drbd_backing_dev *bc; // protected by inc_local() dec_local()
- /* to become struct disk_config soonish */
- struct file *lo_file;
- struct file *md_file;
- int md_index;
- struct block_device *backing_bdev;
+ sector_t p_size; /* partner's disk size */
+ request_queue_t *rq_queue;
struct block_device *this_bdev;
- struct block_device *md_bdev;
struct gendisk *vdisk;
- request_queue_t *rq_queue;
struct drbd_socket data; // for data/barrier/cstate/parameter packets
struct drbd_socket meta; // for ping/ack (metadata) packets
@@ -732,10 +733,7 @@
unplug_work;
struct timer_list resync_timer;
- sector_t lo_usize; /* user provided size */
- sector_t p_size; /* partner's disk size */
drbd_state_t state;
-
wait_queue_head_t cstate_wait; // TODO Rename into "misc_wait".
unsigned int send_cnt;
unsigned int recv_cnt;
@@ -748,6 +746,7 @@
atomic_t rs_pending_cnt; // RS request/data packets on the wire
atomic_t unacked_cnt; // Need to send replys for
atomic_t local_cnt; // Waiting for local disk to signal completion
+ atomic_t net_cnt; // Users of net_conf
spinlock_t req_lock;
spinlock_t tl_lock;
struct drbd_barrier* newest_barrier;
@@ -859,13 +858,13 @@
extern int drbd_send_bitmap(drbd_dev *mdev);
extern int _drbd_send_bitmap(drbd_dev *mdev);
extern int drbd_send_discard(drbd_dev *mdev, drbd_request_t *req);
-extern void drbd_free_ll_dev(drbd_dev *mdev);
+extern void drbd_free_bc(struct drbd_backing_dev* bc);
extern int drbd_io_error(drbd_dev* mdev);
extern void drbd_mdev_cleanup(drbd_dev *mdev);
// drbd_meta-data.c (still in drbd_main.c)
extern void drbd_md_write(drbd_dev *mdev);
-extern int drbd_md_read(drbd_dev *mdev);
+extern int drbd_md_read(drbd_dev *mdev, struct drbd_backing_dev * bdev);
// maybe define them below as inline?
extern void drbd_uuid_set(drbd_dev *mdev,int idx, u64 val);
extern void _drbd_uuid_set(drbd_dev *mdev, int idx, u64 val);
@@ -1083,7 +1082,8 @@
extern void drbd_start_resync(drbd_dev *mdev, drbd_conns_t side);
extern int drbd_resync_finished(drbd_dev *mdev);
// maybe rather drbd_main.c ?
-extern int drbd_md_sync_page_io(drbd_dev *mdev, sector_t sector, int rw);
+extern int drbd_md_sync_page_io(drbd_dev *mdev, struct drbd_backing_dev *bdev,
+ sector_t sector, int rw);
// worker callbacks
extern int w_is_app_read (drbd_dev *, struct drbd_work *, int);
extern int w_is_resync_read (drbd_dev *, struct drbd_work *, int);
@@ -1208,7 +1208,7 @@
unsigned long flags;
spin_lock_irqsave(&mdev->req_lock,flags);
- switch(mdev->on_io_error) {
+ switch(mdev->bc->on_io_error) {
case PassOn:
ERR("Ignoring local IO error!\n");
break;
@@ -1217,7 +1217,6 @@
drbd_panic("IO error on backing device!\n");
break;
case Detach:
- set_bit(MD_IO_ALLOWED,&mdev->flags);
if (_drbd_set_state(mdev,_NS(disk,Failed),1) == 1) {
ERR("Local IO failed. Detaching...\n");
}
@@ -1240,66 +1239,67 @@
* which, for internal meta data, happens to be the maximum capacity
* we could agree upon with our peer
*/
-static inline sector_t drbd_md_first_sector(drbd_dev *mdev)
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
{
- switch (mdev->md_index) {
+ switch (bdev->md_index) {
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
- return mdev->md.md_offset + mdev->md.bm_offset;
+ return bdev->md.md_offset + bdev->md.bm_offset;
case DRBD_MD_INDEX_FLEX_EXT:
default:
- return mdev->md.md_offset;
+ return bdev->md.md_offset;
}
}
/* returns the last sector number of our meta data,
* to be able to catch out of band md access */
-static inline sector_t drbd_md_last_sector(drbd_dev *mdev)
+static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
{
- switch (mdev->md_index) {
+ switch (bdev->md_index) {
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
- return mdev->md.md_offset + MD_AL_OFFSET -1;
+ return bdev->md.md_offset + MD_AL_OFFSET -1;
case DRBD_MD_INDEX_FLEX_EXT:
default:
- return mdev->md.md_offset + mdev->md.md_size_sect;
+ return bdev->md.md_offset + bdev->md.md_size_sect;
}
}
/* returns the capacity we announce to out peer */
static inline sector_t drbd_get_max_capacity(drbd_dev *mdev)
{
- switch (mdev->md_index) {
+ switch (mdev->bc->md_index) {
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
- return drbd_get_capacity(mdev->backing_bdev)
- ? drbd_md_first_sector(mdev)
+ return drbd_get_capacity(mdev->bc->backing_bdev)
+ ? drbd_md_first_sector(mdev->bc)
: 0;
case DRBD_MD_INDEX_FLEX_EXT:
default:
- return drbd_get_capacity(mdev->backing_bdev);
+ return drbd_get_capacity(mdev->bc->backing_bdev);
}
}
/* returns the sector number of our meta data 'super' block */
-static inline sector_t drbd_md_ss__(drbd_dev *mdev)
+static inline sector_t drbd_md_ss__(drbd_dev *mdev,
+ struct drbd_backing_dev *bdev)
{
- switch (mdev->md_index) {
+ switch (bdev->md_index) {
default: /* external, some index */
- return MD_RESERVED_SECT * mdev->md_index;
+ return MD_RESERVED_SECT * bdev->md_index;
case DRBD_MD_INDEX_INTERNAL:
/* with drbd08, internal meta data is always "flexible" */
case DRBD_MD_INDEX_FLEX_INT:
/* sizeof(struct md_on_disk_07) == 4k
* position: last 4k aligned block of 4k size */
- if (!mdev->backing_bdev) {
+ if (!bdev->backing_bdev) {
if (DRBD_ratelimit(5*HZ,5)) {
- ERR("mdev->backing_bdev==NULL\n");
+ ERR("bdev->backing_bdev==NULL\n");
dump_stack();
}
return 0;
}
- return (drbd_get_capacity(mdev->backing_bdev) & ~7ULL)
+ return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
- MD_AL_OFFSET;
case DRBD_MD_INDEX_FLEX_EXT:
return 0;
@@ -1308,26 +1308,27 @@
/* initializes the md.*_offset members, so we are able to find
* the on disk meta data */
-static inline void drbd_md_set_sector_offsets(drbd_dev *mdev)
+static inline void drbd_md_set_sector_offsets(drbd_dev *mdev,
+ struct drbd_backing_dev *bdev)
{
sector_t md_size_sect = 0;
- mdev->md.md_offset = drbd_md_ss__(mdev);
- switch(mdev->md_index) {
+ bdev->md.md_offset = drbd_md_ss__(mdev,bdev);
+ switch(bdev->md_index) {
default:
case DRBD_MD_INDEX_FLEX_EXT:
/* just occupy the full device; unit: sectors */
- mdev->md.md_size_sect = drbd_get_capacity(mdev->md_bdev);
- mdev->md.md_offset = 0;
- mdev->md.al_offset = MD_AL_OFFSET;
- mdev->md.bm_offset = MD_BM_OFFSET;
+ bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
+ bdev->md.md_offset = 0;
+ bdev->md.al_offset = MD_AL_OFFSET;
+ bdev->md.bm_offset = MD_BM_OFFSET;
break;
case DRBD_MD_INDEX_INTERNAL:
case DRBD_MD_INDEX_FLEX_INT:
/* al size is still fixed */
- mdev->md.al_offset = -MD_AL_MAX_SIZE;
+ bdev->md.al_offset = -MD_AL_MAX_SIZE;
#warning FIXME max size check missing.
/* we need (slightly less than) ~ this much bitmap sectors: */
- md_size_sect = drbd_get_capacity(mdev->backing_bdev);
+ md_size_sect = drbd_get_capacity(bdev->backing_bdev);
DUMPI(md_size_sect);
md_size_sect = ALIGN(md_size_sect,BM_SECT_PER_EXT);
DUMPI(md_size_sect);
@@ -1340,14 +1341,14 @@
* and the activity log; */
md_size_sect += MD_BM_OFFSET;
- mdev->md.md_size_sect = md_size_sect;
+ bdev->md.md_size_sect = md_size_sect;
/* bitmap offset is adjusted by 'super' block size */
- mdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
+ bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
break;
}
- DUMPI(mdev->md.md_offset);
- DUMPI(mdev->md.al_offset);
- DUMPI(mdev->md.bm_offset);
+ DUMPI(bdev->md.md_offset);
+ DUMPI(bdev->md.al_offset);
+ DUMPI(bdev->md.bm_offset);
DUMPI(md_size_sect);
}
@@ -1515,6 +1516,27 @@
/**
+ * inc_net: Takes a net_cnt reference and returns TRUE when state.conn >= Unconnected, i.e. when it is ok to access mdev->net_conf. On FALSE the reference has already been dropped again.
+ * You should call dec_net() when finished looking at mdev->net_conf; the dec_net() that brings net_cnt to zero wakes up cstate_wait.
+ */
+static inline int inc_net(drbd_dev* mdev)
+{
+ int have_net_conf;
+
+ atomic_inc(&mdev->net_cnt); // count ourselves first, then look at the state
+ have_net_conf = mdev->state.conn >= Unconnected;
+ if(!have_net_conf) atomic_dec(&mdev->net_cnt); // undo the increment, caller must not call dec_net()
+ return have_net_conf;
+}
+
+static inline void dec_net(drbd_dev* mdev)
+{
+ if(atomic_dec_and_test(&mdev->net_cnt)) { // true only for the user that drops the count to zero
+ wake_up(&mdev->cstate_wait);
+ }
+}
+
+/**
* inc_local: Returns TRUE when local IO is possible. If it returns
* TRUE you should call dec_local() after IO is completed.
*/
@@ -1530,13 +1552,12 @@
return io_allowed;
}
-static inline int inc_local_md_only(drbd_dev* mdev)
+static inline int inc_md_only(drbd_dev* mdev, drbd_disks_t mins)
{
int io_allowed;
atomic_inc(&mdev->local_cnt);
- io_allowed = (mdev->state.disk >= Inconsistent) ||
- test_bit(MD_IO_ALLOWED,&mdev->flags);
+ io_allowed = (mdev->state.disk >= mins );
if( !io_allowed ) {
atomic_dec(&mdev->local_cnt);
}
@@ -1546,8 +1567,7 @@
static inline void dec_local(drbd_dev* mdev)
{
if(atomic_dec_and_test(&mdev->local_cnt) &&
- mdev->state.disk == Diskless &&
- mdev->lo_file) {
+ mdev->state.disk == Diskless && mdev->bc ) {
wake_up(&mdev->cstate_wait);
}
@@ -1661,7 +1681,7 @@
{
int rv;
#if !defined(QUEUE_FLAG_ORDERED)
- rv = bdev_get_queue(mdev->backing_bdev)->ordered;
+ rv = bdev_get_queue(mdev->bc->backing_bdev)->ordered;
#else
# define QUEUE_ORDERED_NONE 0
# define QUEUE_ORDERED_TAG 1
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_main.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -202,7 +202,7 @@
req->rq_status |= RQ_DRBD_IN_TL;
list_add(&req->w.list,&b->requests);
- if( b->n_req++ > mdev->conf.max_epoch_size ) {
+ if( b->n_req++ > mdev->net_conf->max_epoch_size ) {
set_bit(ISSUE_BARRIER,&mdev->flags);
}
@@ -366,12 +366,12 @@
sector = drbd_req_get_sector(r);
size = drbd_req_get_size(r);
if( !(r->rq_status & RQ_DRBD_SENT) ) {
- if(mdev->conf.wire_protocol != DRBD_PROT_A )
+ if(mdev->net_conf->wire_protocol != DRBD_PROT_A )
dec_ap_pending(mdev);
drbd_end_req(r,RQ_DRBD_SENT,ERF_NOTLD|1, sector);
goto mark;
}
- if(mdev->conf.wire_protocol != DRBD_PROT_C ) {
+ if(mdev->net_conf->wire_protocol != DRBD_PROT_C ) {
mark:
drbd_set_out_of_sync(mdev, sector, size);
}
@@ -477,7 +477,7 @@
unsigned long flags;
int send,ok=1;
- if(mdev->on_io_error != Panic && mdev->on_io_error != Detach) return 1;
+ if(mdev->bc->on_io_error != Panic && mdev->bc->on_io_error != Detach) return 1;
spin_lock_irqsave(&mdev->req_lock,flags);
if( (send = (mdev->state.disk == Failed)) ) {
@@ -510,8 +510,9 @@
* further references to local_cnt are shortlived,
* and no real references on the device. */
WARN("Releasing backing storage device.\n");
- drbd_free_ll_dev(mdev);
- mdev->md.la_size_sect=0;
+ /* md (and with it la_size_sect) is embedded in bc and is released together with it; do not touch mdev->bc after clearing it */
+ drbd_free_bc(mdev->bc);
+ mdev->bc=0;
}
return ok;
@@ -629,8 +630,11 @@
/* pre-state-change checks ; only look at ns */
/* See drbd_state_sw_errors in drbd_strings.c */
- if( !mdev->conf.two_primaries &&
- ns.role == Primary && ns.peer == Primary ) rv=-1;
+ /* keep the "else if" chain below attached to this check, not to inc_net() */
+ if( ns.role == Primary && ns.peer == Primary && ({
+ int two_pri = 1; /* without a net_conf there is nothing that forbids it */
+ if(inc_net(mdev)) { two_pri = mdev->net_conf->two_primaries; dec_net(mdev); }
+ !two_pri; }) ) rv=-1;
else if( ns.role == Primary && ns.conn < Connected &&
ns.disk <= Outdated ) rv=-2;
@@ -691,7 +695,7 @@
}
if ( os.disk == Diskless && os.conn == StandAlone &&
- (ns.disk >= Inconsistent || ns.conn > StandAlone) ) {
+ (ns.disk > Diskless || ns.conn > StandAlone) ) {
__module_get(THIS_MODULE);
}
@@ -715,7 +719,7 @@
}
/* it feels better to have the module_put last ... */
- if ( (os.disk >= Inconsistent || ns.conn > StandAlone) &&
+ if ( (os.disk > Diskless || ns.conn > StandAlone) &&
ns.disk == Diskless && ns.conn == StandAlone ) {
drbd_mdev_cleanup(mdev);
module_put(THIS_MODULE);
@@ -1013,7 +1017,7 @@
{
Drbd_Protocol_Packet p;
- p.protocol = cpu_to_be32(mdev->conf.wire_protocol);
+ p.protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
return drbd_send_cmd(mdev,mdev->data.socket,ReportProtocol,
(Drbd_Header*)&p,sizeof(p));
@@ -1025,11 +1029,11 @@
int i;
for (i = Current; i < UUID_SIZE; i++) {
- p.uuid[i] = cpu_to_be64(mdev->md.uuid[i]);
+ p.uuid[i] = cpu_to_be64(mdev->bc->md.uuid[i]);
}
p.uuid[UUID_SIZE] = cpu_to_be64(drbd_bm_total_weight(mdev));
- p.uuid[UUID_FLAGS] = cpu_to_be64(mdev->conf.want_lose);
+ p.uuid[UUID_FLAGS] = cpu_to_be64(mdev->net_conf->want_lose);
return drbd_send_cmd(mdev,mdev->data.socket,ReportUUIDs,
(Drbd_Header*)&p,sizeof(p));
@@ -1048,16 +1052,16 @@
int drbd_send_sizes(drbd_dev *mdev)
{
Drbd_Sizes_Packet p;
- int ok, have_disk;
sector_t d_size;
+ int ok;
- have_disk=inc_local(mdev);
- if(have_disk) {
- D_ASSERT(mdev->backing_bdev);
+ if(inc_local(mdev)) {
+ D_ASSERT(mdev->bc->backing_bdev);
d_size = drbd_get_max_capacity(mdev);
+ p.u_size = cpu_to_be64(mdev->bc->u_size);
+ dec_local(mdev);
- } else d_size = 0;
+ } else { d_size = 0; p.u_size = 0; } /* diskless: there is no bc, report "no user size" (0) instead of uninitialised stack bytes */
- p.u_size = cpu_to_be64(mdev->lo_usize);
p.d_size = cpu_to_be64(d_size);
p.c_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
p.max_segment_size = cpu_to_be32(mdev->rq_queue->max_segment_size);
@@ -1065,8 +1069,6 @@
ok = drbd_send_cmd(mdev,mdev->data.socket,ReportSizes,
(Drbd_Header*)&p,sizeof(p));
- if (have_disk) dec_local(mdev);
-
return ok;
}
@@ -1411,7 +1413,7 @@
if(test_and_clear_bit(ISSUE_BARRIER,&mdev->flags))
ok = _drbd_send_barrier(mdev);
if(ok) {
- if (mdev->conf.two_primaries) {
+ if (mdev->net_conf->two_primaries) {
if(ee_have_write(mdev,req)) {
ok=-1;
goto out;
@@ -1443,7 +1445,7 @@
set_bit(UNPLUG_REMOTE,&mdev->flags);
ok = sizeof(p) == drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE);
if(ok) {
- if(mdev->conf.wire_protocol == DRBD_PROT_A) {
+ if(mdev->net_conf->wire_protocol == DRBD_PROT_A) {
ok = _drbd_send_bio(mdev,drbd_req_private_bio(req));
} else {
ok = _drbd_send_zc_bio(mdev,drbd_req_private_bio(req));
@@ -1562,7 +1564,7 @@
#endif
if (sock == mdev->data.socket)
- mdev->ko_count = mdev->conf.ko_count;
+ mdev->ko_count = mdev->net_conf->ko_count;
do {
/* STRANGE
* tcp_sendmsg does _not_ use its size parameter at all ?
@@ -1720,6 +1722,7 @@
atomic_set(&mdev->rs_pending_cnt,0);
atomic_set(&mdev->unacked_cnt,0);
atomic_set(&mdev->local_cnt,0);
+ atomic_set(&mdev->net_cnt,0);
atomic_set(&mdev->resync_locked,0);
atomic_set(&mdev->packet_seq,0);
atomic_set(&mdev->pp_in_use, 0);
@@ -1818,11 +1821,9 @@
if ( mdev->epoch_size != 0)
ERR("epoch_size:%d\n",mdev->epoch_size);
#define ZAP(x) memset(&x,0,sizeof(x))
- ZAP(mdev->conf);
ZAP(mdev->sync_conf);
// ZAP(mdev->data); Not yet!
// ZAP(mdev->meta); Not yet!
- ZAP(mdev->md.uuid);
#undef ZAP
mdev->al_writ_cnt =
mdev->bm_writ_cnt =
@@ -1830,13 +1831,13 @@
mdev->recv_cnt =
mdev->send_cnt =
mdev->writ_cnt =
- mdev->md.la_size_sect =
- mdev->lo_usize =
+ mdev->bc->md.la_size_sect =
mdev->p_size =
mdev->rs_start =
mdev->rs_total =
mdev->rs_mark_left =
mdev->rs_mark_time = 0;
+ mdev->net_conf = NULL;
mdev->send_task = NULL;
drbd_set_my_capacity(mdev,0);
@@ -2295,26 +2296,17 @@
return err;
}
-void drbd_free_ll_dev(drbd_dev *mdev)
+void drbd_free_bc(struct drbd_backing_dev* bc)
{
- struct file *lo_file;
+ if(bc == NULL) return;
- lo_file = mdev->lo_file;
- mdev->lo_file = 0;
- wmb();
+ bd_release(bc->backing_bdev);
+ bd_release(bc->md_bdev);
- if (lo_file) {
- bd_release(mdev->backing_bdev);
- bd_release(mdev->md_bdev);
+ fput(bc->lo_file);
+ fput(bc->md_file);
- mdev->md_bdev =
- mdev->backing_bdev = 0;
-
- fput(lo_file);
- fput(mdev->md_file);
- // mdev->lo_file = 0;
- mdev->md_file = 0;
- }
+ kfree(bc);
}
void drbd_free_sock(drbd_dev *mdev)
@@ -2337,7 +2329,8 @@
mdev->cram_hmac_tfm = NULL;
}
drbd_free_sock(mdev);
- drbd_free_ll_dev(mdev);
+ drbd_free_bc(mdev->bc);
+ mdev->bc=0;
}
/*********************************/
@@ -2367,34 +2360,34 @@
sector_t sector;
int i;
- ERR_IF(!inc_local_md_only(mdev)) return;
+ ERR_IF(!inc_md_only(mdev,Attaching)) return;
down(&mdev->md_io_mutex);
buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
memset(buffer,0,512);
- flags = mdev->md.flags & ~(MDF_Consistent|MDF_PrimaryInd|
+ flags = mdev->bc->md.flags & ~(MDF_Consistent|MDF_PrimaryInd|
MDF_ConnectedInd|MDF_WasUpToDate);
if (mdev->state.role == Primary) flags |= MDF_PrimaryInd;
if (mdev->state.conn >= WFReportParams) flags |= MDF_ConnectedInd;
if (mdev->state.disk > Inconsistent) flags |= MDF_Consistent;
if (mdev->state.disk > Outdated) flags |= MDF_WasUpToDate;
- mdev->md.flags = flags;
+ mdev->bc->md.flags = flags;
buffer->la_size=cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
for (i = Current; i < UUID_SIZE; i++)
- buffer->uuid[i]=cpu_to_be64(mdev->md.uuid[i]);
+ buffer->uuid[i]=cpu_to_be64(mdev->bc->md.uuid[i]);
buffer->flags = cpu_to_be32(flags);
buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
- buffer->md_size_sect = cpu_to_be32(mdev->md.md_size_sect);
- buffer->al_offset = cpu_to_be32(mdev->md.al_offset);
+ buffer->md_size_sect = cpu_to_be32(mdev->bc->md.md_size_sect);
+ buffer->al_offset = cpu_to_be32(mdev->bc->md.al_offset);
buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
- buffer->bm_offset = cpu_to_be32(mdev->md.bm_offset);
+ buffer->bm_offset = cpu_to_be32(mdev->bc->md.bm_offset);
- D_ASSERT(drbd_md_ss__(mdev) == mdev->md.md_offset);
- sector = mdev->md.md_offset;
+ D_ASSERT(drbd_md_ss__(mdev,mdev->bc) == mdev->bc->md.md_offset);
+ sector = mdev->bc->md.md_offset;
#if 0
/* FIXME sooner or later I'd like to use the MD_DIRTY flag everywhere,
@@ -2405,7 +2398,7 @@
}
#endif
- if (drbd_md_sync_page_io(mdev,sector,WRITE)) {
+ if (drbd_md_sync_page_io(mdev,mdev->bc,sector,WRITE)) {
clear_bit(MD_DIRTY,&mdev->flags);
} else {
if (mdev->state.disk <= Failed) {
@@ -2420,64 +2413,64 @@
}
}
- // Update mdev->md.la_size_sect, since we updated it on metadata.
- mdev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
+ // Update mdev->bc->md.la_size_sect, since we updated it on metadata.
+ mdev->bc->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
up(&mdev->md_io_mutex);
dec_local(mdev);
}
-/*
- * return:
- * = 0 if we could read valid gen counts,
- * and reading the bitmap and act log does make sense.
- * != 0 if we had an error
- * MDIOError no meta data IO allowed
- * MDIOError IO not possible
- * MDInvalid no correct magic present
- */
-int drbd_md_read(drbd_dev *mdev)
+/**
+ * drbd_md_read:
+ * @bdev: describes the backing storage and the meta-data storage
+ * Reads the meta data from bdev. Return 0 (NoError) on success, and an
+ * enum ret_codes in case something goes wrong.
+ * Currently only: MDIOError, MDInvalid.
+ */
+int drbd_md_read(drbd_dev *mdev, struct drbd_backing_dev *bdev)
{
struct meta_data_on_disk * buffer;
int i,rv = NoError;
- if(!inc_local_md_only(mdev)) return MDIOError;
+ if(!inc_md_only(mdev,Attaching)) return MDIOError;
down(&mdev->md_io_mutex);
buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
- if ( ! drbd_md_sync_page_io(mdev,mdev->md.md_offset,READ) ) {
+ if ( ! drbd_md_sync_page_io(mdev,bdev,bdev->md.md_offset,READ) ) {
+ ERR("Error while reading metadata.\n");
rv = MDIOError;
goto err;
}
if(be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
+ ERR("Error while reading metadata, magic not found.\n");
rv = MDInvalid;
goto err;
}
- if (be32_to_cpu(buffer->al_offset) != mdev->md.al_offset) {
+ if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
ERR("unexpected al_offset: %d (expected %d)\n",
- be32_to_cpu(buffer->al_offset), mdev->md.al_offset);
+ be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
rv = MDInvalid;
goto err;
}
- if (be32_to_cpu(buffer->bm_offset) != mdev->md.bm_offset) {
+ if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
ERR("unexpected bm_offset: %d (expected %d)\n",
- be32_to_cpu(buffer->bm_offset), mdev->md.bm_offset);
+ be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
rv = MDInvalid;
goto err;
}
- if (be32_to_cpu(buffer->md_size_sect) != mdev->md.md_size_sect) {
+ if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
ERR("unexpected md_size: %u (expected %u)\n",
- be32_to_cpu(buffer->md_size_sect), mdev->md.md_size_sect);
+ be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
rv = MDInvalid;
goto err;
}
- mdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+ bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
for (i = Current; i < UUID_SIZE; i++)
- mdev->md.uuid[i]=be64_to_cpu(buffer->uuid[i]);
- mdev->md.flags = be32_to_cpu(buffer->flags);
+ bdev->md.uuid[i]=be64_to_cpu(buffer->uuid[i]);
+ bdev->md.flags = be32_to_cpu(buffer->flags);
mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
if (mdev->sync_conf.al_extents < 7)
@@ -2493,61 +2486,59 @@
return rv;
}
-
-
static void drbd_uuid_move_history(drbd_dev *mdev)
{
int i;
for ( i=History_start ; i<History_end ; i++ ) {
- mdev->md.uuid[i+1] = mdev->md.uuid[i];
+ mdev->bc->md.uuid[i+1] = mdev->bc->md.uuid[i];
}
}
void _drbd_uuid_set(drbd_dev *mdev, int idx, u64 val)
{
if (mdev->state.role == Primary) {
- mdev->md.uuid[idx] = val | 1;
+ mdev->bc->md.uuid[idx] = val | 1;
} else {
- mdev->md.uuid[idx] = val & ~((u64)1);
+ mdev->bc->md.uuid[idx] = val & ~((u64)1);
}
}
void drbd_uuid_set(drbd_dev *mdev, int idx, u64 val)
{
- if(mdev->md.uuid[idx]) {
+ if(mdev->bc->md.uuid[idx]) {
drbd_uuid_move_history(mdev);
- mdev->md.uuid[History_start]=mdev->md.uuid[idx];
+ mdev->bc->md.uuid[History_start]=mdev->bc->md.uuid[idx];
}
_drbd_uuid_set(mdev,idx,val);
}
void drbd_uuid_new_current(drbd_dev *mdev)
{
- D_ASSERT(mdev->md.uuid[Bitmap] == 0);
- mdev->md.uuid[Bitmap] = mdev->md.uuid[Current];
- get_random_bytes(&mdev->md.uuid[Current], sizeof(u64));
+ D_ASSERT(mdev->bc->md.uuid[Bitmap] == 0);
+ mdev->bc->md.uuid[Bitmap] = mdev->bc->md.uuid[Current];
+ get_random_bytes(&mdev->bc->md.uuid[Current], sizeof(u64));
if (mdev->state.role == Primary) {
- mdev->md.uuid[Current] |= 1;
+ mdev->bc->md.uuid[Current] |= 1;
} else {
- mdev->md.uuid[Current] &= ~((u64)1);
+ mdev->bc->md.uuid[Current] &= ~((u64)1);
}
}
void drbd_uuid_set_bm(drbd_dev *mdev, u64 val)
{
- if( mdev->md.uuid[Bitmap]==0 && val==0 ) return;
+ if( mdev->bc->md.uuid[Bitmap]==0 && val==0 ) return;
if(val==0) {
drbd_uuid_move_history(mdev);
- mdev->md.uuid[History_start]=mdev->md.uuid[Bitmap];
- mdev->md.uuid[Bitmap]=0;
+ mdev->bc->md.uuid[History_start]=mdev->bc->md.uuid[Bitmap];
+ mdev->bc->md.uuid[Bitmap]=0;
} else {
- if( mdev->md.uuid[Bitmap] ) WARN("bm UUID already set");
+ if( mdev->bc->md.uuid[Bitmap] ) WARN("bm UUID already set");
- mdev->md.uuid[Bitmap] = val;
- mdev->md.uuid[Bitmap] &= ~((u64)1);
+ mdev->bc->md.uuid[Bitmap] = val;
+ mdev->bc->md.uuid[Bitmap] &= ~((u64)1);
}
}
@@ -2555,23 +2546,23 @@
void drbd_md_set_flag(drbd_dev *mdev, int flag)
{
MUST_HOLD(mdev->req_lock);
- if ( (mdev->md.flags & flag) != flag) {
+ if ( (mdev->bc->md.flags & flag) != flag) {
set_bit(MD_DIRTY,&mdev->flags);
- mdev->md.flags |= flag;
+ mdev->bc->md.flags |= flag;
}
}
void drbd_md_clear_flag(drbd_dev *mdev, int flag)
{
MUST_HOLD(mdev->req_lock);
- if ( (mdev->md.flags & flag) != 0 ) {
+ if ( (mdev->bc->md.flags & flag) != 0 ) {
set_bit(MD_DIRTY,&mdev->flags);
- mdev->md.flags &= ~flag;
+ mdev->bc->md.flags &= ~flag;
}
}
int drbd_md_test_flag(drbd_dev *mdev, int flag)
{
MUST_HOLD(mdev->req_lock);
- return ((mdev->md.flags & flag) != 0);
+ return ((mdev->bc->md.flags & flag) != 0);
}
module_init(drbd_init)
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_receiver.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -159,7 +159,7 @@
/* hm. pool was empty. try to allocate from kernel.
* don't wait, if none is available, though.
*/
- if ( atomic_read(&mdev->pp_in_use) < mdev->conf.max_buffers ) {
+ if ( atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers ) {
if( (page = alloc_page(GFP_TRY)) )
break;
}
@@ -240,7 +240,7 @@
bio = bio_alloc(GFP_KERNEL, div_ceil(data_size,PAGE_SIZE));
if (!bio) goto fail1;
- bio->bi_bdev = mdev->backing_bdev;
+ bio->bi_bdev = mdev->bc->backing_bdev;
bio->bi_sector = sector;
ds = data_size;
@@ -378,7 +378,7 @@
le = mdev->done_ee.next;
list_del(le);
e = list_entry(le, struct Tl_epoch_entry, w.list);
- if(mdev->conf.wire_protocol == DRBD_PROT_C ||
+ if(mdev->net_conf->wire_protocol == DRBD_PROT_C ||
is_syncer_block_id(e->block_id)) {
++n;
}
@@ -540,18 +540,24 @@
err = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
if (err) {
ERR("sock_creat(..)=%d\n", err);
+ return NULL;
}
+
+ if(!inc_net(mdev)) { sock_release(sock); return NULL; } /* do not leak the socket created above */
+
sock->sk->sk_rcvtimeo =
- sock->sk->sk_sndtimeo = mdev->conf.try_connect_int*HZ;
+ sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
err = sock->ops->connect(sock,
- (struct sockaddr *) mdev->conf.other_addr,
- mdev->conf.other_addr_len, 0);
+ (struct sockaddr *)mdev->net_conf->other_addr,
+ mdev->net_conf->other_addr_len, 0);
if (err) {
sock_release(sock);
sock = NULL;
}
+
+ dec_net(mdev);
return sock;
}
@@ -566,18 +572,22 @@
// FIXME return NULL ?
}
+ if(!inc_net(mdev)) return NULL;
+
sock2->sk->sk_reuse = 1; /* SO_REUSEADDR */
sock2->sk->sk_rcvtimeo =
- sock2->sk->sk_sndtimeo = mdev->conf.try_connect_int*HZ;
+ sock2->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
err = sock2->ops->bind(sock2,
- (struct sockaddr *) mdev->conf.my_addr,
- mdev->conf.my_addr_len);
+ (struct sockaddr *) mdev->net_conf->my_addr,
+ mdev->net_conf->my_addr_len);
+ dec_net(mdev);
+
if (err) {
ERR("Unable to bind (%d)\n", err);
sock_release(sock2);
drbd_force_state(mdev,NS(conn,Unconnected));
- return 0;
+ return NULL;
}
sock = drbd_accept(mdev,sock2);
@@ -646,10 +656,10 @@
sock->sk->sk_priority=TC_PRIO_BULK;
tcp_sk(sock->sk)->nonagle = 0;
// FIXME fold to limits. should be done in drbd_ioctl
- sock->sk->sk_sndbuf = mdev->conf.sndbuf_size;
- sock->sk->sk_rcvbuf = mdev->conf.sndbuf_size;
+ sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
+ sock->sk->sk_rcvbuf = mdev->net_conf->sndbuf_size;
/* NOT YET ...
- * sock->sk->sk_sndtimeo = mdev->conf.timeout*HZ/20;
+ * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/20;
* sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
* first set it to the HandShake timeout, wich is hardcoded for now: */
sock->sk->sk_sndtimeo =
@@ -659,8 +669,8 @@
msock->sk->sk_priority=TC_PRIO_INTERACTIVE;
tcp_sk(sock->sk)->nonagle = 1;
msock->sk->sk_sndbuf = 2*32767;
- msock->sk->sk_sndtimeo = mdev->conf.timeout*HZ/20;
- msock->sk->sk_rcvtimeo = mdev->conf.ping_int*HZ;
+ msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/20;
+ msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
mdev->data.socket = sock;
mdev->meta.socket = msock;
@@ -680,7 +690,7 @@
}
}
- sock->sk->sk_sndtimeo = mdev->conf.timeout*HZ/20;
+ sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/20;
sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
atomic_set(&mdev->packet_seq,0);
@@ -767,7 +777,7 @@
inc_unacked(mdev);
- if (mdev->conf.wire_protocol != DRBD_PROT_C)
+ if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
drbd_kick_lo(mdev);
@@ -822,7 +832,7 @@
* XXX maybe: make that arbitrary number configurable.
* for now, I choose 1/16 of max-epoch-size.
*/
- if (atomic_read(&mdev->local_cnt) >= (mdev->conf.max_epoch_size>>4) ) {
+ if (atomic_read(&mdev->local_cnt) >= (mdev->net_conf->max_epoch_size>>4) ) {
drbd_kick_lo(mdev);
}
mdev->writ_cnt+=data_size>>9;
@@ -1008,7 +1018,7 @@
// unsigned int epoch_size;
int ok=1;
- if(mdev->conf.wire_protocol == DRBD_PROT_C) {
+ if(mdev->net_conf->wire_protocol == DRBD_PROT_C) {
if(likely(drbd_bio_uptodate(e->private_bio))) {
if(e->barrier_nr) {
# warning "epoch_size no more atomic_t"
@@ -1060,7 +1070,7 @@
// unsigned int epoch_size;
int ok=1;
- if(mdev->conf.wire_protocol == DRBD_PROT_C) {
+ if(mdev->net_conf->wire_protocol == DRBD_PROT_C) {
if(likely(drbd_bio_uptodate(e->private_bio))) {
ok &= drbd_send_ack(mdev,WriteAck,e);
/* FIXME
@@ -1156,7 +1166,7 @@
traveling on msock
PRE TODO: Wrap around of seq_num !!!
*/
- if (mdev->conf.two_primaries) {
+ if (mdev->net_conf->two_primaries) {
packet_seq = be32_to_cpu(p->seq_num);
/* if( packet_seq > peer_seq(mdev)+1 ) {
WARN(" will wait till (packet_seq) %d <= %d\n",
@@ -1291,7 +1301,7 @@
(void)drbd_send_b_ack(mdev, cpu_to_be32(barrier_nr), epoch_size);
}
- switch(mdev->conf.wire_protocol) {
+ switch(mdev->net_conf->wire_protocol) {
case DRBD_PROT_C:
inc_unacked(mdev);
break;
@@ -1397,7 +1407,7 @@
mdev->read_cnt += size >> 9;
inc_unacked(mdev);
drbd_generic_make_request(READ,e->private_bio);
- if (atomic_read(&mdev->local_cnt) >= (mdev->conf.max_epoch_size>>4) ) {
+ if (atomic_read(&mdev->local_cnt) >= (mdev->net_conf->max_epoch_size>>4) ) {
drbd_kick_lo(mdev);
}
@@ -1410,10 +1420,10 @@
int self, peer, rv=-100;
unsigned long ch_self, ch_peer;
- self = mdev->md.uuid[Bitmap] & 1;
+ self = mdev->bc->md.uuid[Bitmap] & 1;
peer = mdev->p_uuid[Bitmap] & 1;
- switch ( mdev->conf.after_sb_0p ) {
+ switch ( mdev->net_conf->after_sb_0p ) {
case Consensus:
case DiscardSecondary:
case PanicPrimary:
@@ -1439,10 +1449,10 @@
else /* ( ch_self == ch_peer ) */ {
// Well, then use the order of the IP addresses...
ch_self = (unsigned long)
- (((struct sockaddr_in *)mdev->conf.my_addr)
+ (((struct sockaddr_in *)mdev->net_conf->my_addr)
->sin_addr.s_addr);
ch_peer = (unsigned long)
- (((struct sockaddr_in *)mdev->conf.other_addr)
+ (((struct sockaddr_in *)mdev->net_conf->other_addr)
->sin_addr.s_addr);
if ( ch_self < ch_peer ) rv = -1;
else if ( ch_self > ch_peer ) rv = 1;
@@ -1463,10 +1473,10 @@
{
int self, peer, hg, rv=-100;
- self = mdev->md.uuid[Bitmap] & 1;
+ self = mdev->bc->md.uuid[Bitmap] & 1;
peer = mdev->p_uuid[Bitmap] & 1;
- switch ( mdev->conf.after_sb_1p ) {
+ switch ( mdev->net_conf->after_sb_1p ) {
case DiscardYoungerPri:
case DiscardOlderPri:
case DiscardLeastChg:
@@ -1505,10 +1515,10 @@
{
int self, peer, hg, rv=-100;
- self = mdev->md.uuid[Bitmap] & 1;
+ self = mdev->bc->md.uuid[Bitmap] & 1;
peer = mdev->p_uuid[Bitmap] & 1;
- switch ( mdev->conf.after_sb_2p ) {
+ switch ( mdev->net_conf->after_sb_2p ) {
case DiscardYoungerPri:
case DiscardOlderPri:
case DiscardLeastChg:
@@ -1553,7 +1563,7 @@
u64 self, peer;
int i,j;
- self = mdev->md.uuid[Current] & ~((u64)1);
+ self = mdev->bc->md.uuid[Current] & ~((u64)1);
peer = mdev->p_uuid[Current] & ~((u64)1);
if (self == UUID_JUST_CREATED &&
@@ -1575,17 +1585,17 @@
if (self == peer) return -2;
}
- self = mdev->md.uuid[Bitmap] & ~((u64)1);
+ self = mdev->bc->md.uuid[Bitmap] & ~((u64)1);
peer = mdev->p_uuid[Current] & ~((u64)1);
if (self == peer) return 1;
for ( i=History_start ; i<=History_end ; i++ ) {
- self = mdev->md.uuid[i] & ~((u64)1);
+ self = mdev->bc->md.uuid[i] & ~((u64)1);
if (self == peer) return 2;
}
- self = mdev->md.uuid[Bitmap] & ~((u64)1);
+ self = mdev->bc->md.uuid[Bitmap] & ~((u64)1);
peer = mdev->p_uuid[Bitmap] & ~((u64)1);
if (self == peer) return 100;
@@ -1620,10 +1630,10 @@
hg = drbd_asb_recover_1p(mdev);
}
if ( hg == -100 ) {
- if(mdev->conf.want_lose && !mdev->p_uuid[UUID_FLAGS]){
+ if(mdev->net_conf->want_lose && !mdev->p_uuid[UUID_FLAGS]){
hg = -1;
}
- if(!mdev->conf.want_lose && mdev->p_uuid[UUID_FLAGS]){
+ if(!mdev->net_conf->want_lose && mdev->p_uuid[UUID_FLAGS]){
hg = 1;
}
} else {
@@ -1703,17 +1713,17 @@
if (drbd_recv(mdev, h->payload, h->length) != h->length)
return FALSE;
- if(be32_to_cpu(p->protocol)!=mdev->conf.wire_protocol) {
+ if(be32_to_cpu(p->protocol)!=mdev->net_conf->wire_protocol) {
int peer_proto = be32_to_cpu(p->protocol);
if (DRBD_PROT_A <= peer_proto && peer_proto <= DRBD_PROT_C) {
ERR("incompatible communication protocols: "
"me %c, peer %c\n",
- 'A'-1+mdev->conf.wire_protocol,
+ 'A'-1+mdev->net_conf->wire_protocol,
'A'-1+peer_proto);
} else {
ERR("incompatible communication protocols: "
"me %c, peer [%d]\n",
- 'A'-1+mdev->conf.wire_protocol,
+ 'A'-1+mdev->net_conf->wire_protocol,
peer_proto);
}
drbd_force_state(mdev,NS(conn,StandAlone));
@@ -1806,33 +1816,36 @@
return FALSE;
}
- warn_if_differ_considerably(mdev, "lower level device sizes",
- p_size, drbd_get_capacity(mdev->backing_bdev));
- warn_if_differ_considerably(mdev, "user requested size",
- p_usize, mdev->lo_usize);
+#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) /* 0 means "not set"; fully parenthesised so it is safe inside larger expressions */
+ if(inc_local(mdev)) {
+ warn_if_differ_considerably(mdev, "lower level device sizes",
+ p_size, drbd_get_capacity(mdev->bc->backing_bdev));
+ warn_if_differ_considerably(mdev, "user requested size",
+ p_usize, mdev->bc->u_size);
+ if (mdev->state.conn == WFReportParams) {
+ /* this is first connect, or an otherwise expected
+ param exchange. choose the minimum */
+ p_usize = min_not_zero(mdev->bc->u_size, p_usize);
+ }
+
+ if( mdev->bc->u_size != p_usize ) {
+ mdev->bc->u_size = p_usize;
+ INFO("Peer sets u_size to %lu KB\n",
+ (unsigned long)mdev->bc->u_size);
+ }
+ dec_local(mdev);
+ }
+#undef min_not_zero
+
+ mdev->p_size=p_size;
drbd_bm_lock(mdev); // {
- mdev->p_size=p_size;
/*
* you may get a flip-flop connection established/connection loss, in
* case both really have different usize uppon first connect!
* try to solve it thus:
***/
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
- if (mdev->state.conn == WFReportParams) {
- /* this is first connect, or an otherwise expected param
- * exchange. choose the minimum */
- p_usize = min_not_zero(mdev->lo_usize, p_usize);
- } else {
- /* this was an "unexpected" param packet,
- * just do what the peer suggests */
- }
-#undef min_not_zero
- if( mdev->lo_usize != p_usize ) {
- mdev->lo_usize = p_usize;
- INFO("Peer sets u_size to %lu KB\n",
- (unsigned long)mdev->lo_usize);
- }
+
drbd_determin_dev_size(mdev);
drbd_bm_unlock(mdev); // }
@@ -1935,7 +1948,7 @@
return FALSE;
}
- mdev->conf.want_lose = 0;
+ mdev->net_conf->want_lose = 0;
/* FIXME assertion for (gencounts do not diverge) */
drbd_md_write(mdev); // update connected indicator, la_size, ...
@@ -2020,8 +2033,6 @@
*/
D_ASSERT(mdev->state.disk >= Inconsistent);
D_ASSERT(mdev->state.pdsk >= Inconsistent);
-// EXPLAIN:
- clear_bit(MD_IO_ALLOWED,&mdev->flags);
ok=TRUE;
out:
@@ -2371,7 +2382,7 @@
if ( mdev->state.role == Primary ) {
if ( mdev->state.pdsk >= DUnknown &&
- mdev->md.uuid[Bitmap] == 0 ) {
+ mdev->bc->md.uuid[Bitmap] == 0 ) {
/* We only create a new UUID if the peer might
possibly be UpToDate. Since the connection is
already gone it is DUnknown by now.
@@ -2495,7 +2506,7 @@
char *right_response = NULL;
char *peers_ch = NULL;
Drbd_Header p;
- unsigned int key_len = strlen(mdev->conf.shared_secret);
+ unsigned int key_len = strlen(mdev->net_conf->shared_secret);
unsigned int resp_size;
int rv;
@@ -2546,7 +2557,7 @@
sg.page = virt_to_page(peers_ch);
sg.offset = offset_in_page(peers_ch);
sg.length = p.length;
- crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->conf.shared_secret,
+ crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
&key_len, &sg, 1, response);
rv = drbd_send_cmd2(mdev,AuthResponse,response,resp_size);
@@ -2586,14 +2597,14 @@
sg.page = virt_to_page(my_challenge);
sg.offset = offset_in_page(my_challenge);
sg.length = CHALLENGE_LEN;
- crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->conf.shared_secret,
+ crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
&key_len, &sg, 1, right_response);
rv = ! memcmp(response,right_response,resp_size);
if(rv) {
INFO("Peer authenticated usind %d bytes of '%s' HMAC\n",
- resp_size,mdev->conf.cram_hmac_alg);
+ resp_size,mdev->net_conf->cram_hmac_alg);
}
fail:
@@ -2607,6 +2618,7 @@
int drbdd_init(struct Drbd_thread *thi)
{
+ enum disconnect_handler on_disconnect = Reconnect;
drbd_dev *mdev = thi->mdev;
int minor = (int)(mdev-drbd_conf);
@@ -2626,10 +2638,14 @@
break;
}
if (get_t_state(thi) == Exiting) break;
- drbdd(mdev);
+ if(inc_net(mdev)) {
+ drbdd(mdev);
+ on_disconnect = mdev->net_conf->on_disconnect;
+ dec_net(mdev);
+ }
drbd_disconnect(mdev);
if (get_t_state(thi) == Exiting) break;
- if(mdev->conf.on_disconnect == DropNetConf) {
+ if(on_disconnect == DropNetConf) {
drbd_force_state(mdev,NS(conn,StandAlone));
break;
}
@@ -2660,7 +2676,7 @@
STATIC int got_PingAck(drbd_dev *mdev, Drbd_Header* h)
{
// restore idle timeout
- mdev->meta.socket->sk->sk_rcvtimeo = mdev->conf.ping_int*HZ;
+ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
return TRUE;
}
@@ -2698,7 +2714,7 @@
drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
if (test_bit(SYNC_STARTED,&mdev->flags) &&
- mdev->conf.wire_protocol == DRBD_PROT_C)
+ mdev->net_conf->wire_protocol == DRBD_PROT_C)
drbd_set_in_sync(mdev,sector,blksize);
}
}
@@ -2706,7 +2722,7 @@
if(is_syncer_block_id(p->block_id)) {
dec_rs_pending(mdev);
} else {
- D_ASSERT(mdev->conf.wire_protocol != DRBD_PROT_A);
+ D_ASSERT(mdev->net_conf->wire_protocol != DRBD_PROT_A);
dec_ap_pending(mdev);
}
return TRUE;
@@ -2849,7 +2865,7 @@
// half ack timeout only,
// since sendmsg waited the other half already
mdev->meta.socket->sk->sk_rcvtimeo =
- mdev->conf.timeout*HZ/20;
+ mdev->net_conf->timeout*HZ/20;
}
if (!drbd_process_done_ee(mdev)) goto err;
@@ -2878,7 +2894,7 @@
goto err;
} else if (rv == -EAGAIN) {
if( mdev->meta.socket->sk->sk_rcvtimeo ==
- mdev->conf.timeout*HZ/20) {
+ mdev->net_conf->timeout*HZ/20) {
ERR("PingAck did not arrive in time.\n");
goto err;
}
Modified: trunk/drbd/drbd_req.c
===================================================================
--- trunk/drbd/drbd_req.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_req.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -85,7 +85,7 @@
}
uptodate = req->rq_status & 0x0001;
- if( !uptodate && mdev->on_io_error == Detach) {
+ if( !uptodate && mdev->bc->on_io_error == Detach) {
drbd_set_out_of_sync(mdev,rsector, drbd_req_get_size(req));
// It should also be as out of sync on
// the other side! See w_io_error()
@@ -207,7 +207,7 @@
req->master_bio = bio_src;
req->private_bio = bio;
- bio->bi_bdev = mdev->backing_bdev;
+ bio->bi_bdev = mdev->bc->backing_bdev;
bio->bi_private = req;
bio->bi_end_io =
bio_data_dir(bio) == WRITE
@@ -342,7 +342,7 @@
drbd_end_req(req, RQ_DRBD_DONE, 1, sector);
break;
default: /* block was sent */
- if(mdev->conf.wire_protocol == DRBD_PROT_A) {
+ if(mdev->net_conf->wire_protocol == DRBD_PROT_A) { // PRE LOCKING
dec_ap_pending(mdev);
drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
}
Modified: trunk/drbd/drbd_strings.c
===================================================================
--- trunk/drbd/drbd_strings.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_strings.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -52,6 +52,7 @@
[DUnknown] = "DUnknown",
[Diskless] = "Diskless",
[Failed] = "Failed",
+ [Attaching] = "Attaching",
[Inconsistent] = "Inconsistent",
[Outdated] = "Outdated",
[Consistent] = "Consistent",
Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/drbd_worker.c 2005-11-15 15:38:40 UTC (rev 2004)
@@ -172,7 +172,7 @@
if (bio_rw(bio) == READA) goto pass_on;
if (error) {
drbd_chk_io_error(mdev,error); // handle panic and detach.
- if(mdev->on_io_error == PassOn) goto pass_on;
+ if(mdev->bc->on_io_error == PassOn) goto pass_on;
// ok, if we survived this, retry:
// FIXME sector ...
if (DRBD_ratelimit(5*HZ,5))
@@ -203,7 +203,7 @@
* a "we are diskless" param packet anyways, and the peer
* will then set the FullSync bit in the meta data ...
*/
- D_ASSERT(mdev->on_io_error != PassOn);
+ D_ASSERT(mdev->bc->on_io_error != PassOn);
drbd_req_free(req);
Modified: trunk/drbd/linux/drbd.h
===================================================================
--- trunk/drbd/linux/drbd.h 2005-11-15 15:29:47 UTC (rev 2003)
+++ trunk/drbd/linux/drbd.h 2005-11-15 15:38:40 UTC (rev 2004)
@@ -154,6 +154,8 @@
CRAMAlgNotDigest,
KMallocFailed,
DiscardNotAllowed,
+ StateNotAllowed,
+ GotSignal, // EINTR
};
struct ioctl_disk_config {
@@ -227,6 +229,7 @@
typedef enum {
Diskless,
Failed, /* Becomes Diskless as soon as we told it the peer */
+ Attaching, /* In the process of reading the meta-data */
Inconsistent,
Outdated,
DUnknown,
More information about the drbd-cvs mailing list