[DRBD-cvs] r1914 - branches/drbd-0.7/drbd
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Wed Aug 10 14:46:44 CEST 2005
Author: lars
Date: 2005-08-10 14:46:43 +0200 (Wed, 10 Aug 2005)
New Revision: 1914
Modified:
branches/drbd-0.7/drbd/drbd_bitmap.c
branches/drbd-0.7/drbd/drbd_fs.c
branches/drbd-0.7/drbd/drbd_main.c
branches/drbd-0.7/drbd/drbd_receiver.c
Log:
* (try to) handle bitmap allocation failure more gracefully
* reduce a race between an ioctl-initiated invalidate
  and a receiver-thread-initiated become-sync-source
Modified: branches/drbd-0.7/drbd/drbd_bitmap.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_bitmap.c 2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_bitmap.c 2005-08-10 12:46:43 UTC (rev 1914)
@@ -361,7 +361,7 @@
unsigned long bits, bytes, words, *nbm, *obm = 0;
int err = 0, growing;
- D_BUG_ON(!b);
+ ERR_IF(!b) return -ENOMEM;
MUST_BE_LOCKED();
ERR_IF (down_trylock(&b->bm_change)) {
@@ -462,7 +462,7 @@
unsigned long s;
unsigned long flags;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 0;
// MUST_BE_LOCKED(); well. yes. but ...
spin_lock_irqsave(&b->bm_lock,flags);
@@ -475,7 +475,7 @@
size_t drbd_bm_words(drbd_dev *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 0;
/* FIXME
* actually yes. really. otherwise it could just change its size ...
@@ -497,7 +497,8 @@
unsigned long word, bits;
size_t n = number;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm) return;
D_BUG_ON(offset >= b->bm_words);
D_BUG_ON(offset+number > b->bm_words);
D_BUG_ON(number > PAGE_SIZE/sizeof(long));
@@ -536,7 +537,8 @@
unsigned long word, bits;
size_t n = number;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm) return;
D_BUG_ON(offset >= b->bm_words);
D_BUG_ON(offset+number > b->bm_words);
D_BUG_ON(number > PAGE_SIZE/sizeof(long));
@@ -573,7 +575,8 @@
struct drbd_bitmap *b = mdev->bitmap;
unsigned long *bm;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm) return;
if ( (offset >= b->bm_words) ||
(offset+number > b->bm_words) ||
(number > PAGE_SIZE/sizeof(long)) ||
@@ -599,7 +602,8 @@
void drbd_bm_set_all(drbd_dev *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm) return;
MUST_BE_LOCKED();
@@ -743,7 +747,8 @@
void drbd_bm_clear_all(drbd_dev *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm) return;
MUST_BE_LOCKED(); \
@@ -757,7 +762,7 @@
void drbd_bm_reset_find(drbd_dev *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return;
MUST_BE_LOCKED();
@@ -777,7 +782,8 @@
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long i = -1UL;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return i;
+ ERR_IF(!b->bm) return i;
spin_lock_irq(&b->bm_lock);
BM_PARANOIA_CHECK();
@@ -810,7 +816,8 @@
{
struct drbd_bitmap *b = mdev->bitmap;
int i;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 1;
+ ERR_IF(!b->bm) return 1;
/*
* only called from drbd_set_out_of_sync.
@@ -844,7 +851,8 @@
{
struct drbd_bitmap *b = mdev->bitmap;
int i;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm) return 0;
spin_lock_irq(&b->bm_lock);
BM_PARANOIA_CHECK();
@@ -877,7 +885,8 @@
{
struct drbd_bitmap *b = mdev->bitmap;
int i;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm) return 0;
spin_lock_irq(&b->bm_lock);
BM_PARANOIA_CHECK();
@@ -911,7 +920,8 @@
int count, s, e;
unsigned long flags;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm) return 0;
spin_lock_irqsave(&b->bm_lock,flags);
BM_PARANOIA_CHECK();
@@ -938,7 +948,8 @@
struct drbd_bitmap *b = mdev->bitmap;
unsigned long weight;
int count, s, e;
- D_BUG_ON(!(b && b->bm));
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm) return 0;
MUST_BE_LOCKED();
Modified: branches/drbd-0.7/drbd/drbd_fs.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_fs.c 2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_fs.c 2005-08-10 12:46:43 UTC (rev 1914)
@@ -81,6 +81,7 @@
la_size = mdev->la_size;
rv = do_determin_dev_size(mdev);
+ if (rv < 0) goto out;
la_size_changed = (la_size != mdev->la_size);
md_moved = pmdss != drbd_md_ss(mdev) /* && mdev->md_index == -1 */;
@@ -96,6 +97,7 @@
// Write mdev->la_size to [possibly new position on] disk.
drbd_md_write(mdev);
}
+ out:
lc_unlock(mdev->act_log);
return rv;
@@ -116,7 +118,9 @@
}
-/* Returns 1 if there is a disk-less node, 0 if both nodes have a disk. */
+/* Returns 1 if there is a disk-less node, 0 if both nodes have a disk.
+ * -ENOMEM if we could not allocate the bitmap
+ */
/*
* THINK do we want the size to be KB or sectors ?
* note, *_capacity operates in 512 byte sectors!!
@@ -182,7 +186,7 @@
/* currently there is only one error: ENOMEM! */
size = drbd_bm_capacity(mdev)>>1;
if (size == 0) {
- ERR("Could not allocate bitmap! Set device size => 0\n");
+ ERR("OUT OF MEMORY! Could not allocate bitmap! Set device size => 0\n");
} else {
/* FIXME this is problematic,
* if we in fact are smaller now! */
@@ -190,6 +194,7 @@
"Leaving size unchanged at size = %lu KB\n",
(unsigned long)size);
}
+ rv = err;
}
// racy, see comments above.
drbd_set_my_capacity(mdev,size<<1);
@@ -252,6 +257,8 @@
return 0;
}
+STATIC int drbd_detach_ioctl(drbd_dev *mdev);
+
STATIC
int drbd_ioctl_set_disk(struct Drbd_Conf *mdev,
struct ioctl_disk_config * arg)
@@ -305,6 +312,9 @@
__module_get(THIS_MODULE);
mput = 1;
} else {
+ /* We currently cannot handle reattach while connected */
+ return -EBUSY;
+
/* FIXME allow reattach while connected,
* and allow it in Primary/Diskless state...
* currently there are strange races leading to a distributed
@@ -475,12 +485,23 @@
/* FIXME if (md_gc_valid < 0) META DATA IO NOT POSSIBLE! */
drbd_bm_lock(mdev); // racy...
- drbd_determin_dev_size(mdev);
- /* FIXME
- * what if we now have la_size == 0 ?? eh?
- * BOOM?
- */
+ if (drbd_determin_dev_size(mdev) < 0) {
+ /* could not allocate bitmap.
+ * try to undo ... */
+ D_ASSERT(mdev->cstate == Unconfigured);
+ D_ASSERT(mput == 1);
+ drbd_bm_unlock(mdev);
+
+ /* from drbd_detach_ioctl */
+ drbd_free_ll_dev(mdev);
+
+ set_cstate(mdev,Unconfigured);
+ drbd_mdev_cleanup(mdev);
+ module_put(THIS_MODULE);
+ return -ENOMEM;
+ }
+
if (md_gc_valid <= 0) {
INFO("Assuming that all blocks are out of sync (aka FullSync)\n");
drbd_bm_set_all(mdev);
@@ -494,7 +515,6 @@
i = drbd_check_al_size(mdev);
if (i) {
-// FATAL!
/* FIXME see the comment above.
* if this fails I need to undo all changes,
* go back into Unconfigured,
@@ -502,7 +522,6 @@
*/
// return i;
drbd_panic("Cannot allocate act_log\n");
- drbd_suicide();
}
if (md_gc_valid > 0) {
@@ -1277,6 +1296,10 @@
break;
}
+ drbd_md_set_flag(mdev,MDF_FullSync);
+ drbd_md_clear_flag(mdev,MDF_Consistent);
+ drbd_md_write(mdev);
+
if (mdev->cstate == Connected) {
/* avoid races with set_in_sync
* for successfull mirrored writes
@@ -1288,10 +1311,6 @@
drbd_bm_lock(mdev); // racy...
- drbd_md_set_flag(mdev,MDF_FullSync);
- drbd_md_clear_flag(mdev,MDF_Consistent);
- drbd_md_write(mdev);
-
drbd_bm_set_all(mdev);
drbd_bm_write(mdev);
Modified: branches/drbd-0.7/drbd/drbd_main.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_main.c 2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_main.c 2005-08-10 12:46:43 UTC (rev 1914)
@@ -1472,6 +1472,7 @@
mdev->rs_mark_time = 0;
mdev->send_task = NULL;
drbd_set_my_capacity(mdev,0);
+ drbd_bm_resize(mdev,0);
// just in case
drbd_free_resources(mdev);
Modified: branches/drbd-0.7/drbd/drbd_receiver.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_receiver.c 2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_receiver.c 2005-08-10 12:46:43 UTC (rev 1914)
@@ -1690,31 +1690,36 @@
STATIC int receive_BecomeSyncTarget(drbd_dev *mdev, Drbd_Header *h)
{
ERR_IF(!mdev->bitmap) return FALSE;
-
- /* THINK
- * otherwise this does not make much sense, no?
- * and some other assertion maybe about cstate...
- */
- ERR_IF(mdev->state != Secondary || mdev->cstate != Connected)
+ ERR_IF(mdev->state != Secondary)
return FALSE;
+ ERR_IF(mdev->cstate != Connected)
+ return FALSE;
+ ERR_IF(test_bit(DISKLESS,&mdev->flags))
+ return FALSE;
drbd_bm_lock(mdev);
drbd_bm_set_all(mdev);
drbd_bm_write(mdev);
drbd_start_resync(mdev,SyncTarget);
drbd_bm_unlock(mdev);
- return TRUE; // cannot fail ?
+ return TRUE;
}
STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
{
- // FIXME asserts ?
+ ERR_IF(mdev->cstate != Connected)
+ return FALSE;
+ ERR_IF(test_bit(DISKLESS,&mdev->flags))
+ return FALSE;
+ ERR_IF(!drbd_md_test_flag(mdev,MDF_Consistent))
+ return FALSE;
+
drbd_bm_lock(mdev);
drbd_bm_set_all(mdev);
drbd_bm_write(mdev);
drbd_start_resync(mdev,SyncSource);
drbd_bm_unlock(mdev);
- return TRUE; // cannot fail ?
+ return TRUE;
}
STATIC int receive_UnplugRemote(drbd_dev *mdev, Drbd_Header *h)
@@ -2144,25 +2149,40 @@
STATIC int got_NegDReply(drbd_dev *mdev, Drbd_Header* h)
{
- drbd_request_t *req;
+ /* drbd_request_t *req;
+ * unused now */
Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
+ if (is_syncer_blk(mdev,p->block_id)) {
+ /* no resync data available. don't panic just yet ... */
+ printk(KERN_EMERG DEVICE_NAME "%d: "
+ "Got NegDReply for resync request. "
+ "WE ARE LOST. We lost our up-to-date disk.\n",
+ (int)(mdev-drbd_conf));
+ return FALSE;
+ } /* else { */
+
+#if 0
+ /* hey, we panic anyways. so why bother? */
req = (drbd_request_t *)(long)p->block_id;
- D_ASSERT(req->w.cb == w_is_app_read);
+ if (VALID_POINTER(req)) {
+ D_ASSERT(req->w.cb == w_is_app_read);
- spin_lock(&mdev->pr_lock);
- list_del(&req->w.list);
- spin_unlock(&mdev->pr_lock);
+ spin_lock(&mdev->pr_lock);
+ list_del(&req->w.list);
+ spin_unlock(&mdev->pr_lock);
- INVALIDATE_MAGIC(req);
- mempool_free(req,drbd_request_mempool);
+ INVALIDATE_MAGIC(req);
+ mempool_free(req,drbd_request_mempool);
+ }
+#endif
drbd_panic("Got NegDReply. WE ARE LOST. We lost our up-to-date disk.\n");
// THINK do we have other options, but panic?
// what about bio_endio, in case we don't panic ??
- return TRUE;
+ return FALSE;
}
STATIC int got_NegRSDReply(drbd_dev *mdev, Drbd_Header* h)
More information about the drbd-cvs mailing list