[DRBD-cvs] r1914 - branches/drbd-0.7/drbd

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Wed Aug 10 14:46:44 CEST 2005


Author: lars
Date: 2005-08-10 14:46:43 +0200 (Wed, 10 Aug 2005)
New Revision: 1914

Modified:
   branches/drbd-0.7/drbd/drbd_bitmap.c
   branches/drbd-0.7/drbd/drbd_fs.c
   branches/drbd-0.7/drbd/drbd_main.c
   branches/drbd-0.7/drbd/drbd_receiver.c
Log:
* (try to) handle bitmap allocation failure more gracefully
* reduce a race between an ioctl-initiated invalidate
  and a receiver-thread-initiated become-sync-source


Modified: branches/drbd-0.7/drbd/drbd_bitmap.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_bitmap.c	2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_bitmap.c	2005-08-10 12:46:43 UTC (rev 1914)
@@ -361,7 +361,7 @@
 	unsigned long bits, bytes, words, *nbm, *obm = 0;
 	int err = 0, growing;
 
-	D_BUG_ON(!b);
+	ERR_IF(!b) return -ENOMEM;
 	MUST_BE_LOCKED();
 
 	ERR_IF (down_trylock(&b->bm_change)) {
@@ -462,7 +462,7 @@
 	unsigned long s;
 	unsigned long flags;
 
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 0;
 	// MUST_BE_LOCKED(); well. yes. but ...
 
 	spin_lock_irqsave(&b->bm_lock,flags);
@@ -475,7 +475,7 @@
 size_t drbd_bm_words(drbd_dev *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 0;
 
 	/* FIXME
 	 * actually yes. really. otherwise it could just change its size ...
@@ -497,7 +497,8 @@
 	unsigned long word, bits;
 	size_t n = number;
 
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm) return;
 	D_BUG_ON(offset        >= b->bm_words);
 	D_BUG_ON(offset+number >  b->bm_words);
 	D_BUG_ON(number > PAGE_SIZE/sizeof(long));
@@ -536,7 +537,8 @@
 	unsigned long word, bits;
 	size_t n = number;
 
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm) return;
 	D_BUG_ON(offset        >= b->bm_words);
 	D_BUG_ON(offset+number >  b->bm_words);
 	D_BUG_ON(number > PAGE_SIZE/sizeof(long));
@@ -573,7 +575,8 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long *bm;
 
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm) return;
 	if ( (offset        >= b->bm_words) ||
 	     (offset+number >  b->bm_words) ||
 	     (number > PAGE_SIZE/sizeof(long)) ||
@@ -599,7 +602,8 @@
 void drbd_bm_set_all(drbd_dev *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm) return;
 
 	MUST_BE_LOCKED();
 
@@ -743,7 +747,8 @@
 void drbd_bm_clear_all(drbd_dev *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return;
+	ERR_IF(!b->bm) return;
 
 	MUST_BE_LOCKED();						\
 
@@ -757,7 +762,7 @@
 void drbd_bm_reset_find(drbd_dev *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return;
 
 	MUST_BE_LOCKED();
 
@@ -777,7 +782,8 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long i = -1UL;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return i;
+	ERR_IF(!b->bm) return i;
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
@@ -810,7 +816,8 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	int i;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 1;
+	ERR_IF(!b->bm) return 1;
 
 /*
  * only called from drbd_set_out_of_sync.
@@ -844,7 +851,8 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	int i;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm) return 0;
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
@@ -877,7 +885,8 @@
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	int i;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm) return 0;
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
@@ -911,7 +920,8 @@
 	int count, s, e;
 	unsigned long flags;
 
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm) return 0;
 	spin_lock_irqsave(&b->bm_lock,flags);
 	BM_PARANOIA_CHECK();
 
@@ -938,7 +948,8 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long weight;
 	int count, s, e;
-	D_BUG_ON(!(b && b->bm));
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm) return 0;
 
 	MUST_BE_LOCKED();
 

Modified: branches/drbd-0.7/drbd/drbd_fs.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_fs.c	2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_fs.c	2005-08-10 12:46:43 UTC (rev 1914)
@@ -81,6 +81,7 @@
 	la_size = mdev->la_size;
 
 	rv = do_determin_dev_size(mdev);
+	if (rv < 0) goto out;
 
 	la_size_changed = (la_size != mdev->la_size);
 	md_moved = pmdss != drbd_md_ss(mdev) /* && mdev->md_index == -1 */;
@@ -96,6 +97,7 @@
 		// Write mdev->la_size to [possibly new position on] disk.
 		drbd_md_write(mdev);
 	}
+  out:
 	lc_unlock(mdev->act_log);
 
 	return rv;
@@ -116,7 +118,9 @@
 }
 
 
-/* Returns 1 if there is a disk-less node, 0 if both nodes have a disk. */
+/* Returns 1 if there is a disk-less node, 0 if both nodes have a disk.
+ * -ENOMEM if we could not allocate the bitmap
+ */
 /*
  * THINK do we want the size to be KB or sectors ?
  * note, *_capacity operates in 512 byte sectors!!
@@ -182,7 +186,7 @@
 			/* currently there is only one error: ENOMEM! */
 			size = drbd_bm_capacity(mdev)>>1;
 			if (size == 0) {
-				ERR("Could not allocate bitmap! Set device size => 0\n");
+				ERR("OUT OF MEMORY! Could not allocate bitmap! Set device size => 0\n");
 			} else {
 				/* FIXME this is problematic,
 				 * if we in fact are smaller now! */
@@ -190,6 +194,7 @@
 				    "Leaving size unchanged at size = %lu KB\n", 
 				    (unsigned long)size);
 			}
+			rv = err;
 		}
 		// racy, see comments above.
 		drbd_set_my_capacity(mdev,size<<1);
@@ -252,6 +257,8 @@
 	return 0;
 }
 
+STATIC int drbd_detach_ioctl(drbd_dev *mdev);
+
 STATIC
 int drbd_ioctl_set_disk(struct Drbd_Conf *mdev,
 			struct ioctl_disk_config * arg)
@@ -305,6 +312,9 @@
 		__module_get(THIS_MODULE);
 		mput = 1;
 	} else {
+		/* We currently cannot handle reattach while connected */
+		return -EBUSY;
+
 		/* FIXME allow reattach while connected,
 		 * and allow it in Primary/Diskless state...
 		 * currently there are strange races leading to a distributed
@@ -475,12 +485,23 @@
 /* FIXME if (md_gc_valid < 0) META DATA IO NOT POSSIBLE! */
 
 	drbd_bm_lock(mdev); // racy...
-	drbd_determin_dev_size(mdev);
-	/* FIXME
-	 * what if we now have la_size == 0 ?? eh?
-	 * BOOM?
-	 */
+	if (drbd_determin_dev_size(mdev) < 0) {
+		/* could not allocate bitmap.
+		 * try to undo ... */
+		D_ASSERT(mdev->cstate == Unconfigured);
+		D_ASSERT(mput == 1);
 
+		drbd_bm_unlock(mdev);
+
+		/* from drbd_detach_ioctl */
+		drbd_free_ll_dev(mdev);
+
+		set_cstate(mdev,Unconfigured);
+		drbd_mdev_cleanup(mdev);
+		module_put(THIS_MODULE);
+		return -ENOMEM;
+	}
+
 	if (md_gc_valid <= 0) {
 		INFO("Assuming that all blocks are out of sync (aka FullSync)\n");
 		drbd_bm_set_all(mdev);
@@ -494,7 +515,6 @@
 
 	i = drbd_check_al_size(mdev);
 	if (i) {
-// FATAL!
 		/* FIXME see the comment above.
 		 * if this fails I need to undo all changes,
 		 * go back into Unconfigured,
@@ -502,7 +522,6 @@
 		 */
 		// return i;
 		drbd_panic("Cannot allocate act_log\n");
-		drbd_suicide();
 	}
 
 	if (md_gc_valid > 0) {
@@ -1277,6 +1296,10 @@
 			break;
 		}
 
+		drbd_md_set_flag(mdev,MDF_FullSync);
+		drbd_md_clear_flag(mdev,MDF_Consistent);
+		drbd_md_write(mdev);
+
 		if (mdev->cstate == Connected) {
 			/* avoid races with set_in_sync
 			 * for successfull mirrored writes
@@ -1288,10 +1311,6 @@
 
 		drbd_bm_lock(mdev); // racy...
 
-		drbd_md_set_flag(mdev,MDF_FullSync);
-		drbd_md_clear_flag(mdev,MDF_Consistent);
-		drbd_md_write(mdev);
-
 		drbd_bm_set_all(mdev);
 		drbd_bm_write(mdev);
 

Modified: branches/drbd-0.7/drbd/drbd_main.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_main.c	2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_main.c	2005-08-10 12:46:43 UTC (rev 1914)
@@ -1472,6 +1472,7 @@
 	mdev->rs_mark_time = 0;
 	mdev->send_task    = NULL;
 	drbd_set_my_capacity(mdev,0);
+	drbd_bm_resize(mdev,0);
 
 	// just in case
 	drbd_free_resources(mdev);

Modified: branches/drbd-0.7/drbd/drbd_receiver.c
===================================================================
--- branches/drbd-0.7/drbd/drbd_receiver.c	2005-08-05 13:21:30 UTC (rev 1913)
+++ branches/drbd-0.7/drbd/drbd_receiver.c	2005-08-10 12:46:43 UTC (rev 1914)
@@ -1690,31 +1690,36 @@
 STATIC int receive_BecomeSyncTarget(drbd_dev *mdev, Drbd_Header *h)
 {
 	ERR_IF(!mdev->bitmap) return FALSE;
-
-	/* THINK
-	 * otherwise this does not make much sense, no?
-	 * and some other assertion maybe about cstate...
-	 */
-	ERR_IF(mdev->state != Secondary || mdev->cstate != Connected)
+	ERR_IF(mdev->state != Secondary)
 		return FALSE;
+	ERR_IF(mdev->cstate != Connected)
+		return FALSE;
+	ERR_IF(test_bit(DISKLESS,&mdev->flags))
+		return FALSE;
 
 	drbd_bm_lock(mdev);
 	drbd_bm_set_all(mdev);
 	drbd_bm_write(mdev);
 	drbd_start_resync(mdev,SyncTarget);
 	drbd_bm_unlock(mdev);
-	return TRUE; // cannot fail ?
+	return TRUE;
 }
 
 STATIC int receive_BecomeSyncSource(drbd_dev *mdev, Drbd_Header *h)
 {
-	// FIXME asserts ?
+	ERR_IF(mdev->cstate != Connected)
+		return FALSE;
+	ERR_IF(test_bit(DISKLESS,&mdev->flags))
+		return FALSE;
+	ERR_IF(!drbd_md_test_flag(mdev,MDF_Consistent))
+		return FALSE;
+
 	drbd_bm_lock(mdev);
 	drbd_bm_set_all(mdev);
 	drbd_bm_write(mdev);
 	drbd_start_resync(mdev,SyncSource);
 	drbd_bm_unlock(mdev);
-	return TRUE; // cannot fail ?
+	return TRUE;
 }
 
 STATIC int receive_UnplugRemote(drbd_dev *mdev, Drbd_Header *h)
@@ -2144,25 +2149,40 @@
 
 STATIC int got_NegDReply(drbd_dev *mdev, Drbd_Header* h)
 {
-	drbd_request_t *req;
+	/* drbd_request_t *req;
+	 * unused now */
 	Drbd_BlockAck_Packet *p = (Drbd_BlockAck_Packet*)h;
 
+	if (is_syncer_blk(mdev,p->block_id)) {
+		/* no resync data available. don't panic just yet ... */
+		printk(KERN_EMERG DEVICE_NAME "%d: "
+		       "Got NegDReply for resync request. "
+		       "WE ARE LOST. We lost our up-to-date disk.\n",
+			(int)(mdev-drbd_conf));
+		return FALSE;
+	} /* else { */
+
+#if 0
+	/* hey, we panic anyways. so why bother? */
 	req = (drbd_request_t *)(long)p->block_id;
-	D_ASSERT(req->w.cb == w_is_app_read);
+	if (VALID_POINTER(req)) {
+		D_ASSERT(req->w.cb == w_is_app_read);
 
-	spin_lock(&mdev->pr_lock);
-	list_del(&req->w.list);
-	spin_unlock(&mdev->pr_lock);
+		spin_lock(&mdev->pr_lock);
+		list_del(&req->w.list);
+		spin_unlock(&mdev->pr_lock);
 
-	INVALIDATE_MAGIC(req);
-	mempool_free(req,drbd_request_mempool);
+		INVALIDATE_MAGIC(req);
+		mempool_free(req,drbd_request_mempool);
+	}
+#endif
 
 	drbd_panic("Got NegDReply. WE ARE LOST. We lost our up-to-date disk.\n");
 
 	// THINK do we have other options, but panic?
 	//       what about bio_endio, in case we don't panic ??
 
-	return TRUE;
+	return FALSE;
 }
 
 STATIC int got_NegRSDReply(drbd_dev *mdev, Drbd_Header* h)



More information about the drbd-cvs mailing list