[DRBD-cvs] svn commit by phil - r2693 - trunk/drbd - In case a resync is finished by application-IO instead

drbd-cvs at lists.linbit.com drbd-cvs at lists.linbit.com
Mon Jan 15 15:26:19 CET 2007


Author: phil
Date: 2007-01-15 15:26:17 +0100 (Mon, 15 Jan 2007)
New Revision: 2693

Modified:
   trunk/drbd/drbd_actlog.c
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_worker.c
Log:
In case a resync is finished by application IO instead of by our
self-generated resync requests, we usually get into trouble in drbd_rs_del_all().

Now that function might return -EAGAIN which indicates that there are
still open references.

drbd_resync_finished() retries drbd_rs_del_all() until it succeeds.


Modified: trunk/drbd/drbd_actlog.c
===================================================================
--- trunk/drbd/drbd_actlog.c	2007-01-15 09:41:51 UTC (rev 2692)
+++ trunk/drbd/drbd_actlog.c	2007-01-15 14:26:17 UTC (rev 2693)
@@ -1144,10 +1144,11 @@
 
 /**
  * drbd_rs_del_all: Gracefully remove all extents from the resync LRU.
- * there may be still a reference hold by w_make_resync_request
- * (drbd_try_rs_begin_io). we lc_del that here anyways...
+ * there may be still a reference hold by someone. In that this function
+ * returns -EAGAIN.
+ * In case all elements got removed it returns zero.
  */
-void drbd_rs_del_all(drbd_dev* mdev)
+int drbd_rs_del_all(drbd_dev* mdev)
 {
 	struct bm_extent* bm_ext;
 	int i;
@@ -1162,18 +1163,7 @@
 		for(i=0;i<mdev->resync->nr_elements;i++) {
 			bm_ext = (struct bm_extent*) lc_entry(mdev->resync,i);
 			if(bm_ext->lce.lc_number == LC_FREE) continue;
-			if(bm_ext->lce.refcnt != 0) {
-				if (bm_ext->lce.refcnt != 1) {
-					ALERT("LOGIC BUG detected in %s:%d\n", __FILE__ , __LINE__ );
-					/* this should not happen. but rather
-					 * have some asserts trigger
-					 * than BUG() in lc_del! */
-					continue;
-				}
-				if (bm_ext->lce.lc_number != mdev->resync_wenr) {
-					ALERT("LOGIC BUG detected in %s:%d\n", __FILE__ , __LINE__ );
-					continue;
-				}
+			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
 				INFO("dropping %u in drbd_rs_del_all, "
 				     "aparently got 'synced' by application io\n",
 				     mdev->resync_wenr);
@@ -1183,6 +1173,12 @@
 				mdev->resync_wenr = LC_FREE;
 				lc_put(mdev->resync,&bm_ext->lce);
 			}
+			if(bm_ext->lce.refcnt != 0) {
+				INFO("Retrying drbd_rs_del_all() later. "
+				     "refcnt=%d\n",bm_ext->lce.refcnt);
+				spin_unlock_irq(&mdev->al_lock);
+				return -EAGAIN;
+			}
 			D_ASSERT(bm_ext->rs_left == 0);
 			D_ASSERT(!test_bit(BME_LOCKED,&bm_ext->flags));
 			D_ASSERT(!test_bit(BME_NO_WRITES,&bm_ext->flags));
@@ -1192,6 +1188,8 @@
 		dec_local(mdev);
 	}
 	spin_unlock_irq(&mdev->al_lock);
+	
+	return 0;
 }
 
 /* Record information on a failure to resync the specified blocks

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2007-01-15 09:41:51 UTC (rev 2692)
+++ trunk/drbd/drbd_int.h	2007-01-15 14:26:17 UTC (rev 2693)
@@ -1387,7 +1387,7 @@
 extern int drbd_rs_begin_io(struct Drbd_Conf *mdev, sector_t sector);
 extern int drbd_try_rs_begin_io(struct Drbd_Conf *mdev, sector_t sector);
 extern void drbd_rs_cancel_all(drbd_dev* mdev);
-extern void drbd_rs_del_all(drbd_dev* mdev);
+extern int drbd_rs_del_all(drbd_dev* mdev);
 extern void drbd_rs_failed_io(drbd_dev* mdev, sector_t sector, int size);
 extern int drbd_al_read_log(struct Drbd_Conf *mdev,struct drbd_backing_dev *);
 extern void __drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line);

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2007-01-15 09:41:51 UTC (rev 2692)
+++ trunk/drbd/drbd_worker.c	2007-01-15 14:26:17 UTC (rev 2693)
@@ -384,11 +384,44 @@
 	return 1;
 }
 
+int w_resync_finished(drbd_dev *mdev, struct drbd_work *w, int cancel)
+{
+	kfree(w);
+
+	drbd_bm_lock(mdev);
+	drbd_resync_finished(mdev);
+	drbd_bm_unlock(mdev);
+
+	return 1;
+}
+
 int drbd_resync_finished(drbd_dev* mdev)
 {
 	unsigned long db,dt,dbdt;
 	int dstate, pdstate;
+	struct drbd_work *w;
 
+	// Remove all elements from the resync LRU. Since future actions
+	// might set bits in the (main) bitmap, then the entries in the
+	// resync LRU would be wrong.
+	if(drbd_rs_del_all(mdev)) {
+		// In case this is not possible now, most probabely because
+		// there are RSDataReply Packets lingering on the worker's
+		// queue (or even the read operations for those packets
+		// is not finished by now).   Retry in 100ms.
+		
+		drbd_kick_lo(mdev);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ / 10);
+		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
+		if(w) {
+			w->cb = w_resync_finished;
+			drbd_queue_work(&mdev->data.work,w);
+			return 1;
+		}
+		ERR("Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
+	}
+
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	if (dt <= 0) dt=1;
 	db = mdev->rs_total;
@@ -443,11 +476,6 @@
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
 
-	// Remove all elements from the resync LRU. Since future actions
-	// might set bits in the (main) bitmap, then the entries in the
-	// resync LRU would be wrong.
-	drbd_rs_del_all(mdev);
-
 	if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC,&mdev->flags)) {
 		WARN("Writing the whole bitmap, due to failed kmalloc\n");
 		drbd_bm_write(mdev);



More information about the drbd-cvs mailing list