[DRBD-cvs] svn commit by phil - r2693 - trunk/drbd - In case a
resync is finished by application-IO instead
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Mon Jan 15 15:26:19 CET 2007
Author: phil
Date: 2007-01-15 15:26:17 +0100 (Mon, 15 Jan 2007)
New Revision: 2693
Modified:
trunk/drbd/drbd_actlog.c
trunk/drbd/drbd_int.h
trunk/drbd/drbd_worker.c
Log:
In case a resync is finished by application IO instead of by our
self-generated resync requests, we usually get into trouble in drbd_rs_del_all().
That function may now return -EAGAIN, which indicates that there are
still open references.
drbd_resync_finished() retries drbd_rs_del_all() until it succeeds.
Modified: trunk/drbd/drbd_actlog.c
===================================================================
--- trunk/drbd/drbd_actlog.c 2007-01-15 09:41:51 UTC (rev 2692)
+++ trunk/drbd/drbd_actlog.c 2007-01-15 14:26:17 UTC (rev 2693)
@@ -1144,10 +1144,11 @@
/**
* drbd_rs_del_all: Gracefully remove all extents from the resync LRU.
- * there may be still a reference hold by w_make_resync_request
- * (drbd_try_rs_begin_io). we lc_del that here anyways...
+ * there may be still a reference hold by someone. In that this function
+ * returns -EAGAIN.
+ * In case all elements got removed it returns zero.
*/
-void drbd_rs_del_all(drbd_dev* mdev)
+int drbd_rs_del_all(drbd_dev* mdev)
{
struct bm_extent* bm_ext;
int i;
@@ -1162,18 +1163,7 @@
for(i=0;i<mdev->resync->nr_elements;i++) {
bm_ext = (struct bm_extent*) lc_entry(mdev->resync,i);
if(bm_ext->lce.lc_number == LC_FREE) continue;
- if(bm_ext->lce.refcnt != 0) {
- if (bm_ext->lce.refcnt != 1) {
- ALERT("LOGIC BUG detected in %s:%d\n", __FILE__ , __LINE__ );
- /* this should not happen. but rather
- * have some asserts trigger
- * than BUG() in lc_del! */
- continue;
- }
- if (bm_ext->lce.lc_number != mdev->resync_wenr) {
- ALERT("LOGIC BUG detected in %s:%d\n", __FILE__ , __LINE__ );
- continue;
- }
+ if (bm_ext->lce.lc_number == mdev->resync_wenr) {
INFO("dropping %u in drbd_rs_del_all, "
"aparently got 'synced' by application io\n",
mdev->resync_wenr);
@@ -1183,6 +1173,12 @@
mdev->resync_wenr = LC_FREE;
lc_put(mdev->resync,&bm_ext->lce);
}
+ if(bm_ext->lce.refcnt != 0) {
+ INFO("Retrying drbd_rs_del_all() later. "
+ "refcnt=%d\n",bm_ext->lce.refcnt);
+ spin_unlock_irq(&mdev->al_lock);
+ return -EAGAIN;
+ }
D_ASSERT(bm_ext->rs_left == 0);
D_ASSERT(!test_bit(BME_LOCKED,&bm_ext->flags));
D_ASSERT(!test_bit(BME_NO_WRITES,&bm_ext->flags));
@@ -1192,6 +1188,8 @@
dec_local(mdev);
}
spin_unlock_irq(&mdev->al_lock);
+
+ return 0;
}
/* Record information on a failure to resync the specified blocks
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2007-01-15 09:41:51 UTC (rev 2692)
+++ trunk/drbd/drbd_int.h 2007-01-15 14:26:17 UTC (rev 2693)
@@ -1387,7 +1387,7 @@
extern int drbd_rs_begin_io(struct Drbd_Conf *mdev, sector_t sector);
extern int drbd_try_rs_begin_io(struct Drbd_Conf *mdev, sector_t sector);
extern void drbd_rs_cancel_all(drbd_dev* mdev);
-extern void drbd_rs_del_all(drbd_dev* mdev);
+extern int drbd_rs_del_all(drbd_dev* mdev);
extern void drbd_rs_failed_io(drbd_dev* mdev, sector_t sector, int size);
extern int drbd_al_read_log(struct Drbd_Conf *mdev,struct drbd_backing_dev *);
extern void __drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line);
Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c 2007-01-15 09:41:51 UTC (rev 2692)
+++ trunk/drbd/drbd_worker.c 2007-01-15 14:26:17 UTC (rev 2693)
@@ -384,11 +384,44 @@
return 1;
}
+int w_resync_finished(drbd_dev *mdev, struct drbd_work *w, int cancel)
+{
+ kfree(w);
+
+ drbd_bm_lock(mdev);
+ drbd_resync_finished(mdev);
+ drbd_bm_unlock(mdev);
+
+ return 1;
+}
+
int drbd_resync_finished(drbd_dev* mdev)
{
unsigned long db,dt,dbdt;
int dstate, pdstate;
+ struct drbd_work *w;
+ // Remove all elements from the resync LRU. Since future actions
+ // might set bits in the (main) bitmap, then the entries in the
+ // resync LRU would be wrong.
+ if(drbd_rs_del_all(mdev)) {
+ // In case this is not possible now, most probabely because
+ // there are RSDataReply Packets lingering on the worker's
+ // queue (or even the read operations for those packets
+ // is not finished by now). Retry in 100ms.
+
+ drbd_kick_lo(mdev);
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / 10);
+ w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
+ if(w) {
+ w->cb = w_resync_finished;
+ drbd_queue_work(&mdev->data.work,w);
+ return 1;
+ }
+ ERR("Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
+ }
+
dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
if (dt <= 0) dt=1;
db = mdev->rs_total;
@@ -443,11 +476,6 @@
mdev->rs_failed = 0;
mdev->rs_paused = 0;
- // Remove all elements from the resync LRU. Since future actions
- // might set bits in the (main) bitmap, then the entries in the
- // resync LRU would be wrong.
- drbd_rs_del_all(mdev);
-
if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC,&mdev->flags)) {
WARN("Writing the whole bitmap, due to failed kmalloc\n");
drbd_bm_write(mdev);
More information about the drbd-cvs
mailing list