From 229a5ee6327088ddd0f1d27c0aea17d94a6342f6 Mon Sep 17 00:00:00 2001 From: Simon P. Graham Date: Mon, 31 Dec 2007 20:28:57 -0500 Subject: [PATCH] Fix more sync stall cases due to deadlock between transfer log and resync. The following are fixed: 1. When changing role from Primary, it is necessary to clear out transfer log - do this by initiating barrier 2. When SyncTarget is also Primary, it is possible for drbd_try_rs_begin_io to never make progress due to entries in tl that will not be flushed. Change code to initiate barrier IF conflict with AL is found. --- drbd/drbd_actlog.c | 28 ++++++++++++++-------------- drbd/drbd_int.h | 3 ++- drbd/drbd_main.c | 29 ++++++++++++++++++++++++++++- drbd/drbd_nl.c | 19 +++++++++++++++++-- drbd/drbd_req.c | 9 +-------- 5 files changed, 62 insertions(+), 26 deletions(-) diff --git a/drbd/drbd_actlog.c b/drbd/drbd_actlog.c index 5505113..251d8fc 100644 --- a/drbd/drbd_actlog.c +++ b/drbd/drbd_actlog.c @@ -1143,20 +1143,7 @@ int drbd_rs_begin_io(drbd_dev* mdev, sector_t sector) return 0; } - WARN("Creating new epoch in drbd_rs_begin_io\n"); - spin_lock_irq(&mdev->req_lock); - - b = _tl_add_barrier(mdev,b); - b->w.cb = w_send_barrier; - /* inc_ap_pending done here, so we won't - * get imbalanced on connection loss. - * dec_ap_pending will be done in got_BarrierAck - * or (on connection loss) in tl_clear. */ - inc_ap_pending(mdev); - drbd_queue_work(&mdev->data.work, &b->w); - - spin_unlock_irq(&mdev->req_lock); - + tl_new_epoch(mdev, "drbd_rs_begin_io", b); break; } } @@ -1198,6 +1185,7 @@ int drbd_try_rs_begin_io(drbd_dev* mdev, sector_t sector) const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; struct bm_extent* bm_ext; int i; + int need_barrier=0; MTRACE(TraceTypeResync, TraceLvlAll, INFO("drbd_try_rs_begin_io: sector=%llus\n", @@ -1241,6 +1229,7 @@ int drbd_try_rs_begin_io(drbd_dev* mdev, sector_t sector) goto proceed; } if (!test_and_set_bit(BME_NO_WRITES,&bm_ext->flags)) { + need_barrier = 1; mdev->resync_locked++; } else { /* we did set the BME_NO_WRITES, @@ -1278,6 +1267,7 @@ int drbd_try_rs_begin_io(drbd_dev* mdev, sector_t sector) } set_bit(BME_NO_WRITES,&bm_ext->flags); D_ASSERT(bm_ext->lce.refcnt == 1); + need_barrier = 1; mdev->resync_locked++; goto check_al; } @@ -1303,6 +1293,16 @@ int drbd_try_rs_begin_io(drbd_dev* mdev, sector_t sector) ); if (bm_ext) mdev->resync_wenr = enr; spin_unlock_irq(&mdev->al_lock); + + if (need_barrier) { + // Found conflicting AL update - start new epoch if possible + struct drbd_barrier *b = kmalloc(sizeof(struct drbd_barrier),GFP_NOIO); + if (b) + tl_new_epoch(mdev, "drbd_try_rs_begin_io", b); + else + ERR("Failed to allocate barrier in drbd_try_rs_begin_io\n"); + } + return -EAGAIN; } diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h index b374dc7..d2db85d 100644 --- a/drbd/drbd_int.h +++ b/drbd/drbd_int.h @@ -941,7 +941,8 @@ extern void drbd_free_resources(drbd_dev *mdev); extern void tl_release(drbd_dev *mdev,unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(drbd_dev *mdev); -extern struct drbd_barrier *_tl_add_barrier(drbd_dev *,struct drbd_barrier *); +extern void _tl_new_epoch(drbd_dev *, struct drbd_barrier *); +extern void tl_new_epoch(drbd_dev *, char *, struct drbd_barrier *); extern void drbd_free_sock(drbd_dev *mdev); extern int drbd_send(drbd_dev *mdev, struct socket *sock, void* buf, size_t size, unsigned msg_flags); diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c index 98a1c63..4ff56f7 100644 --- a/drbd/drbd_main.c +++ b/drbd/drbd_main.c @@ -198,7 +198,7 @@ STATIC void tl_cleanup(drbd_dev *mdev) * It returns the previously newest barrier * (not the just created barrier) to the caller. */ -struct drbd_barrier *_tl_add_barrier(drbd_dev *mdev,struct drbd_barrier *new) +STATIC struct drbd_barrier *_tl_add_barrier(drbd_dev *mdev,struct drbd_barrier *new) { struct drbd_barrier *newest_before; @@ -217,6 +217,33 @@ struct drbd_barrier *_tl_add_barrier(drbd_dev *mdev,struct drbd_barrier *new) return newest_before; } +/** + * _tl_new_epoch: Starts a new epoch using the barrier structure passed in + */ +void _tl_new_epoch(drbd_dev *mdev, struct drbd_barrier *newb) +{ + newb = _tl_add_barrier(mdev,newb); + newb->w.cb = w_send_barrier; + /* inc_ap_pending done here, so we won't + * get imbalanced on connection loss. + * dec_ap_pending will be done in got_BarrierAck + * or (on connection loss) in tl_clear. */ + inc_ap_pending(mdev); + drbd_queue_work(&mdev->data.work, &newb->w); +} + +/** + * tl_new_epoch: Starts a new epoch using the barrier structure passed in + */ +void tl_new_epoch(drbd_dev *mdev, char *caller, struct drbd_barrier *newb) +{ + WARN("Creating new epoch in %s\n", caller); + + spin_lock_irq(&mdev->req_lock); + _tl_new_epoch(mdev, newb); + spin_unlock_irq(&mdev->req_lock); +} + /* when we receive a barrier ack */ void tl_release(drbd_dev *mdev,unsigned int barrier_nr, unsigned int set_size) diff --git a/drbd/drbd_nl.c b/drbd/drbd_nl.c index 5ef1d89..ad91761 100644 --- a/drbd/drbd_nl.c +++ b/drbd/drbd_nl.c @@ -237,10 +237,20 @@ int drbd_set_role(drbd_dev *mdev, drbd_role_t new_role, int force) int r=0,forced = 0, try=0; drbd_state_t mask, val; drbd_disks_t nps; + struct drbd_barrier *newb = NULL; if ( new_role == Primary ) { request_ping(mdev); // Detect a dead peer ASAP } + else { + // Might need to send barrier if going secondary + newb = kmalloc(sizeof(struct drbd_barrier),GFP_NOIO); + + if (newb == NULL) { + // Oh dear - fail request + return SS_UnknownError; + } + } mask.i = 0; mask.role = role_mask; val.i = 0; val.role = new_role; @@ -318,6 +328,10 @@ int drbd_set_role(drbd_dev *mdev, drbd_role_t new_role, int force) * */ if (new_role == Secondary) { + // Initiate barrier to clear out transfer log + tl_new_epoch(mdev, "drbd_set_role", newb); + newb = NULL; // newb has been consumed + set_disk_ro(mdev->vdisk, TRUE ); if ( inc_local(mdev) ) { mdev->bc->md.uuid[Current] &= ~(u64)1; @@ -357,9 +371,10 @@ int drbd_set_role(drbd_dev *mdev, drbd_role_t new_role, int force) drbd_md_sync(mdev); - return r; - fail: + if (newb) + kfree(newb); + return r; } diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c index d303ad7..01371a0 100644 --- a/drbd/drbd_req.c +++ b/drbd/drbd_req.c @@ -934,15 +934,8 @@ drbd_make_request_common(drbd_dev *mdev, struct bio *bio) if (remote && mdev->unused_spare_barrier && test_and_clear_bit(ISSUE_BARRIER,&mdev->flags)) { struct drbd_barrier *b = mdev->unused_spare_barrier; - b = _tl_add_barrier(mdev,b); mdev->unused_spare_barrier = NULL; - b->w.cb = w_send_barrier; - /* inc_ap_pending done here, so we won't - * get imbalanced on connection loss. - * dec_ap_pending will be done in got_BarrierAck - * or (on connection loss) in tl_clear. */ - inc_ap_pending(mdev); - drbd_queue_work(&mdev->data.work, &b->w); + _tl_new_epoch(mdev, b); } else { D_ASSERT(!(remote && rw == WRITE && test_bit(ISSUE_BARRIER,&mdev->flags))); -- 1.5.4.rc1