From 62d745fe45e3ebd317340bcdff32a11f8a8585e4 Mon Sep 17 00:00:00 2001
From: Simon P. Graham <Simon.Graham@stratus.com>
Date: Sat, 29 Dec 2007 16:32:15 -0500
Subject: [PATCH] Ensure blocks are resynced even if disk cache is used.

This change includes the following:

1. DRBD Barrier implementation on target now flushes disk to
   force cached data to disk.
2. All protocols now use the transfer log to keep track of
   updates between barriers.
3. A deadlock between resync and requests sitting in the TL
   is fixed - if a resync request is started that conflicts
   with entries in the TL, a DRBD barrier is initiated - this
   will clear up the TL when the barrier ack is received and
   allow the resync to proceed.
---
 drbd/drbd_actlog.c   |   31 +++++++++++++++++++++++++++++++
 drbd/drbd_receiver.c |   11 +++++++++++
 drbd/drbd_req.c      |    2 +-
 3 files changed, 43 insertions(+), 1 deletions(-)

diff --git a/drbd/drbd_actlog.c b/drbd/drbd_actlog.c
index e8535fe..5505113 100644
--- a/drbd/drbd_actlog.c
+++ b/drbd/drbd_actlog.c
@@ -1131,6 +1131,37 @@ int drbd_rs_begin_io(drbd_dev* mdev, sector_t sector)
 
 	if(test_bit(BME_LOCKED,&bm_ext->flags)) return 1;
 
+	// Look for conflicting AL updates and if found start new
+	// epoch -- this will ensure the AL updates get removed 
+	// from the tl and freed.
+	for(i=0;i<AL_EXT_PER_BM_SECT;i++) {
+		if (_is_in_al(mdev,enr*AL_EXT_PER_BM_SECT+i)) {
+			// Found conflicting AL update - start new epoch if possible
+			struct drbd_barrier *b = kmalloc(sizeof(struct drbd_barrier),GFP_NOIO);
+			if (!b) {
+				ERR("Failed to allocate barrier in drbd_rs_begin_io\n");
+				return 0;
+			}
+
+			WARN("Creating new epoch in drbd_rs_begin_io\n");
+			spin_lock_irq(&mdev->req_lock);
+
+			b = _tl_add_barrier(mdev,b);
+			b->w.cb =  w_send_barrier;
+			/* inc_ap_pending done here, so we won't
+			 * get imbalanced on connection loss.
+			 * dec_ap_pending will be done in got_BarrierAck
+			 * or (on connection loss) in tl_clear.  */
+			inc_ap_pending(mdev);
+			drbd_queue_work(&mdev->data.work, &b->w);
+
+			spin_unlock_irq(&mdev->req_lock);
+
+			break;
+		}
+	}
+
+	// Now actually wait for AL to drain of any conflicting entries
 	for(i=0;i<AL_EXT_PER_BM_SECT;i++) {
 		sig = wait_event_interruptible( mdev->al_wait,
 				!_is_in_al(mdev,enr*AL_EXT_PER_BM_SECT+i) );
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index a89921c..98076dd 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -928,6 +928,17 @@ STATIC int receive_Barrier_no_tcq(drbd_dev *mdev, Drbd_Header* h)
 	mdev->epoch_size = 0;
 	spin_unlock_irq(&mdev->req_lock);
 
+	// Flush everything to disk to implement barrier
+	if (inc_local(mdev)) {
+	    rv = blkdev_issue_flush(mdev->bc->backing_bdev, NULL);
+
+	    if (rv) {
+		ERR("local disk flush failed with status %d\n",rv);
+	    }
+
+	    dec_local(mdev);
+	}
+
 	/* FIXME CAUTION! receiver thread sending via msock.
 	 * to make sure this BarrierAck will not be received before the asender
 	 * had a chance to send all the write acks corresponding to this epoch,
diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c
index d5e6795..d303ad7 100644
--- a/drbd/drbd_req.c
+++ b/drbd/drbd_req.c
@@ -883,7 +883,7 @@ drbd_make_request_common(drbd_dev *mdev, struct bio *bio)
   allocate_barrier:
 		b = kmalloc(sizeof(struct drbd_barrier),GFP_NOIO);
 		if(!b) {
-			ERR("Failed to alloc barrier.");
+			ERR("Failed to alloc barrier.\n");
 			err = -ENOMEM;
 			goto fail_and_free_req;
 		}
-- 
1.5.4.rc1