[DRBD-cvs] r1741 - in trunk: . drbd

Sun Jan 30 23:04:18 CET 2005

Author: phil
Date: 2005-01-30 23:04:15 +0100 (Sun, 30 Jan 2005)
New Revision: 1741

Modified:
   trunk/ROADMAP
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_receiver.c
Log:
Slowly realizing the whole scope of the problem...


Modified: trunk/ROADMAP
===================================================================

--- trunk/ROADMAP	2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/ROADMAP	2005-01-30 22:04:15 UTC (rev 1741)
@@ -262,7 +262,8 @@
 
   7. An data packet overtakes an ACK packet on the network.
     Although this case is quite unlikely, we have to take int into 
-    account. 
+    account. From N2's point of view this looks a lot like case 4,
+    but N2 should not delete the data packet now!
 
  Proposed solution
 
@@ -279,19 +280,26 @@
 
   *  If the sequence number of the data packet is higher than
      last_seq+1 sleep until last_seq+1 == seq_num(data packet)
+     [needed to satisfy example case 7]
 
   1. If the packet's sequence number is on the discard list,
-     simply drop it.
+     simply drop it. 
+     [ ex.c. 3]
   2. Do we have a concurrent request? (i.e. Do I have a request
      to the same block in my transfer log.) If not -> write now.
+     [ default ]
   3. Have I already got an ACK packet for the concurrent 
      request ? (Has the request the RQ_DRBD_SENT bit already set)
      If yes -> write the data from the data packet afterwards.
+     [ ex.c. 6]
   4. Do I have the "discard-concurrent-write-flag" ?
      If yes -> discard the data packet.
      If no -> Write data from the data packet afterwards and set
               the RQ_DRBD_SENT bit in the request object ( Since
-              will will not get an ACK from our peer )
+              we will not get an ACK from our peer). Mark the
+	      ee to prepend the ACK packet with a discard info
+	      packet.
+     [ ex.c. *]
 
   The algorithm which is performed upon the reception of an 
   ACK packet [drbd_asender]
@@ -313,8 +321,8 @@
         to find IO operations starting in the same 4k block of
         data quickly. -> With two lookups the hash table we can
 	find any concurrent access.
-  70% DONE ; Implement real overlap check, Implement discard info
-	     Packets. Look for example case 5.
+  70% DONE ; Implement discard info packets. Code an equivalent
+             hash table for EEs, to solve example case 5.
 
 10 Change Sync-groups to sync-after
   
@@ -323,7 +331,7 @@
   are not flexible enough to cover all real world scenarios.
 
   E.g. Two physical disks should be mirrored with DRBD. On one
-       of the disks there is only a single partition, while the
+       of the disks there is only a single partition, while the
        other one is divided into many (e.g. 4 smaller) partitions.
        One would want to sync the big one in parallel to the 
        4 small ones. While the resync process of the 4 small
@@ -365,6 +373,11 @@
   /dev/mapper/control
   0% DONE
 
+15 Accept BIOs bigger than one page, probably up to 64k (16 pages)
+  would be a good choice. When this is done make the bits in the
+  bitmap account for more than 4k, e.g. 64k.
+  0% DONE
+
 plus-banches:
 ----------------------
 

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/drbd/drbd_int.h	2005-01-30 22:04:15 UTC (rev 1741)
@@ -788,7 +788,7 @@
 extern int tl_verify(drbd_dev *mdev, drbd_request_t * item, sector_t sector);
 #define TLHW_FLAG_SENT   0x10000000
 #define TLHW_FLAG_RECVW  0x20000000
-extern int tl_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags);
+extern int req_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags);
 extern void drbd_free_sock(drbd_dev *mdev);
 extern int drbd_send(drbd_dev *mdev, struct socket *sock,
 		     void* buf, size_t size, unsigned msg_flags);
@@ -936,6 +936,11 @@
           ( (MD_RESERVED_SIZE*2LL - MD_BM_OFFSET) * (1LL<<(BM_EXT_SIZE_B-9)) )
 #endif
 
+/* Sector shift value for hash functions for tl_hash table and ee_hash
+   table. A value of 3 makes all IOs in one 4K block map to the same
+   slot of the hash table. */
+#define HT_SHIFT 3
+
 extern int  drbd_bm_init      (drbd_dev *mdev);
 extern int  drbd_bm_resize    (drbd_dev *mdev, sector_t sectors);
 extern void drbd_bm_cleanup   (drbd_dev *mdev);

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/drbd/drbd_main.c	2005-01-30 22:04:15 UTC (rev 1741)
@@ -164,8 +164,7 @@
 
 STATIC unsigned int tl_hash_fn(drbd_dev *mdev, sector_t sector)
 {
-	// map sectors in the same 4k block to the same hash key.
-	return (sector>>3) % mdev->tl_hash_s;
+	return (sector>>HT_SHIFT) % mdev->tl_hash_s;
 }
 
 
@@ -283,41 +282,6 @@
 	return rv;
 }
 
-/* Return values:
- *
- * 0 ... no conflicting write
- * 1 ... a conflicting write, have not got ack by now.
- * 2 ... a conflicting write, have got also got ack.
- */
-int tl_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags)
-{
-	// PRE TODO: Real overlap check... using size etc...
-	struct hlist_head *slot = mdev->tl_hash + tl_hash_fn(mdev,sector);
-	struct hlist_node *n;
-	drbd_request_t * i;
-	int rv=0;
-
-	spin_lock_irq(&mdev->tl_lock);
-
-	hlist_for_each_entry(i, n, slot, colision) {
-		if (drbd_req_get_sector(i) == sector) {
-			rv=1;
-			if( i->rq_status & RQ_DRBD_SENT ) rv++;
-			if(size_n_flags & TLHW_FLAG_SENT) {
-				i->rq_status |= RQ_DRBD_SENT;
-			}
-			if(size_n_flags & TLHW_FLAG_RECVW) {
-				i->rq_status |= RQ_DRBD_RECVW;
-			}
-			break;
-		}
-	}
-
-	spin_unlock_irq(&mdev->tl_lock);
-
-	return rv;
-}
-
 /* tl_dependence reports if this sector was present in the current
    epoch.
    As side effect it clears also the pointer to the request if it
@@ -396,6 +360,63 @@
 	}
 }
 
+STATIC int overlaps(sector_t s1, int l1, sector_t s2, int l2)
+{
+	return !( ( s1 + (l1>>9) <= s2 ) || ( s1 >= s2 + (l2>>9) ) );
+}
+
+/* Return values:
+ *
+ * 0 ... no conflicting write
+ * 1 ... a conflicting write, have not got ack by now.
+ * 2 ... a conflicting write, and have already got the ack.
+ */
+int req_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags)
+{
+	struct hlist_head *slot;
+	struct hlist_node *n;
+	drbd_request_t * req;
+	int size = size_n_flags & ~(TLHW_FLAG_SENT|TLHW_FLAG_RECVW);
+	int i, rv=0;
+
+	D_ASSERT(size <= 1<<(HT_SHIFT+9) );
+
+	spin_lock_irq(&mdev->tl_lock);
+
+	for(i=-1;i<=1;i++ ) {
+		slot = mdev->tl_hash + tl_hash_fn(mdev,
+						  sector + i*(1<<(HT_SHIFT)));
+		hlist_for_each_entry(req, n, slot, colision) {
+			if( overlaps(drbd_req_get_sector(req),
+				     drbd_req_get_size(req),
+				     sector,
+				     size) ) {
+				rv=1;
+				if( req->rq_status & RQ_DRBD_SENT ) rv++;
+				if( size_n_flags & TLHW_FLAG_SENT ) {
+					req->rq_status |= RQ_DRBD_SENT;
+				}
+				if( size_n_flags & TLHW_FLAG_RECVW ) {
+					req->rq_status |= RQ_DRBD_RECVW;
+				}
+				goto out;
+			} //overlaps()
+		} // hlist_for_each_entry()
+	}
+
+	// PRE TODO: insert ee onto ee_hash_table here...
+ out:
+	spin_unlock_irq(&mdev->tl_lock);
+
+	return rv;
+}
+
+int ee_have_write(drbd_dev *mdev, drbd_request_t * req)
+{
+	// PRE TODO: same as above for a request against our active EEs.
+	return 0;
+}
+
 /**
  * drbd_io_error: Handles the on_io_error setting, should be called in the
  * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/drbd/drbd_receiver.c	2005-01-30 22:04:15 UTC (rev 1741)
@@ -1081,13 +1081,13 @@
 		return TRUE;
 	}
 
-	switch( tl_have_write(mdev, sector, data_size) ) {
+	switch( req_have_write(mdev, sector, data_size) ) {
 	case 2: /* Conflicting write, got ACK */
 		/* write afterwards ...*/
 		WARN("Concurrent write! [W AFTERWARDS] sec=%lu\n",
 		     (unsigned long)sector);
 		if( wait_event_interruptible(mdev->cstate_wait,
-		     !tl_have_write(mdev,sector,data_size|TLHW_FLAG_RECVW))) {
+		     !req_have_write(mdev,sector,data_size|TLHW_FLAG_RECVW))) {
 			spin_lock_irq(&mdev->ee_lock);
 			drbd_put_ee(mdev,e);
 			spin_unlock_irq(&mdev->ee_lock);
@@ -1106,7 +1106,7 @@
 			WARN("Concurrent write! [W AFTERWARDS] sec=%lu\n",
 			     (unsigned long)sector);
 			if( wait_event_interruptible(mdev->cstate_wait,
-			      !tl_have_write(mdev,sector,data_size|
+			      !req_have_write(mdev,sector,data_size|
 				            TLHW_FLAG_RECVW|TLHW_FLAG_SENT))) {
 				spin_lock_irq(&mdev->ee_lock);
 				drbd_put_ee(mdev,e);