[DRBD-cvs] r1741 - in trunk: . drbd
svn at svn.drbd.org
svn at svn.drbd.org
Sun Jan 30 23:04:18 CET 2005
Author: phil
Date: 2005-01-30 23:04:15 +0100 (Sun, 30 Jan 2005)
New Revision: 1741
Modified:
trunk/ROADMAP
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
Log:
Slowly realizing the whole scope of the problem...
Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP 2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/ROADMAP 2005-01-30 22:04:15 UTC (rev 1741)
@@ -262,7 +262,8 @@
7. An data packet overtakes an ACK packet on the network.
Although this case is quite unlikely, we have to take int into
- account.
+ account. From N2's point of view this looks a lot like case 4,
+ but N2 should not delete the data packet now!
Proposed solution
@@ -279,19 +280,26 @@
* If the sequence number of the data packet is higher than
last_seq+1 sleep until last_seq+1 == seq_num(data packet)
+ [needed to satisfy example case 7]
1. If the packet's sequence number is on the discard list,
- simply drop it.
+ simply drop it.
+ [ ex.c. 3]
2. Do we have a concurrent request? (i.e. Do I have a request
to the same block in my transfer log.) If not -> write now.
+ [ default ]
3. Have I already got an ACK packet for the concurrent
request ? (Has the request the RQ_DRBD_SENT bit already set)
If yes -> write the data from the data packet afterwards.
+ [ ex.c. 6]
4. Do I have the "discard-concurrent-write-flag" ?
If yes -> discard the data packet.
If no -> Write data from the data packet afterwards and set
the RQ_DRBD_SENT bit in the request object ( Since
- will will not get an ACK from our peer )
+ we will not get an ACK from our peer). Mark the
+ ee to prepend the ACK packet with a discard info
+ packet.
+ [ ex.c. *]
The algorithm which is performed upon the reception of an
ACK packet [drbd_asender]
@@ -313,8 +321,8 @@
to find IO operations starting in the same 4k block of
data quickly. -> With two lookups the hash table we can
find any concurrent access.
- 70% DONE ; Implement real overlap check, Implement discard info
- Packets. Look for example case 5.
+ 70% DONE ; Implement discard info packets. Code an equivalent
+ hash table for EEs, to solve example case 5.
10 Change Sync-groups to sync-after
@@ -323,7 +331,7 @@
are not flexible enough to cover all real world scenarios.
E.g. Two physical disks should be mirrored with DRBD. On one
- of the disks there is only a single partition, while the
+svn d of the disks there is only a single partition, while the
other one is divided into many (e.g. 4 smaller) partitions.
One would want to sync the big one in parallel to the
4 small ones. While the resync process of the 4 small
@@ -365,6 +373,11 @@
/dev/mapper/control
0% DONE
+15 Accept BIOs bigger than one page, probably up to 64k (16 pages)
+ would be a good choice. When this is done make the bits in the
+ bitmap account for more than 4k, e.g. 64k
+ 0% DONE
+
plus-banches:
----------------------
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/drbd/drbd_int.h 2005-01-30 22:04:15 UTC (rev 1741)
@@ -788,7 +788,7 @@
extern int tl_verify(drbd_dev *mdev, drbd_request_t * item, sector_t sector);
#define TLHW_FLAG_SENT 0x10000000
#define TLHW_FLAG_RECVW 0x20000000
-extern int tl_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags);
+extern int req_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags);
extern void drbd_free_sock(drbd_dev *mdev);
extern int drbd_send(drbd_dev *mdev, struct socket *sock,
void* buf, size_t size, unsigned msg_flags);
@@ -936,6 +936,11 @@
( (MD_RESERVED_SIZE*2LL - MD_BM_OFFSET) * (1LL<<(BM_EXT_SIZE_B-9)) )
#endif
+/* Sector shift value for hash functions for tl_hash table and ee_hash
+ table. A value of 3 makes all IOs in one 4K block map to the same
+ slot of the hash table. */
+#define HT_SHIFT 3
+
extern int drbd_bm_init (drbd_dev *mdev);
extern int drbd_bm_resize (drbd_dev *mdev, sector_t sectors);
extern void drbd_bm_cleanup (drbd_dev *mdev);
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/drbd/drbd_main.c 2005-01-30 22:04:15 UTC (rev 1741)
@@ -164,8 +164,7 @@
STATIC unsigned int tl_hash_fn(drbd_dev *mdev, sector_t sector)
{
- // map sectors in the same 4k block to the same hash key.
- return (sector>>3) % mdev->tl_hash_s;
+ return (sector>>HT_SHIFT) % mdev->tl_hash_s;
}
@@ -283,41 +282,6 @@
return rv;
}
-/* Return values:
- *
- * 0 ... no conflicting write
- * 1 ... a conflicting write, have not got ack by now.
- * 2 ... a conflicting write, have got also got ack.
- */
-int tl_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags)
-{
- // PRE TODO: Real overlap check... using size etc...
- struct hlist_head *slot = mdev->tl_hash + tl_hash_fn(mdev,sector);
- struct hlist_node *n;
- drbd_request_t * i;
- int rv=0;
-
- spin_lock_irq(&mdev->tl_lock);
-
- hlist_for_each_entry(i, n, slot, colision) {
- if (drbd_req_get_sector(i) == sector) {
- rv=1;
- if( i->rq_status & RQ_DRBD_SENT ) rv++;
- if(size_n_flags & TLHW_FLAG_SENT) {
- i->rq_status |= RQ_DRBD_SENT;
- }
- if(size_n_flags & TLHW_FLAG_RECVW) {
- i->rq_status |= RQ_DRBD_RECVW;
- }
- break;
- }
- }
-
- spin_unlock_irq(&mdev->tl_lock);
-
- return rv;
-}
-
/* tl_dependence reports if this sector was present in the current
epoch.
As side effect it clears also the pointer to the request if it
@@ -396,6 +360,63 @@
}
}
+STATIC int overlaps(sector_t s1, int l1, sector_t s2, int l2)
+{
+ return !( ( s1 + (l1>>9) <= s2 ) || ( s1 >= s2 + (l2>>9) ) );
+}
+
+/* Return values:
+ *
+ * 0 ... no conflicting write
+ * 1 ... a conflicting write, have not got ack by now.
+ * 2 ... a conflicting write, have also got ack.
+ */
+int req_have_write(drbd_dev *mdev, sector_t sector, int size_n_flags)
+{
+ struct hlist_head *slot;
+ struct hlist_node *n;
+ drbd_request_t * req;
+ int size = size_n_flags & ~(TLHW_FLAG_SENT|TLHW_FLAG_RECVW);
+ int i, rv=0;
+
+ D_ASSERT(size <= 1<<(HT_SHIFT+9) );
+
+ spin_lock_irq(&mdev->tl_lock);
+
+ for(i=-1;i<=1;i++ ) {
+ slot = mdev->tl_hash + tl_hash_fn(mdev,
+ sector + i*(1<<(HT_SHIFT)));
+ hlist_for_each_entry(req, n, slot, colision) {
+ if( overlaps(drbd_req_get_sector(req),
+ drbd_req_get_size(req),
+ sector,
+ size) ) {
+ rv=1;
+ if( req->rq_status & RQ_DRBD_SENT ) rv++;
+ if( size_n_flags & TLHW_FLAG_SENT ) {
+ req->rq_status |= RQ_DRBD_SENT;
+ }
+ if( size_n_flags & TLHW_FLAG_RECVW ) {
+ req->rq_status |= RQ_DRBD_RECVW;
+ }
+ goto out;
+ } //overlaps()
+ } // hlist_for_each_entry()
+ }
+
+ // PRE TODO: insert ee onto ee_hash_table here...
+ out:
+ spin_unlock_irq(&mdev->tl_lock);
+
+ return rv;
+}
+
+int ee_have_write(drbd_dev *mdev, drbd_request_t * req)
+{
+ // PRE TODO: same as above for a request against our active EEs.
+ return 0;
+}
+
/**
* drbd_io_error: Handles the on_io_error setting, should be called in the
* unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2005-01-28 15:02:23 UTC (rev 1740)
+++ trunk/drbd/drbd_receiver.c 2005-01-30 22:04:15 UTC (rev 1741)
@@ -1081,13 +1081,13 @@
return TRUE;
}
- switch( tl_have_write(mdev, sector, data_size) ) {
+ switch( req_have_write(mdev, sector, data_size) ) {
case 2: /* Conflicting write, got ACK */
/* write afterwards ...*/
WARN("Concurrent write! [W AFTERWARDS] sec=%lu\n",
(unsigned long)sector);
if( wait_event_interruptible(mdev->cstate_wait,
- !tl_have_write(mdev,sector,data_size|TLHW_FLAG_RECVW))) {
+ !req_have_write(mdev,sector,data_size|TLHW_FLAG_RECVW))) {
spin_lock_irq(&mdev->ee_lock);
drbd_put_ee(mdev,e);
spin_unlock_irq(&mdev->ee_lock);
@@ -1106,7 +1106,7 @@
WARN("Concurrent write! [W AFTERWARDS] sec=%lu\n",
(unsigned long)sector);
if( wait_event_interruptible(mdev->cstate_wait,
- !tl_have_write(mdev,sector,data_size|
+ !req_have_write(mdev,sector,data_size|
TLHW_FLAG_RECVW|TLHW_FLAG_SENT))) {
spin_lock_irq(&mdev->ee_lock);
drbd_put_ee(mdev,e);
More information about the drbd-cvs
mailing list