[DRBD-cvs] svn commit by phil - r2298 - in trunk: . drbd - * Mark
requests as they are moved from the queue to the
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Wed Jul 26 12:23:36 CEST 2006
Author: phil
Date: 2006-07-26 12:23:35 +0200 (Wed, 26 Jul 2006)
New Revision: 2298
Modified:
trunk/ROADMAP
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_worker.c
Log:
* Mark requests as they are moved from the queue to the socket.
The tl_clear() function should only decrease mdev->ap_pending_cnt
for requests that were already on the socket...
( The worker should not exit upon loss of connection.
Done by previous patch )
The bug I want to fix:
-----------------------
The client (kjournald) hangs in drbd_al_begin_io(). It waits for
the completion of w_al_write_transaction(). That will not happen
since the worker exited already.
The receiver hangs in drbd_disconnect(), actually at this line:
wait_event( mdev->cstate_wait, atomic_read(&mdev->ap_pending_cnt)==0 );
BTW, mdev->ap_pending_cnt is -1.
The counter could go below zero, since the request gets onto the
TL first, and then it gets queued on the worker's queue.
Then the disconnect code comes in, and decreases mdev->ap_pending_cnt
for each entry in the TL.
The entry on the worker's queue is still there and would increase
the counter again, but the worker is dead by now.
Modified: trunk/ROADMAP
===================================================================
--- trunk/ROADMAP 2006-07-26 09:25:48 UTC (rev 2297)
+++ trunk/ROADMAP 2006-07-26 10:23:35 UTC (rev 2298)
@@ -842,36 +842,3 @@
6 Have protocol version 74 available in drbd-0.8, to allow rolling
upgrades
-
-
-
-Currently known bug(s):
------------------------
-The client (kjournald) hangs in drbd_al_begin_io(). It waits for
-the completion of w_al_write_transaction(). That will not happen
-since the worker exited alrady.
-
-The receiver hangs in drbd_disconnect(), actuall at this line:
-wait_event( mdev->cstate_wait, atomic_read(&mdev->ap_pending_cnt)==0 );
-
-BTW, mdev->ap_pending_cnt is -1.
-
-The counter could got below zero, since the requests gets onto the
-TL first, and then it gets queued on the worker's queue.
-
-The the disconnect code comes in, and decreases mdev->ap_pending_cnt
-for each entry in the TL.
-
-The entry on the worker's queue is still there and would increase
-the counter again, but the worker is dead by now.
-
-Solution:
-
- * Mark requests as they are moved from the queue to the socket.
- The tl_clear() function should only decrease mdev->ap_pending_cnt
- for requests that where already on the socket...
- * The worker should not exit upon loss of connection.
-
-
-
-
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2006-07-26 09:25:48 UTC (rev 2297)
+++ trunk/drbd/drbd_int.h 2006-07-26 10:23:35 UTC (rev 2298)
@@ -231,11 +231,11 @@
/* these defines should go into blkdev.h
(if it will be ever includet into linus' linux) */
#define RQ_DRBD_NOTHING 0x0001
-#define RQ_DRBD_SENT 0x0010
-#define RQ_DRBD_LOCAL 0x0020
-#define RQ_DRBD_DONE 0x0030
-#define RQ_DRBD_IN_TL 0x0040
-#define RQ_DRBD_RECVW 0x0080
+#define RQ_DRBD_SENT 0x0010 // We got an ack
+#define RQ_DRBD_LOCAL 0x0020 // We wrote it to the local disk
+#define RQ_DRBD_DONE 0x0030 // We are done ;)
+#define RQ_DRBD_IN_TL 0x0040 // Set when it is in the TL
+#define RQ_DRBD_ON_WIRE 0x0080 // Set as soon as it is on the socket...
/* drbd_meta-data.c (still in drbd_main.c) */
#define DRBD_MD_MAGIC (DRBD_MAGIC+4) // 4th incarnation of the disk layout.
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2006-07-26 09:25:48 UTC (rev 2297)
+++ trunk/drbd/drbd_main.c 2006-07-26 10:23:35 UTC (rev 2298)
@@ -337,8 +337,6 @@
hlist_del(&req->colision);
// req->barrier->n_req--; // Barrier migh be free'ed already!
- if( req->rq_status & RQ_DRBD_RECVW ) wake_up(&mdev->cstate_wait);
-
spin_unlock_irqrestore(&mdev->tl_lock,flags);
return r;
}
@@ -365,9 +363,13 @@
// bi_size and bi_sector are modified in bio_endio!
sector = drbd_req_get_sector(r);
size = drbd_req_get_size(r);
+
+ if( r->rq_status & RQ_DRBD_ON_WIRE &&
+ mdev->net_conf->wire_protocol != DRBD_PROT_A ) {
+ dec_ap_pending(mdev);
+ }
+
if( !(r->rq_status & RQ_DRBD_SENT) ) {
- if(mdev->net_conf->wire_protocol != DRBD_PROT_A )
- dec_ap_pending(mdev);
drbd_end_req(r,RQ_DRBD_SENT,ERF_NOTLD|1, sector);
goto mark;
}
@@ -1702,6 +1704,10 @@
set_bit(UNPLUG_REMOTE,&mdev->flags);
ok = sizeof(p) == drbd_send(mdev,mdev->data.socket,&p,sizeof(p),MSG_MORE);
if(ok) {
+ spin_lock_irq(&mdev->req_lock);
+ req->rq_status |= RQ_DRBD_ON_WIRE;
+ spin_unlock_irq(&mdev->req_lock);
+
if(mdev->net_conf->wire_protocol == DRBD_PROT_A) {
ok = _drbd_send_bio(mdev,req->master_bio);
} else {
Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c 2006-07-26 09:25:48 UTC (rev 2297)
+++ trunk/drbd/drbd_worker.c 2006-07-26 10:23:35 UTC (rev 2298)
@@ -561,21 +561,24 @@
drbd_request_t *req = (drbd_request_t *)w;
int ok;
- inc_ap_pending(mdev); // Right here, since tl_clear() will decrease it
-
if (unlikely(cancel)) {
/* Nothing to do, here. tl_clear() does the work. */
return 1;
}
ok = drbd_send_dblock(mdev,req);
- if (!ok) {
+ if (ok) {
+ inc_ap_pending(mdev);
+
+ if(mdev->net_conf->wire_protocol == DRBD_PROT_A) {
+ dec_ap_pending(mdev);
+ drbd_end_req(req, RQ_DRBD_SENT, 1,
+ drbd_req_get_sector(req));
+ }
+ } else {
if (mdev->state.conn >= Connected)
drbd_force_state(mdev,NS(conn,NetworkFailure));
drbd_thread_restart_nowait(&mdev->receiver);
- } else if(mdev->net_conf->wire_protocol == DRBD_PROT_A) {
- dec_ap_pending(mdev);
- drbd_end_req(req, RQ_DRBD_SENT, 1, drbd_req_get_sector(req));
}
return ok;
More information about the drbd-cvs
mailing list