[DRBD-cvs] svn commit by phil - r2590 - in trunk/drbd: . linux - *
DRBD could not cope with failed READA requests from the backing device
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Mon Nov 6 12:12:33 CET 2006
Author: phil
Date: 2006-11-06 12:12:31 +0100 (Mon, 06 Nov 2006)
New Revision: 2590
Modified:
trunk/drbd/drbd_int.h
trunk/drbd/drbd_main.c
trunk/drbd/drbd_receiver.c
trunk/drbd/drbd_req.c
trunk/drbd/drbd_req.h
trunk/drbd/drbd_worker.c
trunk/drbd/linux/drbd_config.h
Log:
* DRBD could not cope with failed READA requests from the backing
device. Fixed that.
* Added the possibility to simulate the failure of READA requests.
* Added an error parameter to _req_mod() and _req_may_be_done() to pass
errnos through.
Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/drbd_int.h 2006-11-06 11:12:31 UTC (rev 2590)
@@ -200,6 +200,7 @@
DRBD_FAULT_RS_RD,
DRBD_FAULT_DT_WR,
DRBD_FAULT_DT_RD,
+ DRBD_FAULT_DT_RA, // READA = Read ahead
DRBD_FAULT_MAX,
};
Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/drbd_main.c 2006-11-06 11:12:31 UTC (rev 2590)
@@ -230,7 +230,7 @@
* master_bio's could already been completed. */
list_for_each_safe(le, tle, &b->requests) {
r = list_entry(le, struct drbd_request,tl_requests);
- _req_mod(r, barrier_acked);
+ _req_mod(r, barrier_acked, 0);
}
list_del(&b->requests);
/* There could be requests on the list waiting for completion
@@ -273,7 +273,7 @@
list_for_each_safe(le, tle, &b->requests) {
r = list_entry(le, struct drbd_request,tl_requests);
- _req_mod(r, connection_lost_while_pending);
+ _req_mod(r, connection_lost_while_pending, 0);
}
tmp = b->next;
@@ -2844,6 +2844,7 @@
"Resync read",
"Data write",
"Data read",
+ "Data read ahead",
};
return (type < DRBD_FAULT_MAX)? _faults[type] : "**Unknown**";
Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/drbd_receiver.c 2006-11-06 11:12:31 UTC (rev 2590)
@@ -1108,7 +1108,7 @@
* still no race with drbd_fail_pending_reads */
ok = recv_dless_read(mdev,req,sector,data_size);
- if (ok) req_mod(req, data_received);
+ if (ok) req_mod(req, data_received, 0);
/* else: nothing. handled from drbd_disconnect...
* I don't think we may complete this just yet
* in case we are "on-disconnect: freeze" */
@@ -2583,7 +2583,7 @@
req = list_entry(le, drbd_request_t, w.list);
list_del(le);
- _req_mod(req, connection_lost_while_pending);
+ _req_mod(req, connection_lost_while_pending, 0);
}
spin_unlock_irq(&mdev->req_lock);
}
@@ -3096,18 +3096,18 @@
switch (be16_to_cpu(h->command)) {
case WriteAck:
D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
- _req_mod(req,write_acked_by_peer);
+ _req_mod(req,write_acked_by_peer,0);
break;
case RecvAck:
D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_A);
- _req_mod(req,recv_acked_by_peer);
+ _req_mod(req,recv_acked_by_peer,0);
break;
case DiscardAck:
D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
ALERT("Got DiscardAck packet %llus +%u!"
" DRBD is not a random data generator!\n",
(unsigned long long)req->sector, req->size);
- _req_mod(req, conflict_discarded_by_peer);
+ _req_mod(req, conflict_discarded_by_peer, 0);
break;
default:
D_ASSERT(0);
@@ -3168,7 +3168,7 @@
ERR("Got NegDReply; Sector %llus, len %u; Fail original request.\n",
(unsigned long long)sector,be32_to_cpu(p->blksize));
- _req_mod(req, neg_acked);
+ _req_mod(req, neg_acked, 0);
spin_unlock_irq(&mdev->req_lock);
// warning LGE "ugly and wrong"
Modified: trunk/drbd/drbd_req.c
===================================================================
--- trunk/drbd/drbd_req.c 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/drbd_req.c 2006-11-06 11:12:31 UTC (rev 2590)
@@ -103,7 +103,7 @@
#define print_req_mod(T,W)
#endif
-void _req_may_be_done(drbd_request_t *req)
+void _req_may_be_done(drbd_request_t *req, int error)
{
const unsigned long s = req->rq_state;
drbd_dev *mdev = req->mdev;
@@ -207,7 +207,8 @@
* then again, if it is a READ, it is not in the TL at all.
* is it still leagal to complete a READ during freeze? */
dump_bio(mdev,req->master_bio,1);
- bio_endio(req->master_bio, req->master_bio->bi_size, ok ? 0 : -EIO);
+ bio_endio(req->master_bio, req->master_bio->bi_size,
+ ok ? 0 : ( error ? error : -EIO ) );
req->master_bio = NULL;
dec_ap_bio(mdev);
} else {
@@ -389,11 +390,15 @@
* Though I think it is likely that we break this again into many
* static inline void _req_mod_ ## what (req) ...
*/
-void _req_mod(drbd_request_t *req, drbd_req_event_t what)
+void _req_mod(drbd_request_t *req, drbd_req_event_t what, int error)
{
drbd_dev *mdev = req->mdev;
MUST_HOLD(&mdev->req_lock);
+ if (error && ( bio_rw(req->master_bio) != READA ) ) {
+ ERR("got an _req_mod() errno of %d\n",error);
+ }
+
print_req_mod(req,what);
switch(what) {
@@ -437,7 +442,7 @@
req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
req->rq_state &= ~RQ_LOCAL_PENDING;
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
break;
case write_completed_with_error:
@@ -452,20 +457,24 @@
/* and now: check how to handle local io error.
* FIXME see comment below in read_completed_with_error */
__drbd_chk_io_error(mdev,FALSE);
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
break;
case read_completed_with_error:
- drbd_set_out_of_sync(mdev,req->sector,req->size);
+ if (bio_rw(req->master_bio) != READA) {
+ drbd_set_out_of_sync(mdev,req->sector,req->size);
+ }
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
bio_put(req->private_bio);
req->private_bio = NULL;
dec_local(mdev);
- if (bio_rw(req->master_bio) == READA)
+ if (bio_rw(req->master_bio) == READA) {
/* it is legal to fail READA */
+ _req_may_be_done(req,error);
break;
+ }
/* else */
ALERT("Local READ failed sec=%llus size=%u\n",
(unsigned long long)req->sector, req->size);
@@ -558,7 +567,7 @@
req->rq_state &= ~RQ_NET_QUEUED;
/* if we did it right, tl_clear should be scheduled only after this,
* so this should not be necessary! */
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
break;
case handed_over_to_network:
@@ -589,7 +598,7 @@
* "completed_ok" events came in, once we return from
* _drbd_send_zc_bio (drbd_send_dblock), we have to check
* whether it is done already, and end it. */
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
break;
case connection_lost_while_pending:
@@ -602,7 +611,7 @@
* it will be canceled soon.
* FIXME we should change the code so this can not happen. */
if (!(req->rq_state & RQ_NET_QUEUED))
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
break;
case conflict_discarded_by_peer:
@@ -621,7 +630,7 @@
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
if (req->rq_state & RQ_NET_SENT)
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
/* else: done by handed_over_to_network */
break;
@@ -632,7 +641,7 @@
/* FIXME THINK! is it DONE now, or is it not? */
req->rq_state |= RQ_NET_DONE;
if (req->rq_state & RQ_NET_SENT)
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
/* else: done by handed_over_to_network */
break;
@@ -664,7 +673,7 @@
}
D_ASSERT(req->rq_state & RQ_NET_SENT);
req->rq_state |= RQ_NET_DONE;
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
break;
case data_received:
@@ -675,7 +684,7 @@
/* can it happen that we receive the DataReply
* before the send DataRequest function returns? */
if (req->rq_state & RQ_NET_SENT)
- _req_may_be_done(req);
+ _req_may_be_done(req,error);
/* else: done by handed_over_to_network */
break;
};
@@ -924,8 +933,8 @@
/* mark them early for readability.
* this just sets some state flags. */
- if (remote) _req_mod(req, to_be_send);
- if (local) _req_mod(req, to_be_submitted);
+ if (remote) _req_mod(req, to_be_send, 0);
+ if (local) _req_mod(req, to_be_submitted, 0);
/* check this request on the colison detection hash tables.
* if we have a conflict, just complete it here.
@@ -961,8 +970,8 @@
* or READ, and no local disk,
* or READ, but not in sync.
*/
- if (rw == WRITE) _req_mod(req,queue_for_net_write);
- else _req_mod(req,queue_for_net_read);
+ if (rw == WRITE) _req_mod(req,queue_for_net_write, 0);
+ else _req_mod(req,queue_for_net_read, 0);
}
spin_unlock_irq(&mdev->req_lock);
if (b) kfree(b); /* if someone else has beaten us to it... */
@@ -972,7 +981,9 @@
* was not detached below us? */
req->private_bio->bi_bdev = mdev->bc->backing_bdev;
- if (FAULT_ACTIVE(rw==WRITE? DRBD_FAULT_DT_WR : DRBD_FAULT_DT_RD))
+ if (FAULT_ACTIVE(rw==WRITE ? DRBD_FAULT_DT_WR :
+ ( rw==READ ? DRBD_FAULT_DT_RD :
+ DRBD_FAULT_DT_RA ) ))
bio_endio(req->private_bio, req->private_bio->bi_size, -EIO);
else
generic_make_request(req->private_bio);
Modified: trunk/drbd/drbd_req.h
===================================================================
--- trunk/drbd/drbd_req.h 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/drbd_req.h 2006-11-06 11:12:31 UTC (rev 2590)
@@ -291,15 +291,15 @@
/* aparently too large to be inlined...
* moved to drbd_req.c */
-extern void _req_may_be_done(drbd_request_t *req);
-extern void _req_mod(drbd_request_t *req, drbd_req_event_t what);
+extern void _req_may_be_done(drbd_request_t *req, int error);
+extern void _req_mod(drbd_request_t *req, drbd_req_event_t what, int error);
/* If you need it irqsave, do it your self! */
-static inline void req_mod(drbd_request_t *req, drbd_req_event_t what)
+static inline void req_mod(drbd_request_t *req, drbd_req_event_t what, int error)
{
drbd_dev *mdev = req->mdev;
spin_lock_irq(&mdev->req_lock);
- _req_mod(req,what);
+ _req_mod(req,what,error);
spin_unlock_irq(&mdev->req_lock);
}
#endif
Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/drbd_worker.c 2006-11-06 11:12:31 UTC (rev 2590)
@@ -165,8 +165,6 @@
// see above
if (bio->bi_size) return 1;
- if(error) DUMPI(error);
-
/* to avoid recursion in _req_mod */
what = error
? (bio_data_dir(bio) == WRITE)
@@ -174,7 +172,7 @@
: read_completed_with_error
: completed_ok;
spin_lock_irqsave(&mdev->req_lock,flags);
- _req_mod(req, what);
+ _req_mod(req, what, error);
spin_unlock_irqrestore(&mdev->req_lock,flags);
return 0;
}
@@ -211,7 +209,7 @@
if ( cancel ||
mdev->state.conn < Connected ||
mdev->state.pdsk <= Inconsistent ) {
- _req_mod(req, send_canceled); /* FIXME freeze? ... */
+ _req_mod(req, send_canceled, 0); /* FIXME freeze? ... */
spin_unlock_irq(&mdev->req_lock);
drbd_khelper(mdev,"pri-on-incon-degr"); /* FIXME REALLY? */
ALERT("WE ARE LOST. Local IO failure, no peer.\n");
@@ -607,12 +605,12 @@
int ok;
if (unlikely(cancel)) {
- req_mod(req, send_canceled);
+ req_mod(req, send_canceled, 0);
return 1;
}
ok = drbd_send_dblock(mdev,req);
- req_mod(req,ok ? handed_over_to_network : send_failed);
+ req_mod(req,ok ? handed_over_to_network : send_failed, 0);
return ok;
}
@@ -626,7 +624,7 @@
int ok;
if (unlikely(cancel)) {
- req_mod(req, send_canceled);
+ req_mod(req, send_canceled, 0);
return 1;
}
@@ -634,7 +632,7 @@
(unsigned long)req);
if(ok) {
- req_mod(req, handed_over_to_network);
+ req_mod(req, handed_over_to_network, 0);
} else {
/* ?? we set Timeout or BrokenPipe in drbd_send() */
if (mdev->state.conn >= Connected)
Modified: trunk/drbd/linux/drbd_config.h
===================================================================
--- trunk/drbd/linux/drbd_config.h 2006-11-04 11:13:58 UTC (rev 2589)
+++ trunk/drbd/linux/drbd_config.h 2006-11-06 11:12:31 UTC (rev 2590)
@@ -50,7 +50,7 @@
// #define DRBD_DISABLE_SENDPAGE
// Enable fault insertion code
-//#define DRBD_ENABLE_FAULTS
+#define DRBD_ENABLE_FAULTS
// RedHat's 2.6.9 kernels have the gfp_t type. Mainline has this feature
// since 2.6.16. If you build for RedHat enable the line below.
More information about the drbd-cvs
mailing list