[DRBD-cvs] drbd by phil; * Replaced the "do_panic" option with on...
drbd-user@lists.linbit.com
drbd-user@lists.linbit.com
Wed, 11 Feb 2004 14:25:25 +0100 (CET)
DRBD CVS committal
Author : phil
Module : drbd
Dir : drbd/drbd
Modified Files:
Tag: rel-0_7-branch
drbd.h drbd_dsender.c drbd_fs.c drbd_int.h drbd_receiver.c
drbd_req-2.4.c
Log Message:
* Replaced the "do_panic" option with
on-io-error = { PassOn | Panic | Detach }
[ Untested; Unfinished. Need to rework the make_request function a bit,
and kick out the remapping of requests to finish this. ]
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd.h,v
retrieving revision 1.34.2.21
retrieving revision 1.34.2.22
diff -u -3 -r1.34.2.21 -r1.34.2.22
--- drbd.h 8 Feb 2004 12:26:13 -0000 1.34.2.21
+++ drbd.h 11 Feb 2004 13:25:20 -0000 1.34.2.22
@@ -55,10 +55,17 @@
:unix_mkname()).
*/
+enum io_error_handler {
+ PassOn,
+ Panic,
+ Detach
+};
+
+
struct disk_config {
IN int lower_device;
IN unsigned int disk_size;
- IN int do_panic; /* Panic on error upon LL_DEV */
+ IN enum io_error_handler on_io_error;
IN int meta_device;
IN int meta_index;
};
@@ -168,7 +175,7 @@
OUT int lower_device_major;
OUT int lower_device_minor;
OUT unsigned int disk_size_user;
- OUT int do_panic;
+ OUT enum io_error_handler on_io_error;
OUT Drbd_CState cstate;
OUT int meta_device_major;
OUT int meta_device_minor;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_dsender.c,v
retrieving revision 1.1.2.72
retrieving revision 1.1.2.73
diff -u -3 -r1.1.2.72 -r1.1.2.73
--- drbd_dsender.c 9 Feb 2004 10:36:54 -0000 1.1.2.72
+++ drbd_dsender.c 11 Feb 2004 13:25:20 -0000 1.1.2.73
@@ -90,6 +90,7 @@
list_del(&e->w.list);
spin_unlock_irqrestore(&mdev->ee_lock,flags);
+ drbd_chk_io_error(mdev,!uptodate);
drbd_queue_work(mdev,&mdev->data.work,&e->w);
dec_local(mdev);
}
@@ -128,10 +129,7 @@
spin_unlock_irqrestore(&mdev->ee_lock,flags);
- if( mdev->do_panic && !uptodate) {
- drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
- }
-
+ drbd_chk_io_error(mdev,!uptodate);
wake_asender(mdev);
dec_local(mdev);
}
@@ -149,6 +147,7 @@
req = container_of(bh,struct drbd_request,private_bio);
PARANOIA_BUG_ON(!VALID_POINTER(req));
+ drbd_chk_io_error(mdev,!uptodate);
drbd_end_req(req, RQ_DRBD_WRITTEN, uptodate, drbd_req_get_sector(req));
drbd_al_complete_io(mdev,drbd_req_get_sector(req));
dec_local(mdev);
@@ -194,6 +193,7 @@
list_del(&e->w.list);
spin_unlock_irqrestore(&mdev->ee_lock,flags);
+ drbd_chk_io_error(mdev,error);
drbd_queue_work(mdev,&mdev->data.work,&e->w);
dec_local(mdev);
return 0;
@@ -230,10 +230,7 @@
spin_unlock_irqrestore(&mdev->ee_lock,flags);
- if( mdev->do_panic && error) {
- drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
- }
-
+ drbd_chk_io_error(mdev,error);
wake_asender(mdev);
dec_local(mdev);
return 0;
@@ -256,6 +253,7 @@
req = container_of(bio,struct drbd_request,private_bio);
PARANOIA_BUG_ON(!VALID_POINTER(req));
+ drbd_chk_io_error(mdev,error);
drbd_end_req(req, RQ_DRBD_WRITTEN, (error == 0), drbd_req_get_sector(req));
drbd_al_complete_io(mdev,drbd_req_get_sector(req));
dec_local(mdev);
@@ -375,8 +373,15 @@
struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
int ok;
- ok=drbd_send_block(mdev, DataReply, e);
- dec_unacked(mdev,HERE); // THINK unconditional?
+ if(likely(drbd_bio_uptodate(&e->private_bio))) {
+ ok=drbd_send_block(mdev, DataReply, e);
+ } else {
+ ok=drbd_send_ack(mdev,NegDReply,e);
+ ERR("Sending NegDReply. I guess it gets messy.\n");
+ drbd_io_error(mdev);
+ }
+
+ dec_unacked(mdev,HERE);
spin_lock_irq(&mdev->ee_lock);
drbd_put_ee(mdev,e);
@@ -392,9 +397,17 @@
int ok;
drbd_rs_complete_io(mdev,drbd_ee_get_sector(e));
- inc_rs_pending(mdev);
- ok=drbd_send_block(mdev, DataReply, e);
- dec_unacked(mdev,HERE); // THINK unconditional?
+
+ if(likely(drbd_bio_uptodate(&e->private_bio))) {
+ inc_rs_pending(mdev);
+ ok=drbd_send_block(mdev, DataReply, e);
+ } else {
+ ok=drbd_send_ack(mdev,NegDReply,e);
+ ERR("Sending NegDReply. I guess it gets messy.\n");
+ drbd_io_error(mdev);
+ }
+
+ dec_unacked(mdev,HERE);
spin_lock_irq(&mdev->ee_lock);
drbd_put_ee(mdev,e);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_fs.c,v
retrieving revision 1.28.2.71
retrieving revision 1.28.2.72
diff -u -3 -r1.28.2.71 -r1.28.2.72
--- drbd_fs.c 9 Feb 2004 10:36:54 -0000 1.28.2.71
+++ drbd_fs.c 11 Feb 2004 13:25:20 -0000 1.28.2.72
@@ -298,7 +298,7 @@
mdev->backing_bdev = bdev;
mdev->lo_file = filp;
mdev->lo_usize = new_conf.disk_size;
- mdev->do_panic = new_conf.do_panic;
+ mdev->on_io_error = new_conf.on_io_error;
mdev->send_cnt = 0;
mdev->recv_cnt = 0;
@@ -325,6 +325,7 @@
})
#undef min_not_zero
+ clear_bit(SENT_DISK_FAILURE,&mdev->flags);
set_bit(MD_IO_ALLOWED,&mdev->flags);
i = drbd_md_read(mdev);
drbd_determin_dev_size(mdev);
@@ -406,7 +407,7 @@
cn.cstate=mdev->cstate;
cn.disk_size_user=mdev->lo_usize;
cn.meta_index=mdev->md_index;
- cn.do_panic=mdev->do_panic;
+ cn.on_io_error=mdev->on_io_error;
memcpy(&cn.nconf, &mdev->conf, sizeof(struct net_config));
memcpy(&cn.sconf, &mdev->sync_conf, sizeof(struct syncer_config));
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.120
retrieving revision 1.58.2.121
diff -u -3 -r1.58.2.120 -r1.58.2.121
--- drbd_int.h 9 Feb 2004 10:36:54 -0000 1.58.2.120
+++ drbd_int.h 11 Feb 2004 13:25:20 -0000 1.58.2.121
@@ -237,38 +237,6 @@
#define RQ_DRBD_DONE 0x0030
#define RQ_DRBD_READ 0x0040
-#define DRBD_PANIC 2
-/* do_panic alternatives:
- * 0: panic();
- * 1: machine_halt; FIXME does not work;
- * 2: prink(EMERG ), plus flag to fail all eventual drbd IO, plus panic()
- */
-
-extern volatile int drbd_did_panic;
-
-#include <linux/reboot.h>
-
-#if DRBD_PANIC == 0
-#define drbd_panic(x...) panic(x)
-#elif DRBD_PANIC == 1
-#error "THIS DRBD_PANIC SETTING DOES NOT WORK (yet)"
-#define drbd_panic(x...) do { \
- printk(KERN_EMERG x); \
- notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); \
- printk(KERN_EMERG "System halted.\n"); \
- machine_halt(); \
- do_exit(0); \
-} while (0)
-#else
-#define drbd_panic(x...) do { \
- printk(KERN_EMERG x); \
- drbd_did_panic = DRBD_MD_MAGIC; \
- smp_mb(); \
- panic(x); \
-} while (0)
-#endif
-#undef DRBD_PANIC
-
enum MetaDataFlags {
MDF_Consistent = 1,
MDF_PrimaryInd = 2,
@@ -577,6 +545,7 @@
#define PARTNER_DISKLESS 8
#define PROCESS_EE_RUNNING 9
#define MD_IO_ALLOWED 10
+#define SENT_DISK_FAILURE 11
struct BitMap {
unsigned long dev_size;
@@ -637,7 +606,7 @@
#endif
struct net_config conf;
struct syncer_config sync_conf;
- int do_panic;
+ enum io_error_handler on_io_error;
struct semaphore device_mutex;
struct drbd_socket data; // for data/barrier/cstate/parameter packets
struct drbd_socket meta; // for ping/ack (metadata) packets
@@ -944,6 +913,49 @@
#include "drbd_compat_wrappers.h"
+/**
+ * drbd_chk_io_error: Handles the on_io_error setting, should be called from
+ * all io completion handlers.
+ */
+static inline void drbd_chk_io_error(drbd_dev* mdev, int error)
+{
+ if (error) {
+ switch(mdev->on_io_error) {
+ case PassOn:
+ ERR("Ignoring local IO error!\n");
+ break;
+ case Panic:
+ set_bit(DISKLESS,&mdev->flags);
+ smp_mb();
+ panic(DEVICE_NAME" : IO error on backing device!\n");
+ break;
+ case Detach:
+ set_bit(DISKLESS,&mdev->flags);
+ smp_mb(); // Nack is sent in w_e handlers.
+ break;
+ }
+ }
+}
+
+/**
+ * drbd_io_error: Handles the on_io_error setting, should be called in the
+ * unlikely(!drbd_bio_uptodate(&e->private_bio)) case from kernel thread context.
+ */
+static inline int drbd_io_error(drbd_dev* mdev)
+{
+ int ok=1;
+
+ if(mdev->on_io_error == Panic || mdev->on_io_error == Detach) {
+ if(!test_bit(SENT_DISK_FAILURE,&mdev->flags)) {
+ D_ASSERT(test_bit(DISKLESS,&mdev->flags));
+ ok = drbd_send_param(mdev,0);
+ set_bit(SENT_DISK_FAILURE,&mdev->flags);
+ WARN("Notified peer that my disk is broken.\n");
+ }
+ }
+
+ return ok;
+}
static inline int semaphore_is_locked(struct semaphore* s)
{
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.107
retrieving revision 1.97.2.108
diff -u -3 -r1.97.2.107 -r1.97.2.108
--- drbd_receiver.c 9 Feb 2004 10:36:54 -0000 1.97.2.107
+++ drbd_receiver.c 11 Feb 2004 13:25:20 -0000 1.97.2.108
@@ -768,12 +768,19 @@
{
struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
sector_t sector = drbd_ee_get_sector(e);
+ int ok;
drbd_rs_complete_io(mdev,sector); // before set_in_sync() !
- drbd_set_in_sync(mdev, sector, drbd_ee_get_size(e));
- drbd_send_ack(mdev,WriteAck,e);
- dec_unacked(mdev,HERE); // FIXME unconditional ??
- return TRUE;
+ if(likely(drbd_bio_uptodate(&e->private_bio))) {
+ drbd_set_in_sync(mdev, sector, drbd_ee_get_size(e));
+ ok = drbd_send_ack(mdev,WriteAck,e);
+ } else {
+ ok = drbd_send_ack(mdev,NegAck,e);
+ ok&= drbd_io_error(mdev);
+ }
+
+ dec_unacked(mdev,HERE);
+ return ok;
}
int recv_resync_read(drbd_dev *mdev, struct Pending_read *pr,
@@ -928,17 +935,25 @@
STATIC int e_end_block(drbd_dev *mdev, struct drbd_work *w)
{
struct Tl_epoch_entry *e = (struct Tl_epoch_entry*)w;
- int ok=TRUE;
+ sector_t sector = drbd_ee_get_sector(e);
+ int ok=1;
mdev->epoch_size++;
if(mdev->conf.wire_protocol == DRBD_PROT_C) {
- if( mdev->cstate > Connected ) {
- drbd_set_in_sync(mdev
- , drbd_ee_get_sector(e)
- , drbd_ee_get_size(e));
+ if(likely(drbd_bio_uptodate(&e->private_bio))) {
+ ok=drbd_send_ack(mdev,WriteAck,e);
+ if(ok) drbd_set_in_sync(mdev,sector,drbd_ee_get_size(e));
+ } else {
+ ok = drbd_send_ack(mdev,NegAck,e);
+ ok&= drbd_io_error(mdev);
}
- ok=drbd_send_ack(mdev,WriteAck,e);
- dec_unacked(mdev,HERE); // FIXME unconditional ??
+ dec_unacked(mdev,HERE);
+
+ return ok;
+ }
+
+ if(unlikely(!drbd_bio_uptodate(&e->private_bio))) {
+ ok = drbd_io_error(mdev);
}
return ok;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.53
retrieving revision 1.33.2.54
diff -u -3 -r1.33.2.53 -r1.33.2.54
--- drbd_req-2.4.c 8 Feb 2004 19:53:19 -0000 1.33.2.53
+++ drbd_req-2.4.c 11 Feb 2004 13:25:20 -0000 1.33.2.54
@@ -82,11 +82,10 @@
drbd_set_in_sync(mdev,rsector,drbd_req_get_size(req));
}
+ /// TODO look at mdev->on_io_error
+ /// in the Detach case do not report it to the application.
drbd_bio_endio(req->master_bio,(req->rq_status & 0x0001));
- if( mdev->do_panic && !(req->rq_status & 0x0001) ) {
- drbd_panic(DEVICE_NAME": The lower-level device had an error.\n");
- }
INVALIDATE_MAGIC(req);
mempool_free(req,drbd_request_mempool);
@@ -286,6 +285,8 @@
if( rw == READ || rw == READA ) {
mdev->read_cnt += size >> 9;
dec_local(mdev); // FIXME TODO -> completion handler
+ /// TODO FIXME should be able to reissue the request to
+ /// the peer in case it fails locally.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
bio->b_rdev = mdev->backing_bdev;
#else