Index: drbd-8.0.6/drbd/drbd_receiver.c
===================================================================
--- drbd-8.0.6/drbd/drbd_receiver.c	(revision 21229)
+++ drbd-8.0.6/drbd/drbd_receiver.c	(working copy)
@@ -1094,6 +1094,8 @@
 	       INFO("submit EE (RS)WRITE sec=%llus size=%u ee=%p\n",
 		    (unsigned long long)e->sector,e->size,e);
 	       );
+
+	dump_internal_bio("Sec", mdev, WRITE, e->private_bio, 0);
 	drbd_generic_make_request(mdev,WRITE,DRBD_FAULT_RS_WR,e->private_bio);
 	/* accounting done in endio */
 
@@ -1316,6 +1318,7 @@
 	struct Tl_epoch_entry *e;
 	Drbd_Data_Packet *p = (Drbd_Data_Packet*)h;
 	int header_size, data_size;
+	int rw = WRITE;
 	unsigned int barrier_nr = 0;
 	unsigned int epoch_size = 0;
 	u32 dp_flags;
@@ -1359,10 +1362,10 @@
 	dp_flags = be32_to_cpu(p->dp_flags);
 	if ( dp_flags & DP_HARDBARRIER ) {
-		e->private_bio->bi_rw |= BIO_RW_BARRIER;
+		rw |= (1<<BIO_RW_BARRIER);
 	}
 	if ( dp_flags & DP_RW_SYNC ) {
-		e->private_bio->bi_rw |= BIO_RW_SYNC;
+		rw |= (1<<BIO_RW_SYNC);
 	}
 	if ( dp_flags & DP_MAY_SET_IN_SYNC ) {
 		e->flags |= EE_MAY_SET_IN_SYNC;
 	}
@@ -1550,7 +1553,7 @@
 		} else {
 			e->barrier_nr = mdev->next_barrier_nr;
 		}
-		e->private_bio->bi_rw |= BIO_RW_BARRIER;
+		rw |= (1<<BIO_RW_BARRIER);
 		mdev->next_barrier_nr = 0;
 	}
 	list_add(&e->w.list,&mdev->active_ee);
@@ -1592,7 +1595,8 @@
 	       (unsigned long long)e->sector,e->size,e);
 	      );
 	/* FIXME drbd_al_begin_io in case we have two primaries... */
-	drbd_generic_make_request(mdev,WRITE,DRBD_FAULT_DT_WR,e->private_bio);
+	dump_internal_bio("Sec", mdev, rw, e->private_bio, 0);
+	drbd_generic_make_request(mdev,rw,DRBD_FAULT_DT_WR,e->private_bio);
 	/* accounting done in endio */
 
 	maybe_kick_lo(mdev);
@@ -1688,6 +1692,7 @@
 	       (unsigned long long)e->sector,e->size,e);
 	      );
 	/* FIXME actually, it could be a READA originating from the peer ... */
+	dump_internal_bio("Sec",mdev,READ,e->private_bio,0);
 	drbd_generic_make_request(mdev,READ,fault_type,e->private_bio);
 	maybe_kick_lo(mdev);
 
Index: drbd-8.0.6/drbd/drbd_nl.c
===================================================================
--- drbd-8.0.6/drbd/drbd_nl.c	(revision 21229)
+++ drbd-8.0.6/drbd/drbd_nl.c	(working copy)
@@ -1003,6 +1003,9 @@
 		dec_local(mdev);
 	}
 
+	/* Force meta data to be written to ensure we determine if barriers are supported */
+	drbd_md_mark_dirty(mdev);
+	drbd_md_sync(mdev);
 
 	reply->ret_code = retcode;
 
Index: drbd-8.0.6/drbd/drbd_actlog.c
===================================================================
--- drbd-8.0.6/drbd/drbd_actlog.c	(revision 21229)
+++ drbd-8.0.6/drbd/drbd_actlog.c	(working copy)
@@ -39,32 +39,57 @@
 		      struct page *page, sector_t sector, int rw, int size)
 {
-	struct bio *bio = bio_alloc(GFP_NOIO, 1);
-	struct completion event;
+	struct bio *bio;
+	struct drbd_md_io md_io;
 	int ok;
 
+	md_io.mdev = mdev;
+	init_completion(&md_io.event);
+	md_io.error = 0;
+
+#ifdef BIO_RW_BARRIER
+	if (rw == WRITE && !(mdev->flags & NO_BARRIER_SUPP))
+		rw |= (1<<BIO_RW_BARRIER);
+ retry:
+#endif
+#ifdef BIO_RW_SYNC
+	rw |= (1<<BIO_RW_SYNC);
+#endif
+
+	bio = bio_alloc(GFP_NOIO, 1);
 	bio->bi_bdev = bdev->md_bdev;
 	bio->bi_sector = sector;
 	ok = (bio_add_page(bio, page, size, 0) == size);
 	if(!ok) goto out;
-	init_completion(&event);
-	bio->bi_private = &event;
+	bio->bi_private = &md_io;
 	bio->bi_end_io = drbd_md_io_complete;
 
+	dump_internal_bio("Md",mdev,rw,bio,0);
+
 	if (FAULT_ACTIVE(mdev, (rw & WRITE)? DRBD_FAULT_MD_WR:DRBD_FAULT_MD_RD)) {
 		bio->bi_rw |= rw;
 		bio_endio(bio,bio->bi_size,-EIO);
 	} else {
-#ifdef BIO_RW_SYNC
-		submit_bio(rw | (1 << BIO_RW_SYNC), bio);
-#else
 		submit_bio(rw, bio);
+#ifndef BIO_RW_SYNC
 		drbd_blk_run_queue(bdev_get_queue(bdev->md_bdev));
 #endif
 	}
-	wait_for_completion(&event);
+	wait_for_completion(&md_io.event);
 	ok = test_bit(BIO_UPTODATE, &bio->bi_flags);
+
+#ifdef BIO_RW_BARRIER
+	/* check for unsupported barrier op */
+	if (unlikely(md_io.error == -EOPNOTSUPP && (rw & BIO_RW_BARRIER))) {
+		/* Try again with no barrier */
+		WARN("Barriers not supported - disabling");
+		mdev->flags |= NO_BARRIER_SUPP;
+		rw &= ~BIO_RW_BARRIER;
+		bio_put(bio);
+		goto retry;
+	}
+#endif
  out:
 	bio_put(bio);
 	return ok;
Index: drbd-8.0.6/drbd/drbd_worker.c
===================================================================
--- drbd-8.0.6/drbd/drbd_worker.c	(revision 21229)
+++ drbd-8.0.6/drbd/drbd_worker.c	(working copy)
@@ -64,11 +64,17 @@
  */
 int drbd_md_io_complete(struct bio *bio, unsigned int bytes_done, int error)
 {
+	struct drbd_md_io *md_io;
+
 	if (bio->bi_size) return 1;
 
-	/* error parameter ignored:
-	 * drbd_md_sync_page_io explicitly tests bio_uptodate(bio); */
-	complete((struct completion*)bio->bi_private);
+	md_io = (struct drbd_md_io *)bio->bi_private;
+
+	md_io->error = error;
+
+	dump_internal_bio("Md", md_io->mdev, 0, bio, 1);
+
+	complete(&md_io->event);
 	return 0;
 }
 
@@ -99,6 +105,8 @@
 
 	D_ASSERT(e->block_id != ID_VACANT);
 
+	dump_internal_bio("Sec", mdev, 0, bio, 1);
+
 	spin_lock_irqsave(&mdev->req_lock,flags);
 	mdev->read_cnt += e->size >> 9;
 	list_del(&e->w.list);
@@ -145,6 +153,8 @@
 
 	D_ASSERT(e->block_id != ID_VACANT);
 
+	dump_internal_bio("Sec", mdev, 0, bio, 1);
+
 	spin_lock_irqsave(&mdev->req_lock,flags);
 	mdev->writ_cnt += e->size >> 9;
 	is_syncer_req = is_syncer_block_id(e->block_id);
@@ -210,6 +220,8 @@
 		error = -EIO;
 	}
 
+	dump_internal_bio("Pri", mdev, 0, bio, 1);
+
 	/* to avoid recursion in _req_mod */
 	what = error
 	       ? (bio_data_dir(bio) == WRITE)
Index: drbd-8.0.6/drbd/drbd_main.c
===================================================================
--- drbd-8.0.6/drbd/drbd_main.c	(revision 21229)
+++ drbd-8.0.6/drbd/drbd_main.c	(working copy)
@@ -1710,14 +1710,18 @@
 	p.seq_num = cpu_to_be32( req->seq_num = atomic_add_return(1,&mdev->packet_seq) );
 	dp_flags = 0;
-	if(req->master_bio->bi_rw & BIO_RW_BARRIER) {
+
+	/* NOTE: no need to check if barriers supported here as we would
+	 * not pass the test in make_request_common in that case
+	 */
+	if (bio_barrier(req->master_bio)) {
 		dp_flags |= DP_HARDBARRIER;
 	}
-	if(req->master_bio->bi_rw & BIO_RW_SYNC) {
+	if (bio_sync(req->master_bio)) {
 		dp_flags |= DP_RW_SYNC;
 	}
-	if(mdev->state.conn >= SyncSource &&
-	   mdev->state.conn <= PausedSyncT) {
+	if (mdev->state.conn >= SyncSource &&
+	    mdev->state.conn <= PausedSyncT) {
 		dp_flags |= DP_MAY_SET_IN_SYNC;
 	}
@@ -3229,7 +3233,7 @@
 
 // Debug routine to dump info about bio
 
-void _dump_bio(drbd_dev *mdev, struct bio *bio, int complete)
+void _dump_bio(const char *pfx, drbd_dev *mdev, int rw, struct bio *bio, int complete)
 {
 #ifdef CONFIG_LBD
 #define SECTOR_FORMAT "%Lx"
@@ -3242,16 +3246,27 @@
 	char *faddr = (char *)(lowaddr);
 	struct bio_vec *bvec;
 	int segno;
+	int biorw, biobarrier, biosync;
 
-	INFO("%s %s Bio:%p - %soffset " SECTOR_FORMAT ", size %x\n",
+	rw |= bio->bi_rw;
+
+	biorw      = (rw & (RW_MASK|RWA_MASK));
+	biobarrier = (rw & (1<<BIO_RW_BARRIER));
+	biosync    = (rw & (1<<BIO_RW_SYNC));
+
+	INFO("%s %s:%s%s%s Bio:%p - %soffset " SECTOR_FORMAT ", size %x\n",
 	     complete? "<<<":">>>",
-	     bio_rw(bio)==WRITE?"Write":"Read",bio,
+	     pfx,
+	     biorw==WRITE?"Write":"Read",
+	     biobarrier?":B":"",
+	     biosync?":S":"",
+	     bio,
 	     complete? (drbd_bio_uptodate(bio)? "Success, ":"Failed, ") : "",
"Success, ":"Failed, ") : "", bio->bi_sector << SECTOR_SHIFT, bio->bi_size); if (trace_level >= TraceLvlMetrics && - ((bio_rw(bio) == WRITE) ^ complete) ) { + ((biorw == WRITE) ^ complete) ) { printk(KERN_DEBUG " ind page offset length\n"); __bio_for_each_segment(bvec, bio, segno, 0) { printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n",segno, Index: drbd-8.0.6/drbd/drbd_req.c =================================================================== --- drbd-8.0.6/drbd/drbd_req.c (revision 21229) +++ drbd-8.0.6/drbd/drbd_req.c (working copy) @@ -185,58 +185,58 @@ static void _about_to_complete_local_write(drbd_dev *mdev, drbd_request_t *req) { const unsigned long s = req->rq_state; - drbd_request_t *i; - struct Tl_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; + drbd_request_t *i; + struct Tl_epoch_entry *e; + struct hlist_node *n; + struct hlist_head *slot; - /* before we can signal completion to the upper layers, - * we may need to close the current epoch */ - if (req->epoch == mdev->newest_barrier->br_number) - set_bit(ISSUE_BARRIER,&mdev->flags); + /* before we can signal completion to the upper layers, + * we may need to close the current epoch */ + if (req->epoch == mdev->newest_barrier->br_number) + set_bit(ISSUE_BARRIER,&mdev->flags); - /* we need to do the conflict detection stuff, - * if we have the ee_hash (two_primaries) and - * this has been on the network */ - if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { - const sector_t sector = req->sector; - const int size = req->size; + /* we need to do the conflict detection stuff, + * if we have the ee_hash (two_primaries) and + * this has been on the network */ + if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { + const sector_t sector = req->sector; + const int size = req->size; - /* ASSERT: - * there must be no conflicting requests, since - * they must have been failed on the spot */ + /* ASSERT: + * there must be no conflicting requests, since + * they must have been failed on the spot */ #define OVERLAPS overlaps(sector, size, i->sector, i->size) - slot = tl_hash_slot(mdev,sector); - hlist_for_each_entry(i, n, slot, colision) { - if (OVERLAPS) { - ALERT("LOGIC BUG: completed: %p %llus +%u; other: %p %llus +%u\n", - req, (unsigned long long)sector, size, - i, (unsigned long long)i->sector, i->size); - } - } + slot = tl_hash_slot(mdev,sector); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + ALERT("LOGIC BUG: completed: %p %llus +%u; other: %p %llus +%u\n", + req, (unsigned long long)sector, size, + i, (unsigned long long)i->sector, i->size); + } + } - /* maybe "wake" those conflicting epoch entries - * that wait for this request to finish. - * - * currently, there can be only _one_ such ee - * (well, or some more, which would be pending - * DiscardAck not yet sent by the asender...), - * since we block the receiver thread upon the - * first conflict detection, which will wait on - * misc_wait. maybe we want to assert that? - * - * anyways, if we found one, - * we just have to do a wake_up. */ + /* maybe "wake" those conflicting epoch entries + * that wait for this request to finish. + * + * currently, there can be only _one_ such ee + * (well, or some more, which would be pending + * DiscardAck not yet sent by the asender...), + * since we block the receiver thread upon the + * first conflict detection, which will wait on + * misc_wait. maybe we want to assert that? + * + * anyways, if we found one, + * we just have to do a wake_up. 
*/ #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) - slot = ee_hash_slot(mdev,req->sector); - hlist_for_each_entry(e, n, slot, colision) { - if (OVERLAPS) { - wake_up(&mdev->misc_wait); - break; - } - } + slot = ee_hash_slot(mdev,req->sector); + hlist_for_each_entry(e, n, slot, colision) { + if (OVERLAPS) { + wake_up(&mdev->misc_wait); + break; } + } + } #undef OVERLAPS } @@ -973,7 +973,6 @@ local = 0; } if (remote) dec_ap_pending(mdev); - dump_bio(mdev,req->master_bio,1); /* THINK: do we want to fail it (-EIO), or pretend success? */ bio_endio(req->master_bio, req->master_bio->bi_size, 0); req->master_bio = NULL; @@ -1000,6 +999,8 @@ * was not detached below us? */ req->private_bio->bi_bdev = mdev->bc->backing_bdev; + dump_internal_bio("Pri",mdev,rw,req->private_bio,0); + if (FAULT_ACTIVE(mdev, rw==WRITE ? DRBD_FAULT_DT_WR : ( rw==READ ? DRBD_FAULT_DT_RD : DRBD_FAULT_DT_RA ) )) @@ -1075,8 +1076,13 @@ return 0; } - /* Currently our BARRIER code is disabled. */ - if(unlikely(bio_barrier(bio))) { + /* Reject barrier requests if we know the underlying device does + * not support them. + * XXX: Need to get this info from peer as well some how so we + * XXX: reject if EITHER side does not support them,,, + */ + if(unlikely(bio_barrier(bio) && (mdev->flags & NO_BARRIER_SUPP))) { + WARN("Rejecting barrier request as underlying device does not support\n"); bio_endio(bio, bio->bi_size, -EOPNOTSUPP); return 0; } Index: drbd-8.0.6/drbd/drbd_int.h =================================================================== --- drbd-8.0.6/drbd/drbd_int.h (revision 21229) +++ drbd-8.0.6/drbd/drbd_int.h (working copy) @@ -698,7 +698,8 @@ CRASHED_PRIMARY, // This node was a crashed primary. Gets // cleared when the state.conn goes into // Connected state. - WRITE_BM_AFTER_RESYNC // A kmalloc() during resync failed + WRITE_BM_AFTER_RESYNC, // A kmalloc() during resync failed + NO_BARRIER_SUPP, // underlying block device doesn't implement barriers }; struct drbd_bitmap; // opaque for Drbd_Conf @@ -767,6 +768,12 @@ struct disk_conf dc; /* The user provided config... */ }; +struct drbd_md_io { + struct Drbd_Conf *mdev; + struct completion event; + int error; +}; + struct Drbd_Conf { #ifdef PARANOIA long magic; @@ -1204,6 +1211,7 @@ TraceTypeUnplug = 0x00000020, TraceTypeNl = 0x00000040, TraceTypeALExts = 0x00000080, + TraceTypeIntRq = 0x00000100, }; static inline int @@ -1247,14 +1255,20 @@ unsigned int length); // Bio printing support -extern void _dump_bio(drbd_dev *mdev, struct bio *bio, int complete); +extern void _dump_bio(const char *pfx, drbd_dev *mdev, int rw, struct bio *bio, int complete); static inline void dump_bio(drbd_dev *mdev, struct bio *bio, int complete) { MTRACE(TraceTypeRq,TraceLvlSummary, - _dump_bio(mdev, bio, complete); + _dump_bio("Rq", mdev, 0, bio, complete); ); } +static inline void dump_internal_bio(const char *pfx, drbd_dev *mdev, int rw, struct bio *bio, int complete) { + MTRACE(TraceTypeIntRq,TraceLvlSummary, + _dump_bio(pfx, mdev, rw, bio, complete); + ); +} + // Packet dumping support extern void _dump_packet(drbd_dev *mdev, struct socket *sock, int recv, Drbd_Polymorph_Packet *p, char* file, int line); @@ -1274,6 +1288,7 @@ #define TRACE(ignored...) ((void)0) #define dump_bio(ignored...) ((void)0) +#define dump_internal_bio(ignored...) ((void)0) #define dump_packet(ignored...) ((void)0) #endif
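
For readers who want the gist of the drbd_actlog.c change without the kernel context, here is a small stand-alone user-space sketch of the same fallback pattern: issue the write with a barrier flag set, and when the lower layer answers -EOPNOTSUPP, remember that barriers are unsupported, clear the flag, and retry the request. Every name in it (fake_submit, FLAG_BARRIER, dev_flags, sync_write) is invented for the illustration; none of them are DRBD or kernel symbols.

/*
 * Illustration only (not part of the patch): the retry-on-EOPNOTSUPP
 * barrier fallback used by _drbd_md_sync_page_io above, modelled in
 * plain user-space C with made-up names.
 */
#include <errno.h>
#include <stdio.h>

#define FLAG_BARRIER    (1 << 0)   /* stands in for (1<<BIO_RW_BARRIER) */
#define NO_BARRIER_SUPP (1 << 0)   /* stands in for the new device flag  */

static unsigned long dev_flags;    /* per-device flags, like mdev->flags */

/* Pretend lower-level submit path that rejects barrier requests. */
static int fake_submit(int rw)
{
	if (rw & FLAG_BARRIER)
		return -EOPNOTSUPP;
	return 0;                  /* plain write "succeeds" */
}

static int sync_write(void)
{
	int rw = 0;
	int err;

	/* use a barrier unless we already learned it is unsupported */
	if (!(dev_flags & NO_BARRIER_SUPP))
		rw |= FLAG_BARRIER;
retry:
	err = fake_submit(rw);
	if (err == -EOPNOTSUPP && (rw & FLAG_BARRIER)) {
		/* same decision as the patch: disable barriers and retry */
		fprintf(stderr, "barriers not supported - disabling\n");
		dev_flags |= NO_BARRIER_SUPP;
		rw &= ~FLAG_BARRIER;
		goto retry;
	}
	return err;
}

int main(void)
{
	printf("first write:  %d\n", sync_write());  /* falls back, returns 0 */
	printf("second write: %d\n", sync_write());  /* barrier never retried */
	return 0;
}

The point of keeping the per-device flag is visible in the second call: once barrier support has been ruled out, later writes skip the doomed barrier attempt entirely, which is also why the drbd_nl.c hunk forces an early meta-data write so the answer is known before normal requests arrive.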