[DRBD-cvs] svn commit by lars - r2115 - trunk/drbd - next try:
read/write bitmap asynchronously. again, untested ...
drbd-cvs at lists.linbit.com
drbd-cvs at lists.linbit.com
Sun Mar 12 22:01:36 CET 2006
Author: lars
Date: 2006-03-12 22:01:35 +0100 (Sun, 12 Mar 2006)
New Revision: 2115
Modified:
trunk/drbd/drbd_bitmap.c
trunk/drbd/drbd_compat_wrappers.h
Log:
next try: read/write bitmap asynchronously. again, untested ...
Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c 2006-03-12 09:40:26 UTC (rev 2114)
+++ trunk/drbd/drbd_bitmap.c 2006-03-12 21:01:35 UTC (rev 2115)
@@ -80,6 +80,12 @@
struct drbd_bitmap {
unsigned long *bm;
spinlock_t bm_lock;
+ /* WARNING unsigned long bm_fo and friends:
+ * 32bit number of bit offset is just enough for 512 MB bitmap.
+ * it will blow up if we make the bitmap bigger...
+ * not that it makes much sense to have a bitmap that large,
+ * rather change the granularity to 16k or 64k or something.
+ */
unsigned long bm_fo; // next offset for drbd_bm_find_next
unsigned long bm_set; // nr of set bits; THINK maybe atomic_t ?
unsigned long bm_bits;
@@ -87,8 +93,12 @@
sector_t bm_dev_capacity;
struct semaphore bm_change; // serializes resize operations
+ atomic_t bm_async_io;
+ wait_queue_head_t bm_io_wait;
+
+ unsigned long bm_flags;
+
// { REMOVE
- unsigned long bm_flags; // currently debugging aid only
unsigned long bm_line;
char *bm_file;
// }
@@ -97,6 +107,8 @@
// { REMOVE once we serialize all state changes properly
#define D_BUG_ON(x) ERR_IF(x) { dump_stack(); }
#define BM_LOCKED 0
+#define BM_MD_IO_ERROR (BITS_PER_LONG-1) // 31? 63?
+
#if 0 // simply disabled for now...
#define MUST_NOT_BE_LOCKED() do { \
if (test_bit(BM_LOCKED,&b->bm_flags)) { \
@@ -294,13 +306,16 @@
}
}
-STATIC unsigned long bm_count_bits(struct drbd_bitmap * b)
+STATIC unsigned long bm_count_bits(struct drbd_bitmap * b, int just_read)
{
unsigned long *bm = b->bm;
unsigned long *ep = b->bm + b->bm_words;
unsigned long bits = 0;
while ( bm < ep ) {
+ /* on little endian, this is *bm = *bm;
+ * and should be optimized away by the compiler */
+ if (just_read) *bm = lel_to_cpu(*bm);
bits += hweight_long(*bm++);
}
@@ -403,7 +418,7 @@
b->bm_words = words;
b->bm_dev_capacity = capacity;
bm_clear_surplus(b);
- if( !growing ) b->bm_set = bm_count_bits(b);
+ if( !growing ) b->bm_set = bm_count_bits(b,0);
bm_end_info(mdev, __FUNCTION__ );
spin_unlock_irq(&b->bm_lock);
INFO("resync bitmap: bits=%lu words=%lu\n",bits,words);
@@ -589,6 +604,52 @@
spin_unlock_irq(&b->bm_lock);
}
+int drbd_bm_async_io_complete(struct bio *bio, unsigned int bytes_done, int error)
+{
+ struct drbd_bitmap *b = bio->bi_private;
+
+ if (bio->bi_size)
+ return 1;
+
+ if (error) {
+ /* doh. what now?
+ * for now, set all bits, and flag MD_IO_ERROR
+ */
+ /* FIXME kmap_atomic memset etc. pp. */
+ __set_bit(BM_MD_IO_ERROR,&b->bm_flags);
+ }
+ if (atomic_dec_and_test(&b->bm_async_io))
+ wake_up(&b->bm_io_wait);
+
+ bio_put(bio);
+
+ return 0;
+}
+
+STATIC void drbd_bm_page_io_async(drbd_dev *mdev, struct drbd_bitmap *b, int page_nr, int rw)
+{
+ /* we are process context. we always get a bio */
+ /* THINK: do we need GFP_NOIO here? */
+ struct bio *bio = bio_alloc(GFP_KERNEL, 1);
+ struct page *page = virt_to_page((char*)(b->bm) + (PAGE_SIZE*page_nr));
+ unsigned int len;
+ sector_t on_disk_sector = mdev->bc->md.md_offset + mdev->bc->md.bm_offset;
+ on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+
+ /* this might happen with very small flexible external meta data device */
+ len = min_t(unsigned int, PAGE_SIZE,
+ (drbd_md_last_sector(mdev->bc) - on_disk_sector + 1)<<9);
+
+ D_DUMPLU(on_disk_sector);
+ D_DUMPI(len);
+
+ bio->bi_bdev = mdev->bc->md_bdev;
+ bio->bi_sector = on_disk_sector;
+ bio_add_page(bio, page, len, 0);
+ bio->bi_private = b;
+ bio->bi_end_io = drbd_bm_async_io_complete;
+ submit_bio(rw, bio);
+}
/* read one sector of the on disk bitmap into memory.
* on disk bitmap is little endian.
* @enr is _sector_ offset from start of on disk bitmap (aka bm-extent nr).
@@ -629,28 +690,110 @@
/**
* drbd_bm_read: Read the whole bitmap from its on disk location.
+ *
+ * currently only called from "drbd_ioctl_set_disk"
+ * FIXME need to be able to return an error!!
+ *
*/
-void drbd_bm_read(struct Drbd_Conf *mdev)
+# if defined(__LITTLE_ENDIAN)
+ /* nothing to do, on disk == in memory */
+# define bm_cpu_to_lel(x) ((void)0)
+# else
+void bm_cpu_to_lel(struct drbd_bitmap *b)
{
+ /* need to cpu_to_lel all the pages ...
+ * this may be optimized by using
+ * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
+ * the following is still not optimal, but better than nothing */
+ const unsigned long *end = b->bm+b->bm_words;
+ unsigned long *bm;
+ if (b->bm_set == 0) {
+ /* no page at all; avoid swap if all is 0 */
+ return;
+ } else if (b->bm_set == b->bm_bits) {
+ /* only the last words */
+ bm = end-2;
+ } else {
+ /* all pages */
+ bm = b->bm;
+ }
+ for (; bm < end; bm++) {
+ *bm = cpu_to_lel(*bm);
+ }
+}
+# endif
+/* lel_to_cpu == cpu_to_lel */
+# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
+
+STATIC void drbd_bm_rw(struct Drbd_Conf *mdev, int rw)
+{
struct drbd_bitmap *b = mdev->bitmap;
- sector_t sector;
- int bm_words, num_sectors;
+ /* sector_t sector; */
+ int bm_words, num_pages, i;
+ unsigned long now;
char ppb[10];
MUST_BE_LOCKED();
bm_words = drbd_bm_words(mdev);
- num_sectors = (bm_words*sizeof(long) + 511) >> 9;
+ num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;
- for (sector = 0; sector < num_sectors; sector++) {
- // FIXME do something on io error here?
- drbd_bm_read_sect(mdev,sector);
+ /* OK, I manipulate the bitmap low level,
+ * and I expect to be the exclusive user.
+ * If not, I am really in a bad mood...
+ * to catch such bugs early, make all people who want to access the
+ * bitmap while I read/write it dereference a NULL pointer :->
+ */
+ mdev->bitmap = NULL;
+
+ if(rw == WRITE) bm_cpu_to_lel(b);
+
+ now = jiffies;
+ atomic_set(&b->bm_async_io, num_pages);
+ for (i = 0; i < num_pages; i++) {
+ /* let the layers below us try to merge these bios... */
+ drbd_bm_page_io_async(mdev,b,i,rw);
}
+ blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+ wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
+ INFO("%s of bitmap took %lu jiffies\n",
+ rw == READ ? "reading" : "writing", jiffies - now);
+
+ if (test_bit(BM_MD_IO_ERROR,&b->bm_flags)) {
+ /* FIXME correct handling of this.
+ * detach?
+ */
+ ALERT("we had at least one MD IO ERROR during bitmap IO\n");
+ drbd_chk_io_error(mdev, 1);
+ drbd_io_error(mdev);
+ }
+
+ now = jiffies;
+ if(rw == WRITE) {
+ bm_lel_to_cpu(b);
+ } else /* rw == READ */ {
+ /* just read, if neccessary adjust endianness */
+ b->bm_set = bm_count_bits(b, 1);
+ INFO("recounting of set bits took additional %lu jiffies\n",
+ jiffies - now);
+ }
+
+ /* ok, done,
+ * now it is visible again
+ */
+
+ mdev->bitmap = b;
+
INFO("%s marked out-of-sync by on disk bit-map.\n",
ppsize(ppb,drbd_bm_total_weight(mdev) << (BM_BLOCK_SIZE_B-10)) );
}
+void drbd_bm_read(struct Drbd_Conf *mdev)
+{
+ drbd_bm_rw(mdev, READ);
+}
+
/**
* drbd_bm_write_sect: Writes a 512 byte piece of the bitmap to its
* on disk location. On disk bitmap is little endian.
Modified: trunk/drbd/drbd_compat_wrappers.h
===================================================================
--- trunk/drbd/drbd_compat_wrappers.h 2006-03-12 09:40:26 UTC (rev 2114)
+++ trunk/drbd/drbd_compat_wrappers.h 2006-03-12 21:01:35 UTC (rev 2115)
@@ -10,7 +10,7 @@
/* struct page has a union in 2.6.15 ...
* an anonymous union and struct since 2.6.16 */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)) || LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)) || (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
#define U_PRIVATE private
#else
#define U_PRIVATE u.private
More information about the drbd-cvs
mailing list