[DRBD-cvs] r1622 - trunk/user

svn at svn.drbd.org svn at svn.drbd.org
Tue Nov 2 18:36:55 CET 2004


Author: lars
Date: 2004-11-02 18:36:52 +0100 (Tue, 02 Nov 2004)
New Revision: 1622

Modified:
   trunk/user/drbdmeta.c
   trunk/user/drbdsetup.c
   trunk/user/drbdtool_common.c
   trunk/user/drbdtool_common.h
Log:
exclusion of drbdsetup and drbdmeta is now done with fcntl locks.
replaced drbdmeta with a mmap variant with improved error handling

Modified: trunk/user/drbdmeta.c
===================================================================
--- trunk/user/drbdmeta.c	2004-10-29 11:53:46 UTC (rev 1621)
+++ trunk/user/drbdmeta.c	2004-11-02 17:36:52 UTC (rev 1622)
@@ -31,6 +31,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -39,13 +40,25 @@
 #include <unistd.h>
 #include <fcntl.h>
 
-#include <linux/fs.h>     /* for BLKGETSIZE64 */
-#include <linux/drbd.h>   /* only use DRBD_MAGIC from here! */
+#include <linux/fs.h>		/* for BLKGETSIZE64 */
+#include <linux/drbd.h>		/* only use DRBD_MAGIC from here! */
 
 #include "drbdtool_common.h"
 #include "drbd_endian.h"
 
+/* FIXME? should use sector_t and off_t, not long/u64 ... */
+/* FIXME? rename open -> mmap, close -> munmap */
 
+/* Note RETURN VALUES:
+ * exit code convention: int vXY_something() and meta_blah return some negative
+ * error code, usually -1, when failed, 0 for success.
+ *
+ * FIXME some of the return -1; probably should better be exit(something);
+ * or some of the exit() should be rather some return?
+ *
+ * AND, the exit codes should follow some defined scheme.
+ */
+
 /*
  * I think this block of declarations and definitions should be
  * in some common.h, too.
@@ -59,14 +72,13 @@
 #define MD_AL_OFFSET_07    8
 #define MD_AL_MAX_SIZE_07  64
 #define MD_BM_OFFSET_07    (MD_AL_OFFSET_07 + MD_AL_MAX_SIZE_07)
-#define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3)
 #define MD_RESERVED_SIZE_07 ( (u64)128 * (1<<20) )
 #define MD_BM_MAX_SIZE_07  ( MD_RESERVED_SIZE_07 - MD_BM_OFFSET_07*512 )
 
+#define DRBD_MD_MAGIC_06   (DRBD_MAGIC+2)
+#define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3)
 #define DRBD_MD_MAGIC_08   (DRBD_MAGIC+4)
 
-#define DRBD_MD_MAGIC_06   (DRBD_MAGIC+2)
-
 enum MetaDataFlags {
 	__MDF_Consistent,
 	__MDF_PrimaryInd,
@@ -78,951 +90,1109 @@
 #define MDF_ConnectedInd    (1<<__MDF_ConnectedInd)
 #define MDF_FullSync        (1<<__MDF_FullSync)
 
-#define OR_EXIT(OBJ,OP,args...) \
-({ if(! OBJ->ops-> OP(OBJ, ##args) ) format_op_failed(OBJ, #OP ); })
-
 enum MetaDataIndex {
-	Flags,          /* Consistency flag,connected-ind,primary-ind */
-	HumanCnt,       /* human-intervention-count */
-	TimeoutCnt,     /* timout-count */
-	ConnectedCnt,   /* connected-count */
-	ArbitraryCnt,   /* arbitrary-count */
-	GEN_CNT_SIZE	/* MUST BE LAST! (and Flags must stay first...) */
+	Flags,			/* Consistency flag,connected-ind,primary-ind */
+	HumanCnt,		/* human-intervention-count */
+	TimeoutCnt,		/* timout-count */
+	ConnectedCnt,		/* connected-count */
+	ArbitraryCnt,		/* arbitrary-count */
+	GEN_CNT_SIZE		/* MUST BE LAST! (and Flags must stay first...) */
 };
 
-struct meta_data {
-	u32 gc[GEN_CNT_SIZE];   /* v06 */
-
-	u64 la_size;              /* v07  [ units of KB ] */
-	int bm_size;              /* v07 */
-	unsigned long *bitmap;    /* v07 */
-	int al_size;              /* v07 */
-	unsigned int  *act_log;   /* not yet implemented... */
-
-	unsigned long bits_set;   /* additional info, set by fopts->read() */
-};
-
 /*
  * }
  * end of should-be-shared
  */
 
-
 /*
- * drbdmeta specific types
+ * A word about mmap.
+ * The reason to use it is that I do not want to malloc 128MB just to
+ * read() and then count the bits, especially not within uml.
+ * the resulting program code is simpler, too.
+ * BUT we have to be careful not to accidentally touch that memory region,
+ * because it would change the on-disk content.
+ * We must check for out-of-bounds access anyway.
+ *
+ * I chose to have three different mmap'ed areas, because when we move to
+ * more flexible layout, this is more flexible, too.
+ *
+ * The al-sectors can then be indexed directly:
+ *   extent = be32_to_cpu(on_disk.al[7].updates[7].extent.be);
+ *
+ * similarly for the bitmap:
+ *   test_bit(bitnr & (BITS_PER_LONG-1),
+ *            le_long_to_cpu(on_disk.bm[bitnr / BITS_PER_LONG].le));
+ *
+ *   when counting the bits only, we can ignore endianness.  well, strictly
+ *   speaking, we'd need to verify the very last word for oob bits.
+ *
  */
 
-struct format_06 {
-	int fd;
-	int minor;
-};
+unsigned long count_bits(const unsigned long *w, const size_t nr_long_words)
+{	/* accumulate hweight_long() over the first nr_long_words words of w */
+	unsigned long bits = 0;
+	size_t i;	/* same type as the bound: no signed/unsigned compare */
+	for (i = 0; i < nr_long_words; i++)
+		bits += hweight_long(w[i]);
+	return bits;
+}
 
-struct format_07 {
-	int fd;
-	char *device_name;
-	int index;
+/* let gcc help us get it right.
+ * some explicit endian types */
+typedef struct { u64 le; } le_u64;
+typedef struct { u64 be; } be_u64;
+typedef struct { u32 le; } le_u32;
+typedef struct { u32 be; } be_u32;
+typedef struct { unsigned long le; } le_ulong;
+typedef struct { unsigned long be; } be_ulong;
+
+/* NOTE that this structure does not need to be packed,
+ * aligned, nor does it need to be in the same order as the on_disk variants.
+ */
+struct md_cpu {
+	/* present since drbd 0.6 */
+	u32 gc[GEN_CNT_SIZE];	/* generation counter */
+	u32 magic;
+	/* added in drbd 0.7;
+	 * 0.7 stores la_size on disk as kb, 0.8 in units of sectors.
+	 * we use sectors in our general working structure here */
+	u64 la_sect;		/* last agreed size. */
+	u32 md_size;
+	u32 al_offset;		/* offset to this block */
+	u32 al_nr_extents;	/* important for restoring the AL */
+	u32 bm_offset;		/* offset to the bitmap, from here */
+	/* more to come eventually */
 };
 
-struct format_ops;
+/*
+ * FIXME md_size not yet validated or used.
+ */
 
-struct format {
-	struct format_ops *ops;
-	union {
-		struct format_06 f06;
-		struct format_07 f07;
-	} d;
-};
+/*
+ * -- DRBD 0.6 --------------------------------------
+ */
 
-typedef void* conf_t;
-
-struct format_ops {
-	const char* name;
-	char** args;
-	int conf_size;
-	int (* parse)(struct format *, char **, int, int*);
-	int (* open) (struct format *);
-	int (* close)(struct format *);
-	struct meta_data * (* md_alloc)(void);
-	int (* read) (struct format *, struct meta_data *);
-	int (* write)(struct format *, struct meta_data *, int);
+struct __attribute__ ((packed)) md_on_disk_06 {
+	be_u32 gc[GEN_CNT_SIZE];	/* generation counter */
+	be_u32 magic;
 };
 
-void format_op_failed(struct format * fcfg, char* op)
+void md_disk_06_to_cpu(struct md_cpu *cpu, const struct md_on_disk_06 *disk)
 {
-	fprintf(stderr,"%s_%s() failed\n",fcfg->ops->name,op);
-	exit(20);
+	int i;
+	for (i = 0; i < GEN_CNT_SIZE; i++)
+		cpu->gc[i] = be32_to_cpu(disk->gc[i].be);
+	cpu->magic = be32_to_cpu(disk->magic.be);
 }
 
-/* capacity in units of 512 byte (AKA sectors)
- */
-int bm_words(unsigned long capacity)
+void md_cpu_to_disk_06(struct md_on_disk_06 *disk, const struct md_cpu *cpu)
 {
-	unsigned long bits;
-	int words;
-
-	/* bits  = ALIGN(capacity,BM_SECTORS_PER_BIT) >> (BM_BLOCK_SIZE_B-9); */
-	bits = ALIGN(capacity,8) >> 3;
-	words = ALIGN(bits,64) >> LN2_BPL;
-
-	return words;
+	int i;
+	for (i = 0; i < GEN_CNT_SIZE; i++)
+		disk->gc[i].be = cpu_to_be32(cpu->gc[i]);
+	disk->magic.be = cpu_to_be32(cpu->magic);
 }
 
-void to_lel(unsigned long* buffer, int words)
+int v06_validate_md(struct md_cpu *md)
 {
-	int i;
-	unsigned long w;
-
-	for (i=0;i<words;i++) {
-		w = cpu_to_le_long(buffer[i]);
-		buffer[i] = w;
+	if (md->magic != DRBD_MD_MAGIC_06) {
+		fprintf(stderr, "v06 Magic number not found\n");
+		return -1;
 	}
+	return 0;
 }
 
+/*
+ * -- DRBD 0.7 --------------------------------------
+ */
 
-unsigned long from_lel(unsigned long* buffer, int words)
+struct __attribute__ ((packed)) md_on_disk_07 {
+	be_u64 la_kb;		/* last agreed size. */
+	be_u32 gc[GEN_CNT_SIZE];	/* generation counter */
+	be_u32 magic;
+	be_u32 md_size;
+	be_u32 al_offset;	/* offset to this block */
+	be_u32 al_nr_extents;	/* important for restoring the AL */
+	be_u32 bm_offset;	/* offset to the bitmap, from here */
+	char reserved[8 * 512 - 48];
+};
+
+void md_disk_07_to_cpu(struct md_cpu *cpu, const struct md_on_disk_07 *disk)
 {
 	int i;
-	unsigned long w;
-	unsigned long bits=0;
+	cpu->la_sect = be64_to_cpu(disk->la_kb.be) << 1;
+	for (i = 0; i < GEN_CNT_SIZE; i++)
+		cpu->gc[i] = be32_to_cpu(disk->gc[i].be);
+	cpu->magic = be32_to_cpu(disk->magic.be);
+	cpu->md_size = be32_to_cpu(disk->md_size.be);
+	cpu->al_offset = be32_to_cpu(disk->al_offset.be);
+	cpu->al_nr_extents = be32_to_cpu(disk->al_nr_extents.be);
+	cpu->bm_offset = be32_to_cpu(disk->bm_offset.be);
+}
 
-	for (i=0;i<words;i++) {
-		w = le_long_to_cpu(buffer[i]);
-		bits += hweight_long(w);
-		buffer[i] = w;
-	}
-
-	return bits;
+void md_cpu_to_disk_07(struct md_on_disk_07 *disk, const struct md_cpu *cpu)
+{	/* store *cpu into *disk, every field converted to big endian */
+	int i;
+	disk->la_kb.be = cpu_to_be64(cpu->la_sect >> 1);	/* sectors -> kb */
+	for (i = 0; i < GEN_CNT_SIZE; i++)
+		disk->gc[i].be = cpu_to_be32(cpu->gc[i]);
+	disk->magic.be = cpu_to_be32(cpu->magic);
+	disk->md_size.be = cpu_to_be32(cpu->md_size);
+	disk->al_offset.be = cpu_to_be32(cpu->al_offset);
+	disk->al_nr_extents.be = cpu_to_be32(cpu->al_nr_extents);
+	disk->bm_offset.be = cpu_to_be32(cpu->bm_offset);
+	memset(disk->reserved, 0, sizeof(disk->reserved));	/* (dst, value, len) */
 }
 
-u64 bdev_size(int fd)
+int v07_validate_md(struct md_cpu *md)
 {
-	u64 size64; /* size in byte. */
-	long size;    /* size in sectors. */
-	int err;
+	if (md->magic != DRBD_MD_MAGIC_07) {
+		fprintf(stderr, "v07 Magic number not found\n");
+		return -1;
+	}
 
-	err=ioctl(fd,BLKGETSIZE64,&size64);
-	if(err) {
-		if (errno == EINVAL)  {
-			printf("INFO: falling back to BLKGETSIZE\n");
-			err=ioctl(fd,BLKGETSIZE,&size);
-			if(err) {
-				perror("ioctl(,BLKGETSIZE,) failed");
-				exit(20);
-			}
-			size64 = (typeof(u64))512 * size;
-		} else {
-			perror("ioctl(,BLKGETSIZE64,) failed");
-			exit(20);
-		}
+	if (md->al_offset != MD_AL_OFFSET_07) {
+		fprintf(stderr, "v07 Magic number (al_offset) not found\n");
+		return -1;
 	}
 
-	return size64;
-}
+	if (md->bm_offset != MD_BM_OFFSET_07) {
+		fprintf(stderr, "v07 Magic number (bm_offset) not found\n");
+		return -1;
+	}
 
-void md_free(struct meta_data * m)
-{
+	/* fixme consistency check, la_size < ll_device_size,
+	 * no overlap with internal meta data,
+	 * no overlap of flexible meta data offsets/sizes
+	 * ...
+	 */
 
-	if(m->bitmap)  free(m->bitmap);
-	if(m->act_log) free(m->act_log);
-
-	free(m);
+	return 0;
 }
 
-/******************************************
- begin of v07 {
- ******************************************/
-struct __attribute__((packed)) meta_data_on_disk_07 {
-	u64 la_size;           /* last agreed size. */
-	u32 gc[GEN_CNT_SIZE];  /* generation counter */
+/*
+ * these stay the same for 0.8, too:
+ */
+
+struct __attribute__ ((packed)) al_sector_cpu {
 	u32 magic;
-	u32 md_size;
-	u32 al_offset;         /* offset to this block */
-	u32 al_nr_extents;     /* important for restoring the AL */
-	u32 bm_offset;         /* offset to the bitmap, from here */
+	u32 tr_number;
+	struct __attribute__ ((packed)) {
+		u32 pos;
+		u32 extent;
+	} updates[62];
+	u32 xor_sum;
 };
 
-u64 v07_offset(struct format_07* cfg)
-{
-	u64 offset;
+struct __attribute__ ((packed)) al_sector_on_disk {
+	be_u32 magic;
+	be_u32 tr_number;
+	struct __attribute__ ((packed)) {
+		be_u32 pos;
+		be_u32 extent;
+	} updates[62];
+	be_u32 xor_sum;
+};
 
-	if(cfg->index == -1) {
-		offset = ( bdev_size(cfg->fd) & ~((1<<12)-1) )
-			- MD_RESERVED_SIZE_07;
-	} else {
-		offset = MD_RESERVED_SIZE_07 * cfg->index;
-	}
-	return offset;
-}
+/*
+ * -- DRBD 0.8 --------------------------------------
+ *  even though they now differ only by la-size being kb or sectors,
+ *  I expect them to diverge, so lets have different structures.
+ */
 
-int v07_parse(struct format * config, char **argv, int argc, int *ai);
-int v07_open(struct format * config);
-int v07_close(struct format * config);
-struct meta_data * v07_md_alloc(void);
-int v07_read(struct format * config, struct meta_data *);
-int v07_write(struct format * config, struct meta_data *, int init_al);
+struct __attribute__ ((packed)) md_on_disk_08 {
+	be_u64 la_sect;		/* last agreed size. */
+	be_u32 gc[GEN_CNT_SIZE];	/* generation counter */
+	be_u32 magic;
+	be_u32 md_size;
+	be_u32 al_offset;	/* offset to this block */
+	be_u32 al_nr_extents;	/* important for restoring the AL */
+	be_u32 bm_offset;	/* offset to the bitmap, from here */
+	char reserved[8 * 512 - 48];
+};
 
-int v07_parse(struct format * config, char **argv, int argc, int *ai)
+void md_disk_08_to_cpu(struct md_cpu *cpu, const struct md_on_disk_08 *disk)
 {
-	struct format_07* cfg = &config->d.f07;
-	char *e;
+	int i;
+	cpu->la_sect = be64_to_cpu(disk->la_sect.be);
+	for (i = 0; i < GEN_CNT_SIZE; i++)
+		cpu->gc[i] = be32_to_cpu(disk->gc[i].be);
+	cpu->magic = be32_to_cpu(disk->magic.be);
+	cpu->md_size = be32_to_cpu(disk->md_size.be);
+	cpu->al_offset = be32_to_cpu(disk->al_offset.be);
+	cpu->al_nr_extents = be32_to_cpu(disk->al_nr_extents.be);
+	cpu->bm_offset = be32_to_cpu(disk->bm_offset.be);
+}
 
-	if(argc < 2) {
-		fprintf(stderr,"Too few arguments for format\n");
-		return 0;
-	}
-
-	cfg->device_name = strdup(argv[0]);
-	e = argv[1];
-	cfg->index = strtol(argv[1],&e,0);
-	if(*e != 0) {
-		fprintf(stderr,"'%s' is not a valid index number.\n",argv[1]);
-		return 0;
-	}
-
-	*ai+=2;
-
-	return 1;
+void md_cpu_to_disk_08(struct md_on_disk_08 *disk, const struct md_cpu *cpu)
+{	/* store *cpu into *disk, every field converted to big endian */
+	int i;
+	disk->la_sect.be = cpu_to_be64(cpu->la_sect);	/* already in sectors */
+	for (i = 0; i < GEN_CNT_SIZE; i++)
+		disk->gc[i].be = cpu_to_be32(cpu->gc[i]);
+	disk->magic.be = cpu_to_be32(cpu->magic);
+	disk->md_size.be = cpu_to_be32(cpu->md_size);
+	disk->al_offset.be = cpu_to_be32(cpu->al_offset);
+	disk->al_nr_extents.be = cpu_to_be32(cpu->al_nr_extents);
+	disk->bm_offset.be = cpu_to_be32(cpu->bm_offset);
+	memset(disk->reserved, 0, sizeof(disk->reserved));	/* (dst, value, len) */
 }
 
-int v07_open(struct format * config)
+int v08_validate_md(struct md_cpu *md)
 {
-	struct format_07* cfg = &config->d.f07;
-	struct stat sb;
-
-	cfg->fd = open(cfg->device_name,O_RDWR);
-
-	if(cfg->fd == -1) {
-		PERROR("open() failed");
-		return 0;
+	if (md->magic != DRBD_MD_MAGIC_08) {
+		fprintf(stderr, "v08 Magic number not found\n");
+		return -1;
 	}
 
-	if(fstat(cfg->fd, &sb)) {
-		PERROR("fstat() failed");
-		return 0;
+	if (md->al_offset != MD_AL_OFFSET_07) {
+		fprintf(stderr, "v08 Magic number (al_offset) not found\n");
+		return -1;
 	}
 
-	if(!S_ISBLK(sb.st_mode)) {
-		fprintf(stderr, "'%s' is not a block device!\n",
-			cfg->device_name);
-		return 0;
+	if (md->bm_offset != MD_BM_OFFSET_07) {
+		fprintf(stderr, "v08 Magic number (bm_offset) not found\n");
+		return -1;
 	}
 
-	if( ioctl(cfg->fd,BLKFLSBUF) == -1) {
-		PERROR("ioctl(,BLKFLSBUF,) failed");
-		return 0;
-	}
+	/* fixme consistency check, la_size < ll_device_size,
+	 * no overlap with internal meta data,
+	 * no overlap of flexible meta data offsets/sizes
+	 * ...
+	 */
 
-	return 1;
+	return 0;
 }
 
-int v07_close(struct format * config)
-{
-	struct format_07* cfg = &config->d.f07;
+/*
+ * drbdmeta specific types
+ */
 
-	if( fsync(cfg->fd) == -1) {
-		PERROR("fsync() failed");
-		return 0;
-	}
+struct format_ops;
 
-	if( ioctl(cfg->fd,BLKFLSBUF) == -1) {
-		PERROR("ioctl(,BLKFLSBUF,) failed");
-		return 0;
-	}
+struct format {	/* one meta data store: fd, decoded md, on-disk pointers */
+	struct format_ops *ops;
+	char *device_name;	/* well, in 06 it is file name */
+	int fd;
+	/* byte offset of our "super block", within fd */
+	u64 md_offset;
 
-	return close(cfg->fd) == 0;
-}
+	/* unused in 06 */
+	int md_index;
+	unsigned int bm_bytes;
+	unsigned int bits_set;	/* 32 bit should be enough. @4k ==> 16TB */
 
-struct meta_data * v07_md_alloc(void)
-{
-	struct meta_data *m;
+	struct md_cpu md;
 
-	m = malloc(sizeof(struct meta_data ));
-	memset(m,0,sizeof(struct meta_data ));
+	struct {
+		/* "super block", fixed 4096 byte for the next century */
+		union {
+			struct md_on_disk_06 *md6;
+			struct md_on_disk_07 *md7;
+			struct md_on_disk_08 *md8;
+		};
 
-	m->bitmap = malloc(MD_BM_MAX_SIZE_07);
-	if( ! m->bitmap) {
-		PERROR("Can not allocate memory for bitmap.");
-		return 0;
-	}
+		/* variable size; well, in 07 it is fixed 64*512 byte,
+		 * which may be partially unused */
+		struct al_sector_on_disk *al;	/* the struct defined above */
 
-	m->bm_size = MD_BM_MAX_SIZE_07;
+		/* variable size; well, in 07 it is fixed (256-64-8)*512 byte
+		 * which may be partially unused
+		 * use le_long for now. */
+		le_ulong *bm;
+	} on_disk;
+};
 
-	return m;
-}
+struct format_ops {
+	const char *name;
+	char **args;
+	int (*parse) (struct format *, char **, int, int *);
+	int (*open) (struct format *);
+	int (*close) (struct format *);
+	int (*md_initialize) (struct format *);
+	int (*md_disk_to_cpu) (struct format *);
+	int (*md_cpu_to_disk) (struct format *);
+};
 
-int v07_read(struct format * config, struct meta_data * m)
-{
-	struct format_07* cfg = &config->d.f07;
-	struct meta_data_on_disk_07 buffer;
-	int rr,i,bmw;
-	u64 offset = v07_offset(cfg);
+/*
+ * global variables
+ */
 
-	if(lseek64(cfg->fd,offset,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
-	}
+enum Known_Formats {
+	Drbd_06,
+	Drbd_07,
+	Drbd_08,
+	Drbd_Unknown,
+};
 
-	rr = read(cfg->fd, &buffer, sizeof(struct meta_data_on_disk_07));
-	if( rr != sizeof(struct meta_data_on_disk_07)) {
-		PERROR("read failed");
-		return 0;
-	}
+/* pre declarations */
+int v06_md_close(struct format *cfg);
+int v06_md_cpu_to_disk(struct format *cfg);
+int v06_md_disk_to_cpu(struct format *cfg);
+int v06_parse(struct format *cfg, char **argv, int argc, int *ai);
+int v06_md_open(struct format *cfg);
+int v06_md_initialize(struct format *cfg);
 
-	if( be32_to_cpu(buffer.magic) != DRBD_MD_MAGIC_07 ) {
-		fprintf(stderr,"Magic number not found\n");
-		return 0;
-	}
+int v07_md_close(struct format *cfg);
+int v07_md_cpu_to_disk(struct format *cfg);
+int v07_md_disk_to_cpu(struct format *cfg);
+int v07_md_open(struct format *cfg);
+int v07_parse(struct format *cfg, char **argv, int argc, int *ai);
+int v07_md_initialize(struct format *cfg);
 
-	if( be32_to_cpu(buffer.al_offset) != MD_AL_OFFSET_07 ) {
-		fprintf(stderr,"Magic number (al_offset) not found\n");
-		return 0;
-	}
+int v08_md_cpu_to_disk(struct format *cfg);
+int v08_md_disk_to_cpu(struct format *cfg);
+int v08_md_initialize(struct format *cfg);
 
-	if( be32_to_cpu(buffer.bm_offset) != MD_BM_OFFSET_07 ) {
-		fprintf(stderr,"Magic number (bm_offset) not found\n");
-		return 0;
-	}
+struct format_ops f_ops[] = {
+	[Drbd_06] = {
+		     .name = "v06",
+		     .args = (char *[]){"minor", NULL},
+		     .parse = v06_parse,
+		     .open = v06_md_open,
+		     .close = v06_md_close,
+		     .md_initialize = v06_md_initialize,
+		     .md_disk_to_cpu = v06_md_disk_to_cpu,
+		     .md_cpu_to_disk = v06_md_cpu_to_disk,
+		     },
+	[Drbd_07] = {
+		     .name = "v07",
+		     .args = (char *[]){"device", "index", NULL},
+		     .parse = v07_parse,
+		     .open = v07_md_open,
+		     .close = v07_md_close,
+		     .md_initialize = v07_md_initialize,
+		     .md_disk_to_cpu = v07_md_disk_to_cpu,
+		     .md_cpu_to_disk = v07_md_cpu_to_disk,
+		     },
+	[Drbd_08] = {
+		     .name = "v08",
+		     .args = (char *[]){"device", "index", NULL},
+		     .parse = v07_parse,
+		     .open = v07_md_open,
+		     .close = v07_md_close,
+		     .md_initialize = v08_md_initialize,
+		     .md_disk_to_cpu = v08_md_disk_to_cpu,
+		     .md_cpu_to_disk = v08_md_cpu_to_disk,
+		     },
+};
 
-	for (i = Flags; i < GEN_CNT_SIZE; i++)
-		m->gc[i] = be32_to_cpu(buffer.gc[i]);
+/******************************************
+  Commands we know about:
+ ******************************************/
 
-	m->la_size = be64_to_cpu(buffer.la_size);
+struct meta_cmd {
+	const char *name;
+	const char *args;
+	int (*function) (struct format *, char **argv, int argc);
+	int show_in_usage;
+};
 
-	if(m->bitmap) {
-		bmw = bm_words(m->la_size);
+/* pre declarations */
+int meta_get_gc(struct format *cfg, char **argv, int argc);
+int meta_show_gc(struct format *cfg, char **argv, int argc);
+int meta_dump_md(struct format *cfg, char **argv, int argc);
+int meta_create_md(struct format *cfg, char **argv, int argc);
+int meta_set_gc(struct format *cfg, char **argv, int argc);
 
-		offset = offset + 512 * MD_BM_OFFSET_07;
-		if(lseek64(cfg->fd, offset, SEEK_SET) == -1) {
-			PERROR("lseek() failed");
-			return 0;
-		}
+struct meta_cmd cmds[] = {
+	{"get-gc", 0, meta_get_gc, 1},
+	{"show-gc", 0, meta_show_gc, 1},
+	{"dump-md", 0, meta_dump_md, 1},
+	{"create-md", 0, meta_create_md, 1},
+	/* FIXME convert still missing.
+	 * implicit convert from v07 to v08 by create-md
+	 * see comments there */
+	{"set-gc", ":::VAL:VAL:...", meta_set_gc, 0},
+};
 
-		rr = read(cfg->fd, m->bitmap, bmw*sizeof(long));
-		if( rr != bmw*sizeof(long) ) {
-			PERROR("read failed");
-			return 0;
-		}
+char *progname = 0;
+int drbd_fd = -1;
+int lock_fd = -1;
+char *drbd_dev_name;
 
-		m->bm_size = bmw*sizeof(long);
-		m->bits_set = from_lel(m->bitmap,bmw);
-	}
+/*
+ * generic helpers
+ */
 
-	return 1;
+int confirmed(const char *text)
+{	/* prompt with text; nonzero only if the scanned answer equals "yes" */
+	char answer[16] = "";	/* stays "" when scanf() stores nothing */
+	int rr;
+
+	printf("%s [yes/no] ", text);
+	rr = scanf("%15[yesno]", answer);	/* width goes before the scanset */
+	return rr == 1 && !strcmp(answer, "yes");
 }
 
-int v07_write(struct format * config, struct meta_data * m, int init_al)
+unsigned long bm_words(u64 sectors)
 {
-	struct format_07* cfg = &config->d.f07;
-	struct meta_data_on_disk_07 buffer;
-	int rr,i;
-	u64 offset = v07_offset(cfg);
+	unsigned long long bits;
+	unsigned long long words;
 
-	buffer.magic = cpu_to_be32( DRBD_MD_MAGIC_07 );
-	buffer.al_offset = cpu_to_be32( MD_AL_OFFSET_07 );
-	buffer.bm_offset = cpu_to_be32( MD_BM_OFFSET_07 );
+	/* bits  = ALIGN(capacity,BM_SECTORS_PER_BIT) >> (BM_BLOCK_SIZE_B-9); */
+	bits = ALIGN(sectors, 8) >> 3;
+	words = ALIGN(bits, 64) >> LN2_BPL;
 
-	for (i = Flags; i < GEN_CNT_SIZE; i++)
-		buffer.gc[i] = cpu_to_be32(m->gc[i]);
+	return words;
+}
 
-	buffer.la_size = cpu_to_be64(m->la_size);
+u64 bdev_size(int fd)
+{
+	u64 size64;		/* size in byte. */
+	long size;		/* size in sectors. */
+	int err;
 
-	if(lseek64(cfg->fd,offset,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
+	err = ioctl(fd, BLKGETSIZE64, &size64);
+	if (err) {
+		if (errno == EINVAL) {
+			printf("INFO: falling back to BLKGETSIZE\n");
+			err = ioctl(fd, BLKGETSIZE, &size);
+			if (err) {
+				perror("ioctl(,BLKGETSIZE,) failed");
+				exit(20);
+			}
+			size64 = (typeof(u64)) 512 *size;
+		} else {
+			perror("ioctl(,BLKGETSIZE64,) failed");
+			exit(20);
+		}
 	}
 
-	rr = write(cfg->fd, &buffer, sizeof(struct meta_data_on_disk_07));
-	if( rr != sizeof(struct meta_data_on_disk_07)) {
-		PERROR("write failed");
-		return 0;
-	}
+	return size64;
+}
 
-	if(lseek64(cfg->fd,offset + 512 * MD_BM_OFFSET_07 ,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
-	}
+#if BITS_PER_LONG == 32
+# define FMT " 0x%016llX;"
+#else
+# define FMT " 0x%016lX;"
+#endif
 
-	to_lel(m->bitmap, m->bm_size/sizeof(long) );
-
-	rr = write(cfg->fd, m->bitmap, m->bm_size);
-	if( rr != m->bm_size) {
-		PERROR("write failed");
-		return 0;
+/* le_u64, because we want to be able to hexdump it reliably
+ * regardless of sizeof(long) */
+void printf_bm(const le_u64 * bm, const unsigned int n)
+{
+	int i;
+	printf("bm {");
+	for (i = 0; i < n; i++) {
+		if ((i & 3) == 0)
+			printf("\n   ");
+		printf(FMT, le64_to_cpu(bm[i].le));
 	}
+	printf("\n }\n");
+}
 
-	from_lel(m->bitmap, m->bm_size/sizeof(long) );
+#undef FMT
 
-	if( init_al ) {
-		char sector[512] = { [ 0 ... 511 ] = 0 };
-
-		if(lseek64(cfg->fd,offset+512*MD_AL_OFFSET_07, SEEK_SET)==-1) {
-			PERROR("lseek() failed");
-			return 0;
-		}
-
-		for (i = 0; i < MD_AL_MAX_SIZE_07; i++) {
-			rr = write(cfg->fd, &sector, 512);
-			if(rr != 512) {
-				PERROR("write failed");
-				return 0;
-			}
-		}
-	}
-
-	return 1;
+void printf_gc(const struct md_cpu *md)
+{
+	printf("%d:%d:%d:%d:%d:%d:%d:%d\n",
+	       md->gc[Flags] & MDF_Consistent ? 1 : 0,
+	       md->gc[HumanCnt],
+	       md->gc[TimeoutCnt],
+	       md->gc[ConnectedCnt],
+	       md->gc[ArbitraryCnt],
+	       md->gc[Flags] & MDF_PrimaryInd ? 1 : 0,
+	       md->gc[Flags] & MDF_ConnectedInd ? 1 : 0,
+	       md->gc[Flags] & MDF_FullSync ? 1 : 0);
 }
-/******************************************
- } end of v07
- ******************************************/
 
 /******************************************
- begin of v08 {
+ begin of v06 {{{
  ******************************************/
 
-int v08_read(struct format * config, struct meta_data *);
-int v08_write(struct format * config, struct meta_data *, int init_al);
+int v06_md_disk_to_cpu(struct format *cfg)
+{
+	md_disk_06_to_cpu(&cfg->md, cfg->on_disk.md6);
+	return v06_validate_md(&cfg->md);
+}
 
-int v08_read(struct format * config, struct meta_data * m)
+int v06_md_cpu_to_disk(struct format *cfg)
 {
-	struct format_07* cfg = &config->d.f07;
-	struct meta_data_on_disk_07 buffer;
-	int rr,i,bmw;
-	__u64 offset = v07_offset(cfg);
-
-	if(lseek64(cfg->fd,offset,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
+	int err;
+	if (v06_validate_md(&cfg->md))
+		return -1;
+	if (!cfg->on_disk.md6) {
+		fprintf(stderr, "BUG: on-disk-md not mapped\n");
+		exit(30);
 	}
+	md_cpu_to_disk_06(cfg->on_disk.md6, &cfg->md);
+	err = msync(cfg->on_disk.md6, sizeof(*cfg->on_disk.md6),
+		    MS_SYNC | MS_INVALIDATE);
+	if (err) {
+		PERROR("msync(on_disk_md)");
+		return -1;
+	};
+	return 0;
+}
 
-	rr = read(cfg->fd, &buffer, sizeof(struct meta_data_on_disk_07));
-	if( rr != sizeof(struct meta_data_on_disk_07)) {
-		PERROR("read failed");
-		return 0;
-	}
+int v06_parse(struct format *cfg, char **argv, int argc, int *ai)
+{
+	unsigned long minor;
+	char *e;
 
-	if( be32_to_cpu(buffer.magic) != DRBD_MD_MAGIC_08 ) {
-		fprintf(stderr,"Magic number not found\n");
-		return 0;
+	if (argc < 1) {
+		fprintf(stderr, "Too few arguments for format\n");
+		exit(20);
 	}
 
-	if( be32_to_cpu(buffer.al_offset) != MD_AL_OFFSET_07 ) {
-		fprintf(stderr,"Magic number (al_offset) not found\n");
-		return 0;
+	e = argv[0];
+	minor = strtol(argv[0], &e, 0);
+	if (*e != 0 || minor > 255UL) {
+		fprintf(stderr, "'%s' is not a valid minor number.\n", argv[0]);
+		exit(20);
 	}
+	if (asprintf(&e, "/var/lib/drbd/drbd%lu", minor) <= 18) {
+		fprintf(stderr, "asprintf() failed.\n");
+		exit(20);
+	};
+	cfg->device_name = e;
 
-	if( be32_to_cpu(buffer.bm_offset) != MD_BM_OFFSET_07 ) {
-		fprintf(stderr,"Magic number (bm_offset) not found\n");
-		return 0;
-	}
+	*ai += 1;
 
-	for (i = Flags; i < GEN_CNT_SIZE; i++)
-		m->gc[i] = be32_to_cpu(buffer.gc[i]);
-
-	m->la_size = be64_to_cpu(buffer.la_size) / 2 ;
-
-	if(m->bitmap) {
-		bmw = bm_words(m->la_size);
-
-		offset = offset + 512 * MD_BM_OFFSET_07;
-		if(lseek64(cfg->fd, offset, SEEK_SET) == -1) {
-			PERROR("lseek() failed");
-			return 0;
-		}
-
-		rr = read(cfg->fd, m->bitmap, bmw*sizeof(long));
-		if( rr != bmw*sizeof(long) ) {
-			PERROR("read failed");
-			return 0;
-		}
-
-		m->bm_size = bmw*sizeof(long);
-		m->bits_set = from_lel(m->bitmap,bmw);
-	}
-
-	return 1;
+	return 0;
 }
 
-int v08_write(struct format * config, struct meta_data * m, int init_al)
+int v06_md_open(struct format *cfg)	/* 0 on success, -1 on any failure */
 {
-	struct format_07* cfg = &config->d.f07;
-	struct meta_data_on_disk_07 buffer;
-	int rr,i;
-	__u64 offset = v07_offset(cfg);
+	struct stat sb;
 
-	buffer.magic = cpu_to_be32( DRBD_MD_MAGIC_08 );
-	buffer.al_offset = cpu_to_be32( MD_AL_OFFSET_07 );
-	buffer.bm_offset = cpu_to_be32( MD_BM_OFFSET_07 );
+	cfg->fd = open(cfg->device_name, O_RDWR);
 
-	for (i = Flags; i < GEN_CNT_SIZE; i++)
-		buffer.gc[i] = cpu_to_be32(m->gc[i]);
-
-	buffer.la_size = cpu_to_be64(m->la_size * 2);
-
-	if(lseek64(cfg->fd,offset,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
+	if (cfg->fd == -1) {
+		PERROR("open(%s) failed", cfg->device_name);
+		return -1;
 	}
 
-	rr = write(cfg->fd, &buffer, sizeof(struct meta_data_on_disk_07));
-	if( rr != sizeof(struct meta_data_on_disk_07)) {
-		PERROR("write failed");
-		return 0;
+	if (fstat(cfg->fd, &sb)) {
+		PERROR("fstat() failed");
+		return -1;
 	}
 
-	if(lseek64(cfg->fd,offset + 512 * MD_BM_OFFSET_07 ,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
+	if (!S_ISREG(sb.st_mode)) {
+		fprintf(stderr, "'%s' is not a plain file!\n",
+			cfg->device_name);
+		return -1;
 	}
 
-	to_lel(m->bitmap, m->bm_size/sizeof(long) );
-
-	rr = write(cfg->fd, m->bitmap, m->bm_size);
-	if( rr != m->bm_size) {
-		PERROR("write failed");
-		return 0;
+	cfg->on_disk.md6 =
+	    mmap(NULL, sizeof(struct md_on_disk_06), PROT_READ | PROT_WRITE,
+		 MAP_SHARED, cfg->fd, 0);
+	if (cfg->on_disk.md6 == MAP_FAILED) {	/* mmap() fails with MAP_FAILED, not NULL */
+		PERROR("mmap(md_on_disk) failed");
+		return -1;
 	}
 
-	from_lel(m->bitmap, m->bm_size/sizeof(long) );
-
-	if( init_al ) {
-		char sector[512] = { [ 0 ... 511 ] = 0 };
-
-		if(lseek64(cfg->fd,offset+512*MD_AL_OFFSET_07, SEEK_SET)==-1) {
-			PERROR("lseek() failed");
-			return 0;
-		}
-
-		for (i = 0; i < MD_AL_MAX_SIZE_07; i++) {
-			rr = write(cfg->fd, &sector, 512);
-			if(rr != 512) {
-				PERROR("write failed");
-				return 0;
-			}
-		}
+	if (cfg->ops->md_disk_to_cpu(cfg)) {
+		return -1;
 	}
 
-	return 1;
+	return 0;
 }
-/******************************************
- } end of v08
- ******************************************/
 
-/******************************************
- begin of v06 {
- ******************************************/
-struct __attribute__((packed)) meta_data_on_disk_06 {
-	u32 gc[GEN_CNT_SIZE];  /* generation counter */
-	u32 magic;
-};
-
-int v06_parse(struct format * config, char **argv, int argc, int *ai);
-int v06_open(struct format * config);
-int v06_close(struct format * config);
-struct meta_data * v06_md_alloc(void);
-int v06_read(struct format * config, struct meta_data *);
-int v06_write(struct format * config, struct meta_data *, int init_al);
-
-int v06_parse(struct format * config, char **argv, int argc, int *ai)
+int v06_md_close(struct format *cfg)
 {
-	struct format_06* cfg = &config->d.f06;
-	char *e;
-
-	if(argc < 1) {
-		fprintf(stderr,"Too few arguments for format\n");
-		return 0;
+	if (munmap(cfg->on_disk.md6, sizeof(struct md_on_disk_06))) {
+		PERROR("munmap(md_on_disk) failed");
+		return -1;
 	}
-
-	e = argv[0];
-	cfg->minor = strtol(argv[0],&e,0);
-	if(*e != 0) {
-		fprintf(stderr,"'%s' is not a valid index number.\n",argv[1]);
-		return 0;
+	if (fsync(cfg->fd) == -1) {
+		PERROR("fsync() failed");
+		return -1;
 	}
-
-	*ai+=1;
-
-	return 1;
+	if (close(cfg->fd)) {
+		PERROR("close() failed");
+		return -1;
+	}
+	return 0;
 }
 
-int v06_open(struct format * config)
+int v06_md_initialize(struct format *cfg)
 {
-	struct format_06* cfg = &config->d.f06;
-	char fn[100];
+	cfg->md.gc[Flags] = 0;
+	cfg->md.gc[HumanCnt] = 1;	/* THINK 0? 1? */
+	cfg->md.gc[TimeoutCnt] = 1;
+	cfg->md.gc[ConnectedCnt] = 1;
+	cfg->md.gc[ArbitraryCnt] = 1;
+	cfg->md.magic = DRBD_MD_MAGIC_06;
+	return 0;
+}
 
-	snprintf(fn,99,"/var/lib/drbd/drbd%d",cfg->minor);
+/******************************************
+  }}} end of v06
+ ******************************************/
+/******************************************
+ begin of v07 {{{
+ ******************************************/
 
-	cfg->fd = open(fn,O_RDWR);
+u64 v07_offset(struct format * cfg)
+{
+	u64 offset;
 
-	if(cfg->fd == -1) {
-		PERROR("open() failed");
-		return 0;
+	if (cfg->md_index == -1) {
+		offset = (bdev_size(cfg->fd) & ~((1 << 12) - 1))
+		    - MD_RESERVED_SIZE_07;
+	} else {
+		offset = MD_RESERVED_SIZE_07 * cfg->md_index;
 	}
-
-	return 1;
+	return offset;
 }
 
-int v06_close(struct format * config)
+int v07_md_disk_to_cpu(struct format *cfg)
 {
-	struct format_06* cfg = &config->d.f06;
-
-	return close(cfg->fd) == 0;
+	md_disk_07_to_cpu(&cfg->md, cfg->on_disk.md7);
+	return v07_validate_md(&cfg->md);
 }
 
-struct meta_data * v06_md_alloc(void)
+int v07_md_cpu_to_disk(struct format *cfg)
 {
-	struct meta_data *m;
-
-	m = malloc(sizeof(struct meta_data ));
-	memset(m,0,sizeof(struct meta_data ));
-
-	return m;
+	int err;
+	if (v07_validate_md(&cfg->md))
+		return -1;
+	if (!cfg->on_disk.md7) {
+		fprintf(stderr, "BUG: on-disk-md not mapped\n");
+		return -1;
+	}
+	md_cpu_to_disk_07(cfg->on_disk.md7, &cfg->md);
+	err = msync(cfg->on_disk.md7, sizeof(*cfg->on_disk.md7),
+		    MS_SYNC | MS_INVALIDATE);
+	if (err) {
+		PERROR("msync(on_disk_md)");
+		return -1;
+	};
+	return 0;
 }
 
-int v06_read(struct format * config, struct meta_data * m)
+int v07_parse(struct format *cfg, char **argv, int argc, int *ai)
 {
-	struct format_06* cfg = &config->d.f06;
-	struct meta_data_on_disk_06 buffer;
-	int rr,i;
+	long index;
+	char *e;
 
-	if(lseek64(cfg->fd,0,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
+	if (argc < 2) {
+		fprintf(stderr, "Too few arguments for format\n");
+		return -1;
 	}
 
-	rr = read(cfg->fd, &buffer, sizeof(struct meta_data_on_disk_06));
-	if( rr != sizeof(struct meta_data_on_disk_06)) {
-		PERROR("read failed");
-		return 0;
+	cfg->device_name = strdup(argv[0]);
+	e = argv[1];
+	index = strtol(argv[1], &e, 0);
+	if (*e != 0 || -1 > index || index > 255) {
+		fprintf(stderr, "'%s' is not a valid index number.\n", argv[1]);
+		exit(20);
 	}
+	cfg->md_index = index;
 
-	if( be32_to_cpu(buffer.magic) != DRBD_MD_MAGIC_06 ) {
-		fprintf(stderr,"Magic number not found\n");
-		return 0;
-	}
+	*ai += 2;
 
-	for (i = Flags; i < GEN_CNT_SIZE; i++)
-		m->gc[i] = be32_to_cpu(buffer.gc[i]);
-
-	return 1;
+	return 0;
 }
 
-int v06_write(struct format * config, struct meta_data * m, int init_al)
+int v07_md_open(struct format *cfg)
 {
-	struct format_06* cfg = &config->d.f06;
-	struct meta_data_on_disk_06 buffer;
-	int rr,i;
+	struct stat sb;
+	unsigned long words;
+	u64 offset, al_offset, bm_offset;
 
-	buffer.magic = cpu_to_be32( DRBD_MD_MAGIC_06 );
+	cfg->fd = open(cfg->device_name, O_RDWR);
 
-	for (i = Flags; i < GEN_CNT_SIZE; i++)
-		buffer.gc[i] = cpu_to_be32(m->gc[i]);
+	if (cfg->fd == -1) {
+		PERROR("open(%s) failed", cfg->device_name);
+		exit(20);
+	}
 
-	if(lseek64(cfg->fd,0,SEEK_SET) == -1) {
-		PERROR("lseek() failed");
-		return 0;
+	if (fstat(cfg->fd, &sb)) {
+		PERROR("fstat(%s) failed", cfg->device_name);
+		exit(20);
 	}
 
-	rr = write(cfg->fd, &buffer, sizeof(struct meta_data_on_disk_06));
-	if( rr != sizeof(struct meta_data_on_disk_06)) {
-		PERROR("write failed");
-		return 0;
+	if (!S_ISBLK(sb.st_mode)) {
+		fprintf(stderr, "'%s' is not a block device!\n",
+			cfg->device_name);
+		exit(20);
 	}
 
-	return 1;
-}
-/******************************************
- } end of v06
- ******************************************/
+	if (ioctl(cfg->fd, BLKFLSBUF) == -1) {
+		PERROR("ioctl(,BLKFLSBUF,) failed");
+		exit(20);
+	}
 
-struct format_ops formats[] = {
-	{ 
-		"v06",
-		(char *[]) { "minor", 0 },
-		sizeof(struct format_06),
-		v06_parse,
-		v06_open,
-		v06_close,
-		v06_md_alloc,
-		v06_read,
-		v06_write
-	}, { 
-		"v07",
-		(char *[]) { "device","index",0 },
-		sizeof(struct format_07),
-		v07_parse,
-		v07_open,
-		v07_close,
-		v07_md_alloc,
-		v07_read,
-		v07_write
-	}, { 
-		"v08",
-		(char *[]) { "device","index",0 },
-		sizeof(struct format_07),
-		v07_parse,
-		v07_open,
-		v07_close,
-		v07_md_alloc,
-		v08_read,
-		v08_write
+	offset = v07_offset(cfg);
+	cfg->on_disk.md7 =
+	    mmap(NULL, sizeof(struct md_on_disk_07), PROT_READ | PROT_WRITE,
+		 MAP_SHARED, cfg->fd, offset);
+	if (cfg->on_disk.md7 == MAP_FAILED) {	/* mmap() signals failure with MAP_FAILED, never NULL */
+		PERROR("mmap(md_on_disk) failed");
+		exit(20);
 	}
-};
+	cfg->md_offset = offset;
 
-struct format_ops *fops_v06 = formats+0;
-struct format_ops *fops_v07 = formats+1;
-struct format_ops *fops_v08 = formats+2;
-
-struct meta_cmd {
-	const char* name;
-	const char* args;
-	int (* function)(struct format *, char** argv, int argc );
-	int show_in_usage;
-};
-
-int meta_show_gc(struct format * fcfg, char** argv, int argc )
-{
-	struct meta_data* md;
-	char ppb[10];
-
-	if(argc > 0) {
-		fprintf(stderr,"Ignoring additional arguments\n");
+	if (cfg->ops->md_disk_to_cpu(cfg)) {
+		return -1;
 	}
-	md = fcfg->ops->md_alloc();
 
-	OR_EXIT(fcfg,open);
-	OR_EXIT(fcfg,read,md);
-	printf( "\n"
-		"                                        WantFullSync |\n"
-		"                                  ConnectedInd |     |\n"
-		"                               lastState |     |     |\n"
-		"                      ArbitraryCnt |     |     |     |\n"
-		"                ConnectedCnt |     |     |     |     |\n"
-		"            TimeoutCnt |     |     |     |     |     |\n"
-		"        HumanCnt |     |     |     |     |     |     |\n"
-		"Consistent |     |     |     |     |     |     |     |\n"
-		"   --------+-----+-----+-----+-----+-----+-----+-----+\n"
-		"       %3s | %3d | %3d | %3d | %3d | %3s | %3s | %3s  \n"
-		"\n",
-		md->gc[Flags] & MDF_Consistent ? "1/c" : "0/i",
-		md->gc[HumanCnt],
-		md->gc[TimeoutCnt],
-		md->gc[ConnectedCnt],
-		md->gc[ArbitraryCnt],
-		md->gc[Flags] & MDF_PrimaryInd ? "1/p" : "0/s",
-		md->gc[Flags] & MDF_ConnectedInd ? "1/c" : "0/n",
-		md->gc[Flags] & MDF_FullSync ? "1/y" : "0/n");
+	al_offset = offset + cfg->md.al_offset * 512;
+	bm_offset = offset + cfg->md.bm_offset * 512;
 
-
-	if(md->la_size) {
-		printf("last agreed size: %s\n", ppsize(ppb,md->la_size));
+	cfg->on_disk.al =
+	    mmap(NULL, MD_AL_MAX_SIZE_07 * 512, PROT_READ | PROT_WRITE,
+		 MAP_SHARED, cfg->fd, al_offset);
+	if (cfg->on_disk.al == MAP_FAILED) {
+		PERROR("mmap(al_on_disk) failed");
+		exit(20);
 	}
 
-	if(md->bitmap) {
-		printf("%lu bits set in the bitmap [ %s out of sync ]\n",
-		       md->bits_set, ppsize(ppb,md->bits_set));
+	cfg->on_disk.bm = mmap(NULL, MD_BM_MAX_SIZE_07, PROT_READ | PROT_WRITE,
+			       MAP_SHARED, cfg->fd, bm_offset);
+	if (cfg->on_disk.bm == MAP_FAILED) {
+		PERROR("mmap(bm_on_disk) failed");
+		exit(20);
 	}
 
-	OR_EXIT(fcfg,close);
+	words = bm_words(cfg->md.la_sect);
+	cfg->bm_bytes = words * sizeof(long);
+	cfg->bits_set =
+	    count_bits((const unsigned long *)cfg->on_disk.bm, words);
 
-	md_free(md);
+	/* FIXME paranoia verify that unused bits and words are unset... */
 
 	return 0;
 }
 
-int meta_get_gc(struct format * fcfg, char** argv, int argc )
+int v07_md_close(struct format *cfg)
 {
-	struct meta_data* md;
-
-	if(argc > 0) {
-		fprintf(stderr,"Ignoring additional arguments\n");
+	if (munmap(cfg->on_disk.bm, MD_BM_MAX_SIZE_07)) {
+		PERROR("munmap(bm_on_disk) failed");
+		return -1;
 	}
+	if (munmap(cfg->on_disk.al, MD_AL_MAX_SIZE_07 * 512)) {
+		PERROR("munmap(al_on_disk) failed");
+		return -1;
+	}
+	if (munmap(cfg->on_disk.md7, 8 * 512)) {
+		PERROR("munmap(md_on_disk) failed");
+		return -1;
+	}
+	if (fsync(cfg->fd) == -1) {
+		PERROR("fsync() failed");
+		return -1;
+	}
+	if (ioctl(cfg->fd, BLKFLSBUF) == -1) {
+		PERROR("ioctl(,BLKFLSBUF,) failed");
+		return -1;
+	}
+	if (close(cfg->fd)) {
+		PERROR("close() failed");
+		return -1;
+	}
+	return 0;
+}
 
-	md = fcfg->ops->md_alloc();
+int v07_md_initialize(struct format *cfg)
+{
+	u64 al_offset, bm_offset;
 
-	OR_EXIT(fcfg,open);
-	OR_EXIT(fcfg,read,md);
-	printf("%d:%d:%d:%d:%d:%d:%d:%d\n",
-		md->gc[Flags] & MDF_Consistent ? 1 : 0,
-		md->gc[HumanCnt],
-		md->gc[TimeoutCnt],
-		md->gc[ConnectedCnt],
-		md->gc[ArbitraryCnt],
-		md->gc[Flags] & MDF_PrimaryInd ? 1 : 0,
-		md->gc[Flags] & MDF_ConnectedInd ? 1 : 0,
-		md->gc[Flags] & MDF_FullSync ? 1 : 0);
+	cfg->md.la_sect = 0;
+	cfg->md.gc[Flags] = MDF_FullSync;
+	cfg->md.gc[HumanCnt] = 1;	/* THINK 0? 1? */
+	cfg->md.gc[TimeoutCnt] = 1;
+	cfg->md.gc[ConnectedCnt] = 1;
+	cfg->md.gc[ArbitraryCnt] = 1;
+	cfg->md.magic = DRBD_MD_MAGIC_07;
 
-	OR_EXIT(fcfg,close);
+	/*
+	 * FIXME md_size not yet validated or used.
+	 */
+	cfg->md.md_size = MD_RESERVED_SIZE_07;
+	cfg->md.al_offset = MD_AL_OFFSET_07;
+	cfg->md.al_nr_extents = 257;	/* arbitrary. */
+	cfg->md.bm_offset = MD_BM_OFFSET_07;
 
-	md_free(md);
+	al_offset = cfg->md_offset + cfg->md.al_offset * 512;
+	bm_offset = cfg->md_offset + cfg->md.bm_offset * 512;
+	if (cfg->on_disk.al == NULL) {
+		cfg->on_disk.al =
+		    mmap(NULL, MD_AL_MAX_SIZE_07 * 512, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, cfg->fd, al_offset);
+		if (cfg->on_disk.al == MAP_FAILED) {	/* mmap() never returns NULL on error */
+			PERROR("mmap(al_on_disk) failed");
+			exit(20);
+		}
+	}
 
+	if (cfg->on_disk.bm == NULL) {
+		cfg->on_disk.bm =
+		    mmap(NULL, MD_BM_MAX_SIZE_07, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, cfg->fd, bm_offset);
+		if (cfg->on_disk.bm == MAP_FAILED) {
+			PERROR("mmap(bm_on_disk) failed");
+			exit(20);
+		}
+	}
+
+	memset(cfg->on_disk.al, 0, MD_AL_MAX_SIZE_07 * 512);	/* (ptr, value, bytes); AL size is in sectors */
+	memset(cfg->on_disk.bm, 0, MD_BM_MAX_SIZE_07);	/* same length as the bitmap mapping */
 	return 0;
 }
 
-int m_convert_md(struct format * , struct format * );
+/******************************************
+  }}} end of v07
+ ******************************************/
+/******************************************
+ begin of v08 {{{
+ ******************************************/
 
-struct format * dup_v07(struct format * fcfg)
+int v08_md_disk_to_cpu(struct format *cfg)
 {
-	struct format * new;
-
-	new = malloc(fops_v07->conf_size + sizeof(void*) );
-	new->ops = fops_v07;
-	new->d.f07.fd = 0;
-	new->d.f07.device_name = fcfg->d.f07.device_name;
-	new->d.f07.index = fcfg->d.f07.index;
-	
-	return new;
+	md_disk_08_to_cpu(&cfg->md, cfg->on_disk.md8);
+	return v08_validate_md(&cfg->md);
 }
 
-int ask(char *text)
+int v08_md_cpu_to_disk(struct format *cfg)
 {
-	char answer[200];
-	int rr;
-
-	printf("%s [yes/no] ",text);
-
-	rr = scanf("%s",answer);
-
-	return !strcmp(answer,"yes");
+	int err;
+	if (v08_validate_md(&cfg->md))
+		return -1;
+	if (!cfg->on_disk.md8) {
+		fprintf(stderr, "BUG: on-disk-md not mapped\n");
+		return -1;
+	}
+	md_cpu_to_disk_08(cfg->on_disk.md8, &cfg->md);
+	err = msync(cfg->on_disk.md8, sizeof(*cfg->on_disk.md8),
+		    MS_SYNC | MS_INVALIDATE);
+	if (err) {
+		PERROR("msync(on_disk_md)");
+		return -1;
+	};
+	return 0;
 }
 
-int meta_create_md(struct format * fcfg, char** argv, int argc )
+int v08_md_initialize(struct format *cfg)
 {
-	struct meta_data* md;
+	u64 al_offset, bm_offset;
 
-	if(argc > 0) {
-		fprintf(stderr,"Ignoring additional arguments\n");
-	}
+	cfg->md.la_sect = 0;
+	cfg->md.gc[Flags] = MDF_FullSync;
+	cfg->md.gc[HumanCnt] = 1;	/* THINK 0? 1? */
+	cfg->md.gc[TimeoutCnt] = 1;
+	cfg->md.gc[ConnectedCnt] = 1;
+	cfg->md.gc[ArbitraryCnt] = 1;
+	cfg->md.magic = DRBD_MD_MAGIC_08;
 
-	OR_EXIT(fcfg,open);
+	/*
+	 * FIXME md_size not yet validated or used.
+	 * FIXME make it flexible, not fixed anymore as with 07.
+	 */
+	cfg->md.md_size = MD_RESERVED_SIZE_07;
+	cfg->md.al_offset = MD_AL_OFFSET_07;
+	cfg->md.al_nr_extents = 257;	/* arbitrary. */
+	cfg->md.bm_offset = MD_BM_OFFSET_07;
 
-	md = fcfg->ops->md_alloc();
-
-	/* Hackisch but efficient, knows details about v07 and v08 */
-	if( fcfg->ops == fops_v08 ) {
-		if(fops_v07->read(fcfg,md)) {
-			if(ask("Valid v07 meta-data found, convert?")) {
-				md_free(md);
-				return m_convert_md(dup_v07(fcfg), fcfg);
-			}
-			goto question2;
+	al_offset = cfg->md_offset + cfg->md.al_offset * 512;
+	bm_offset = cfg->md_offset + cfg->md.bm_offset * 512;
+	if (cfg->on_disk.al == NULL) {
+		cfg->on_disk.al =
+		    mmap(NULL, MD_AL_MAX_SIZE_07 * 512, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, cfg->fd, al_offset);
+		if (cfg->on_disk.al == MAP_FAILED) {	/* mmap() never returns NULL on error */
+			PERROR("mmap(al_on_disk) failed");
+			exit(20);
 		}
 	}
 
-	if(fcfg->ops->read(fcfg,md)) {
-	question2:
-		if(!ask("Valid meta-data already in place, create new?")) {
-			printf("Operation cancelled.\n");
-			exit(0);
+	if (cfg->on_disk.bm == NULL) {
+		cfg->on_disk.bm =
+		    mmap(NULL, MD_BM_MAX_SIZE_07, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, cfg->fd, bm_offset);
+		if (cfg->on_disk.bm == MAP_FAILED) {
+			PERROR("mmap(bm_on_disk) failed");
+			exit(20);
 		}
 	}
 
-	printf("Creating meta data...\n");
+	/* do you want to initialize al to something more useful? */
+	memset(cfg->on_disk.al, 0, MD_AL_MAX_SIZE_07 * 512);	/* (ptr, value, bytes); AL size is in sectors */
+	memset(cfg->on_disk.bm, 0, MD_BM_MAX_SIZE_07);	/* same length as the bitmap mapping */
+	return 0;
+}
 
-	md_free(md);
-	md = fcfg->ops->md_alloc();
-	OR_EXIT(fcfg,write,md,1);
-	OR_EXIT(fcfg,close);
+/******************************************
+  }}} end of v08
+ ******************************************/
 
-	md_free(md);
+int meta_get_gc(struct format *cfg, char **argv, int argc)
+{
+	if (argc > 0) {
+		fprintf(stderr, "Ignoring additional arguments\n");
+	}
 
-	return 0;
+	if (cfg->ops->open(cfg))
+		return -1;
+	printf_gc(&cfg->md);
+	return cfg->ops->close(cfg);
 }
 
-int m_convert_md(struct format * source, struct format * target)
+int meta_show_gc(struct format *cfg, char **argv, int argc)
 {
-	struct meta_data* md;
+	char ppb[10];
 
-	md = target->ops->md_alloc();
+	if (argc > 0) {
+		fprintf(stderr, "Ignoring additional arguments\n");
+	}
 
-	printf("Converting meta data...\n");
+	if (cfg->ops->open(cfg))
+		return -1;
 
-	OR_EXIT(source,open);
-	OR_EXIT(source,read,md);
-	OR_EXIT(source,close);
+	printf("\n"
+	       "                                        WantFullSync |\n"
+	       "                                  ConnectedInd |     |\n"
+	       "                               lastState |     |     |\n"
+	       "                      ArbitraryCnt |     |     |     |\n"
+	       "                ConnectedCnt |     |     |     |     |\n"
+	       "            TimeoutCnt |     |     |     |     |     |\n"
+	       "        HumanCnt |     |     |     |     |     |     |\n"
+	       "Consistent |     |     |     |     |     |     |     |\n"
+	       "   --------+-----+-----+-----+-----+-----+-----+-----+\n"
+	       "       %3s | %3d | %3d | %3d | %3d | %3s | %3s | %3s  \n"
+	       "\n",
+	       cfg->md.gc[Flags] & MDF_Consistent ? "1/c" : "0/i",
+	       cfg->md.gc[HumanCnt],
+	       cfg->md.gc[TimeoutCnt],
+	       cfg->md.gc[ConnectedCnt],
+	       cfg->md.gc[ArbitraryCnt],
+	       cfg->md.gc[Flags] & MDF_PrimaryInd ? "1/p" : "0/s",
+	       cfg->md.gc[Flags] & MDF_ConnectedInd ? "1/c" : "0/n",
+	       cfg->md.gc[Flags] & MDF_FullSync ? "1/y" : "0/n");
 
-	OR_EXIT(target,open);
-	OR_EXIT(target,write,md,0);
-	OR_EXIT(target,close);
+	if (cfg->md.la_sect) {
+		printf("last agreed size: %s\n",
+		       ppsize(ppb, cfg->md.la_sect >> 1));
+		printf("%u bits set in the bitmap [ %s out of sync ]\n",
+		       cfg->bits_set, ppsize(ppb, cfg->bits_set * 4));
+	} else {
+		printf("zero size device -- never seen peer yet?\n");
+	}
 
-	md_free(md);
-
-	return 0;
+	return cfg->ops->close(cfg);
 }
 
-struct format* parse_format(char** argv, int argc, int* ai);
-
-int meta_convert_md(struct format * fcfg, char** argv, int argc )
+int meta_dump_md(struct format *cfg, char **argv, int argc)
 {
-	struct format * target;
-	int unused;
-
-	target = parse_format(argv, argc, &unused );
-
-	return m_convert_md(fcfg, target);
-}
-
-int meta_dump_md(struct format * fcfg, char** argv, int argc )
-{
-	struct meta_data* md;
-	u64 *b;
-	int words;
 	int i;
 
-	if(argc > 0) {
-		fprintf(stderr,"Ignoring additional arguments\n");
+	if (argc > 0) {
+		fprintf(stderr, "Ignoring additional arguments\n");
 	}
 
-	md = fcfg->ops->md_alloc();
+	if (cfg->ops->open(cfg))
+		return -1;
 
-	OR_EXIT(fcfg,open);
-	OR_EXIT(fcfg,read,md);
+	/* FIXME invent some sceme to identify this dump,
+	 * so we can safely restore it later */
+	printf("DRBD meta data dump version <FIXME drbdmeta dump version>\n");
+	printf("meta data version %s\n\n", cfg->ops->name);
 	printf("gc {");
-	for(i=0;i<GEN_CNT_SIZE;i++) {
-		printf(" 0x%X;",md->gc[i]);
+	for (i = 0; i < GEN_CNT_SIZE; i++) {
+		printf(" 0x%X;", cfg->md.gc[i]);
 	}
 	printf(" }\n");
 
-	/* if(md->la_size)  TODO. */
-
-	if(md->bitmap) {
-		words = md->bm_size/sizeof(u64);
-		b = (u64*) md->bitmap;
-		printf("bm {");
-		for (i=0;i<words;i++) {
-#if BITS_PER_LONG == 32
-			printf(" 0x%016llX;",b[i]);
-#elif BITS_PER_LONG == 64
-			printf(" 0x%016lX;",b[i]);
-#endif
-			if(i%4 == 3) printf("\n    ");
-		}
-		printf(" }\n");
+	if (cfg->ops > f_ops + Drbd_06) {
+		printf("la-size-sect %llu;\n", cfg->md.la_sect);
+		printf("# bm-bytes %u;\n", cfg->bm_bytes);	/* informational only */
+		printf("# bits-set %u;\n", cfg->bits_set);	/* informational only */
+		printf
+		    ("# FIXME include offsets, once they are not fixed anymore\n");
+		if (cfg->on_disk.bm)
+			printf_bm((le_u64 *) cfg->on_disk.bm,
+				  cfg->bm_bytes / sizeof(le_u64));
 	}
 
-	OR_EXIT(fcfg,close);
+	/* MAYBE dump activity log?
+	 * but that probably does not make any sense,
+	 * beyond debugging. */
 
-	md_free(md);
+	return cfg->ops->close(cfg);
+}
 
+int md_convert_07_to_08(struct format *cfg)
+{
+	/* Note that al and bm are not touched!
+	 * (they are currently not even mmaped)
+	 *
+	 * KB <-> sectors is done in the md disk<->cpu functions.
+	 * We only need to adjust the magic here. */
+	printf("Converting meta data...\n");
+	cfg->md.magic = DRBD_MD_MAGIC_08;
+	if (cfg->ops->md_cpu_to_disk(cfg)
+	    || cfg->ops->close(cfg)) {
+		fprintf(stderr, "conversion failed\n");
+		return -1;
+	}
+	printf("Successfully converted v07 meta data to v08 format.\n");
 	return 0;
 }
 
-int m_strsep(char **s,int *val)
+/* FIXME create v07 replaces a valid v08 block without confirmation!
+ * we need better format auto-detection */
+int meta_create_md(struct format *cfg, char **argv, int argc)
 {
+	int virgin, err;
+	if (argc > 0) {
+		fprintf(stderr, "Ignoring additional arguments\n");
+	}
+
+	virgin = cfg->ops->open(cfg);
+	if (virgin && cfg->ops == f_ops + Drbd_08) {
+		/* wrong format. if we want to create a v08,
+		 * we might have a v07 in place.
+		 * if so, maybe just convert.
+		 */
+		virgin = v07_md_disk_to_cpu(cfg);
+		if (!virgin) {
+			if (confirmed("Valid v07 meta-data found, convert?"))
+				return md_convert_07_to_08(cfg);
+		}
+	}
+	if (!virgin) {
+		if (!confirmed("Valid meta-data already in place, create new?")) {
+			printf("Operation cancelled.\n");
+			exit(0);
+		}
+	}
+
+	printf("Creating meta data...\n");
+	err = cfg->ops->md_initialize(cfg)
+	    || cfg->ops->md_cpu_to_disk(cfg)
+	    || cfg->ops->close(cfg);
+	if (err)
+		fprintf(stderr, "creation failed\n");	/* this path creates, it does not convert */
+
+	return err;
+}
+
+int m_strsep(char **s, int *val)
+{
 	char *t, *e;
-	int v;
+	long v;
 
-	if( (t = strsep(s,":")) ) {
-		if(strlen(t)) {
+	if ((t = strsep(s, ":"))) {
+		if (strlen(t)) {
 			e = t;
-			v = strtol(t,&e,0);
-			if(*e != 0) {
-				fprintf(stderr,"'%s' is not a number.\n",*s);
+			v = strtol(t, &e, 0);
+			if (*e != 0) {
+				fprintf(stderr, "'%s' is not a number.\n", t);
 				exit(10);
 			}
-			if(v < 0 ) {
-				fprintf(stderr,"'%s' is negative.\n",*s);
+			if (v < 0) {
+				fprintf(stderr, "'%s' is negative.\n", t);
 				exit(10);
 			}
+			if (v > 0xFFffFFff) {
+				fprintf(stderr,
+					"'%s' is out of range (max 0xFFffFFff).\n",
+					t);	/* not *s: strsep() already advanced it (NULL on last field) */
+				exit(10);
+			}
 			*val = v;
 		}
 		return 1;
@@ -1030,192 +1200,194 @@
 	return 0;
 }
 
-int m_strsep_b(char **s,int *val, int mask)
+int m_strsep_b(char **s, int *val, int mask)
 {
 	int d;
 	int rv;
 
 	d = *val & mask;
 
-	rv = m_strsep(s,&d);
+	rv = m_strsep(s, &d);
 
-	if(d > 1) {
-		fprintf(stderr,"'%d' is not 0 or 1.\n",d);
+	if (d > 1) {
+		fprintf(stderr, "'%d' is not 0 or 1.\n", d);
 		exit(10);
 	}
 
-	if(d) *val |=  mask;
-	else  *val &= ~mask;
+	if (d)
+		*val |= mask;
+	else
+		*val &= ~mask;
 
 	return rv;
 }
 
-/* "::14" sets the TimeoutCnt to 14 */
-int meta_set_gc(struct format * fcfg, char** argv, int argc )
+int meta_set_gc(struct format *cfg, char **argv, int argc)
 {
-	struct meta_data* md;
+	struct md_cpu tmp;
+	int err;
 	char **str;
 
-	if(argc < 1) {
-		fprintf(stderr,"Required Argument missing\n");
+	if (argc > 1) {
+		fprintf(stderr, "Ignoring additional arguments\n");
+	}
+	if (argc < 1) {
+		fprintf(stderr, "Required Argument missing\n");
 		exit(10);
 	}
-	str = &argv[0];
 
-	md = fcfg->ops->md_alloc();
+	if (cfg->ops->open(cfg))
+		return -1;
 
-	OR_EXIT(fcfg,open);
-	OR_EXIT(fcfg,read,md);
+	tmp = cfg->md;
+	str = &argv[0];
 
 	do {
-		if(!m_strsep_b(str,&md->gc[Flags],MDF_Consistent)) break;
-		if(!m_strsep(str,&md->gc[HumanCnt])) break;
-		if(!m_strsep(str,&md->gc[TimeoutCnt])) break;
-		if(!m_strsep(str,&md->gc[ConnectedCnt])) break;
-		if(!m_strsep(str,&md->gc[ArbitraryCnt])) break;
-		if(!m_strsep_b(str,&md->gc[Flags],MDF_PrimaryInd)) break;
-		if(!m_strsep_b(str,&md->gc[Flags],MDF_ConnectedInd)) break;
-		if(!m_strsep_b(str,&md->gc[Flags],MDF_FullSync)) break;
-	} while(0);
+		if (!m_strsep_b(str, &tmp.gc[Flags], MDF_Consistent)) break;
+		if (!m_strsep(str, &tmp.gc[HumanCnt])) break;
+		if (!m_strsep(str, &tmp.gc[TimeoutCnt])) break;
+		if (!m_strsep(str, &tmp.gc[ConnectedCnt])) break;
+		if (!m_strsep(str, &tmp.gc[ArbitraryCnt])) break;
+		if (!m_strsep_b(str, &tmp.gc[Flags], MDF_PrimaryInd)) break;
+		if (!m_strsep_b(str, &tmp.gc[Flags], MDF_ConnectedInd)) break;
+		if (!m_strsep_b(str, &tmp.gc[Flags], MDF_FullSync)) break;
+	} while (0);
 
-	OR_EXIT(fcfg,write,md,0);
-	OR_EXIT(fcfg,close);
+	printf("  consistent:H:T:C:A:p:c:f\n");
+	printf("previously ");
+	printf_gc(&cfg->md);
+	printf("GCs set to ");
+	printf_gc(&tmp);
 
-	md_free(md);
+	if (!confirmed("Write new GCs to disk?")) {
+		printf("Operation cancelled.\n");
+		exit(0);
+	}
 
-	return 0;
-}
+	cfg->md = tmp;
 
-/*
- * global vaiables
- */
+	err = cfg->ops->md_cpu_to_disk(cfg)
+	    || cfg->ops->close(cfg);
+	if (err)
+		fprintf(stderr, "update failed\n");
 
-struct meta_cmd cmds[] = {
-	{ "show-gc",    0,                         meta_show_gc,      1 },
-	{ "get-gc",     0,                         meta_get_gc,       1 },
-	{ "create-md",  0,                         meta_create_md,    1 },
-	{ "dump-md",    0,                         meta_dump_md,      1 },
-	{ "convert-md", "FORMAT [FORMAT ARGS...]", meta_convert_md,   1 },
-	/* { "restore-md",    0,                    meta_restore_md,   0 }, */
-	{ "set-gc",     ":::VAL:VAL:...",          meta_set_gc,       0 }
-};
+	return err;
+}
 
-char* progname = 0;
-int drbd_fd;
-char* drbd_dev_name;
-
 void print_usage()
 {
+	char **args;
 	int i;
-	char **args;
 
-	printf("\nUSAGE: %s DEVICE FORMAT [FORMAT ARGS...] COMMAND [CMD ARGS...]\n"
-	       ,progname);
+	printf
+	    ("\nUSAGE: %s DEVICE FORMAT [FORMAT ARGS...] COMMAND [CMD ARGS...]\n",
+	     progname);
 
 	printf("\nFORMATS:\n");
-	for (i = 0; i < ARRY_SIZE(formats); i++ ) {
-		printf("  %s",formats[i].name);
-		if ((args = formats[i].args)) {
-			while(*args) {
-				printf(" %s",*args++);
+	for (i = Drbd_06; i < Drbd_Unknown; i++) {
+		printf("  %s", f_ops[i].name);
+		if ((args = f_ops[i].args)) {
+			while (*args) {
+				printf(" %s", *args++);
 			}
 		}
 		printf("\n");
 	}
 
 	printf("\nCOMMANDS:\n");
-	for (i = 0; i < ARRY_SIZE(cmds); i++ ) {
-		if(!cmds[i].show_in_usage) continue;
-		printf("  %s %s\n",cmds[i].name,
-		       cmds[i].args ? cmds[i].args : "" );
+	for (i = 0; i < ARRY_SIZE(cmds); i++) {
+		if (!cmds[i].show_in_usage)
+			continue;
+		printf("  %s %s\n", cmds[i].name,
+		       cmds[i].args ? cmds[i].args : "");
 	}
 
 	exit(0);
 }
 
-void cleanup(void)
+struct format *parse_format(char **argv, int argc, int *ai)
 {
-	if(drbd_fd == -1) {
-		dt_release_lockfile_dev_name(drbd_dev_name);
-	} else {
-		dt_close_drbd_device(drbd_fd);
-	}
-}
+	struct format *cfg;
+	enum Known_Formats f;
 
-struct format* parse_format(char** argv, int argc, int* ai)
-{
-	struct format_ops* fmt = NULL;
-	struct format* fcfg;
-
-	int i;
-
-	if(argc < 1) {
-		fprintf(stderr,"Format identifier missing\n");
+	if (argc < 1) {
+		fprintf(stderr, "Format identifier missing\n");
 		exit(20);
 	}
 
-	for (i = 0; i < ARRY_SIZE(formats); i++ ) {
-		if( !strcmp(formats[i].name,argv[0]) ) {
-			fmt = formats+i;
+	for (f = Drbd_06; f < Drbd_Unknown; f++) {
+		if (!strcmp(f_ops[f].name, argv[0]))
 			break;
-		}
 	}
-	if(fmt == NULL) {
-		fprintf(stderr,"Unknown format '%s'.\n",argv[0]);
+	if (f == Drbd_Unknown) {
+		fprintf(stderr, "Unknown format '%s'.\n", argv[0]);
 		exit(20);
 	}
 
 	(*ai)++;
 
-	fcfg = malloc(fmt->conf_size + sizeof(void*) );
-	fcfg->ops = fmt;
-	fmt->parse(fcfg,argv+1,argc-1,ai);
+	cfg = calloc(1, sizeof(struct format));
+	cfg->ops = f_ops + f;
+	if (cfg->ops->parse(cfg, argv + 1, argc - 1, ai)) exit(20);	/* nonzero: format args rejected */
 
-	return fcfg;
+	return cfg;
 }
 
-int main(int argc, char** argv)
+int main(int argc, char **argv)
 {
-	struct meta_cmd* command = NULL;
-	struct format * fcfg;
-	int i,ai;
+	struct meta_cmd *command = NULL;
+	struct format *cfg;
+	int i, ai;
 
-	if ( (progname = strrchr(argv[0],'/')) ) {
+	if ((progname = strrchr(argv[0], '/'))) {
 		argv[0] = ++progname;
 	} else {
 		progname = argv[0];
 	}
 
-	if (argc < 4) print_usage();
+	if (argc < 4)
+		print_usage();
 
 	ai = 1;
-	drbd_dev_name=argv[ai++];
-	drbd_fd=dt_open_drbd_device(drbd_dev_name,1); /* Create the lock file. */
-	atexit(cleanup);
-	if(drbd_fd > -1) {
-		int fd2 = open(drbd_dev_name,O_RDWR);
-		/* I want to avoid DRBD specific ioctls here... */
-		if(fd2) {
-			fprintf(stderr,"Device '%s' is configured!\n",
+	drbd_dev_name = argv[ai++];
+	drbd_fd = dt_lock_open_drbd(drbd_dev_name, &lock_fd, 1);
+	if (drbd_fd > -1) {
+		/* avoid DRBD specific ioctls here...
+		 * If the device is _not_ configured, block device ioctls
+		 * should fail. So if we _can_ determine whether it is readonly,
+		 * it is configured; and we better not touch its meta data.
+		 */
+		int dummy_is_ro;
+		if (ioctl(drbd_fd, BLKROGET, &dummy_is_ro) == 0) {
+			fprintf(stderr, "Device '%s' is configured!\n",
 				drbd_dev_name);
 			exit(20);
 		}
-		close(fd2);
 	}
 
-	fcfg = parse_format(argv+ai, argc-ai, &ai);
+	/* implicit cfg = calloc */
+	cfg = parse_format(argv + ai, argc - ai, &ai);
 
-	for (i = 0; i < ARRY_SIZE(cmds); i++ ) {
-		if( !strcmp(cmds[i].name,argv[ai]) ) {
-			command = cmds+i;
+	if (ai >= argc) {
+		fprintf(stderr, "command missing\n");
+		exit(20);
+	}
+
+	for (i = 0; i < ARRY_SIZE(cmds); i++) {
+		if (!strcmp(cmds[i].name, argv[ai])) {
+			command = cmds + i;
 			break;
 		}
 	}
-	if(command == NULL) {
-		fprintf(stderr,"Unknown command '%s'.\n",argv[ai]);
+	if (command == NULL) {
+		fprintf(stderr, "Unknown command '%s'.\n", argv[ai]);
 		exit(20);
 	}
 	ai++;
 
-	return command->function(fcfg, argv+ai, argc-ai);
+	return command->function(cfg, argv + ai, argc - ai);
+	/* and if we want an explicit free,
+	 * this would be the place for it.
+	 * free(cfg->device_name), free(cfg) ...
+	 */
 }

Modified: trunk/user/drbdsetup.c
===================================================================
--- trunk/user/drbdsetup.c	2004-10-29 11:53:46 UTC (rev 1621)
+++ trunk/user/drbdsetup.c	2004-11-02 17:36:52 UTC (rev 1622)
@@ -365,7 +365,7 @@
 {
   int err,drbd_fd,version;
 
-  drbd_fd=dt_open_drbd_device(device,0);
+  drbd_fd = dt_lock_open_drbd(device,NULL,0);
 
   err=ioctl(drbd_fd,DRBD_IOCTL_GET_VERSION,&version);
   if(err)

Modified: trunk/user/drbdtool_common.c
===================================================================
--- trunk/user/drbdtool_common.c	2004-10-29 11:53:46 UTC (rev 1621)
+++ trunk/user/drbdtool_common.c	2004-11-02 17:36:52 UTC (rev 1622)
@@ -1,3 +1,5 @@
+#define _GNU_SOURCE
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
@@ -6,7 +8,6 @@
 #include <errno.h>
 #include <signal.h>
 #include <stdio.h>
-#define _GNU_SOURCE
 #include <getopt.h>
 #include <stdlib.h>
 #include "drbdtool_common.h"
@@ -123,45 +124,70 @@
   return r << shift;
 }
 
-void create_lockfile_mm(int major, int minor)
+void alarm_handler(int signo)
+{ /* nothing. just interrupt F_SETLKW */ }
+
+/* it is implicitly unlocked when the process dies.
+ * but if you want to explicitly unlock it, just close it. */
+int unlock_fd(int fd)
 {
-  char lfname[40];
-  int fd,pid;
-  FILE* fi;
+	return close(fd);
+}
 
-  snprintf(lfname,39,"/var/lock/drbd-%d-%d.pid",major,minor);
+int get_fd_lockfile_timeout(const char *path, int seconds)
+{
+    int fd, err;
+    struct sigaction sa,so;
+    struct flock fl = {
+	.l_type = F_WRLCK,
+	.l_whence = 0,
+	.l_start = 0,
+	.l_len = 0
+    };
 
-  while ( (fd = open(lfname,O_CREAT|O_EXCL|O_WRONLY,00644)) == -1 )
-    {
-      fd = open(lfname,O_RDONLY);
-      if(fd == -1 )
-	{
-	  PERROR("Creation and open(,O_RDONLY) of lockfile failed");
-	  exit(20);
-	}
-      fi = fdopen(fd,"r");
-      fscanf(fi,"%d",&pid);
-      fclose(fi);
-      errno = 0;
-      kill(pid,0);
-      if(errno == ESRCH) {
-	fprintf(stderr,"Stale lock file found and removed.\n");
-	remove(lfname);
-      } else {
-	fprintf(stderr,"A drbd tool with pid %d has the device locked.\n",pid);
-	exit(20);
-      }
+    if ((fd = open(path, O_RDWR | O_CREAT, 0600)) < 0) {
+	fprintf(stderr,"open(%s): %m\n",path);
+	return -1;
     }
 
-  fi = fdopen(fd,"w");
-  fprintf(fi,"%d\n",getpid());
-  fclose(fi);
+    if (seconds) {
+	sa.sa_handler=alarm_handler;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags=0;
+	sigaction(SIGALRM,&sa,&so);
+	alarm(seconds);
+	err = fcntl(fd,F_SETLKW,&fl);
+	if (err) err = errno;
+	alarm(0);
+	sigaction(SIGALRM,&so,NULL);
+    } else {
+	err = fcntl(fd,F_SETLK,&fl);
+	if (err) err = errno;
+    }
+
+    if (!err) return fd;
+
+    if (err != EINTR && err != EAGAIN) {
+	close(fd);
+	errno = err;
+	fprintf(stderr,"fcntl(%s,...): %m\n", path);
+	return -1;
+    }
+
+    /* do we want to know this? */
+    if (!fcntl(fd,F_GETLK,&fl)) {
+	fprintf(stderr,"lock on %s currently held by pid:%u\n",
+		path, fl.l_pid);
+    }
+    close(fd);
+    return -1;
 }
 
-int dt_open_drbd_device(const char* device,int open_may_fail)
+int dt_lock_open_drbd(const char* device, int *lock_fd, int open_may_fail)
 {
-  int drbd_fd,err;
+  int drbd_fd, lfd, err;
   struct stat drbd_stat;
+  char lfname[40];
 
   drbd_fd=open(device,O_RDONLY);
   if(drbd_fd==-1 && !open_may_fail)
@@ -182,51 +208,34 @@
       exit(20);
     }
 
-  create_lockfile_mm(major(drbd_stat.st_rdev),minor(drbd_stat.st_rdev));
+  /* THINK.
+   * maybe we should also place a fcntl lock on the
+   * _physical_device_ we open later...
+   *
+   * This lock is to prevent a drbd minor from being configured
+   * by drbdsetup while drbdmeta is about to mess with its meta data.
+   *
+   * If you happen to mess with the meta data of one device,
+   * pretending it belongs to an other, you'll screw up completely.
+   *
+   * We should store something in the meta data to detect such abuses.
+   */
 
-  return drbd_fd;
-}
-
-void dt_release_lockfile(int drbd_fd)
-{
-  int err;
-  struct stat drbd_stat;
-  char lfname[40];
-
-  err=fstat(drbd_fd, &drbd_stat);
-  if(err)
-    {
-      PERROR("fstat() failed");
-      exit(20);
-    }
-
-  snprintf(lfname,39,"/var/lock/drbd-%d-%d.pid",
+  snprintf(lfname,39,"/var/lock/drbd-%d-%d",
 	   major(drbd_stat.st_rdev),minor(drbd_stat.st_rdev));
 
-  remove(lfname);
-}
+  lfd = get_fd_lockfile_timeout(lfname,1);
+  if (lfd < 0)
+	exit(20);
+  if (lock_fd) *lock_fd = lfd;
 
-void dt_release_lockfile_dev_name(const char* device)
-{
-  int err;
-  struct stat drbd_stat;
-  char lfname[40];
-
-  err=stat(device, &drbd_stat);
-  if(err)
-    {
-      PERROR("stat() failed");
-      exit(20);
-    }
-
-  snprintf(lfname,39,"/var/lock/drbd-%d-%d.pid",
-	   major(drbd_stat.st_rdev),minor(drbd_stat.st_rdev));
-
-  remove(lfname);
+  return drbd_fd;
 }
 
-int dt_close_drbd_device(int drbd_fd)
+int dt_close_drbd_unlock(int drbd_fd, int lock_fd)
 {
-  dt_release_lockfile(drbd_fd);
-  return close(drbd_fd);
+  int err = 0;
+  if (drbd_fd >= 0) err = close(drbd_fd);
+  if (lock_fd >= 0) unlock_fd(lock_fd); /* ignore errors */
+  return err;
 }

Modified: trunk/user/drbdtool_common.h
===================================================================
--- trunk/user/drbdtool_common.h	2004-10-29 11:53:46 UTC (rev 1621)
+++ trunk/user/drbdtool_common.h	2004-11-02 17:36:52 UTC (rev 1622)
@@ -3,15 +3,17 @@
 
 #define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
 
+/*
 #define PERROR(fmt, args...) \
 do { fprintf(stderr,fmt ": " , ##args); perror(0); } while (0)
+*/
+#define PERROR(fmt, args...) do { fprintf(stderr, fmt ": %m\n" , ##args); } while (0)
 
 struct option;
 
+extern int dt_lock_open_drbd(const char* device, int *lock_fd, int open_may_fail);
+extern int dt_close_drbd_unlock(int drbd_fd, int lock_fd);
 extern void dt_release_lockfile(int drbd_fd);
-extern void dt_release_lockfile_dev_name(const char* device);
-extern int dt_open_drbd_device(const char* device,int open_may_fail);
-extern int dt_close_drbd_device(int drbd_fd);
 extern unsigned long long m_strtoll(const char* s,const char def_unit);
 const char* make_optstring(struct option *options, char startc);
 char* ppsize(char* buf, size_t size);



More information about the drbd-cvs mailing list