[DRBD-cvs] r1436 - in trunk: drbd testing testing/CTH testing/CTH/LGE_CTH

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Thu, 15 Jul 2004 20:28:33 +0200 (CEST)


Author: phil
Date: 2004-07-15 20:28:32 +0200 (Thu, 15 Jul 2004)
New Revision: 1436

Modified:
   trunk/drbd/drbd_actlog.c
   trunk/drbd/drbd_bitmap.c
   trunk/drbd/drbd_int.h
   trunk/drbd/drbd_main.c
   trunk/drbd/drbd_receiver.c
   trunk/drbd/drbd_worker.c
   trunk/testing/CTH/CTH_bash.helpers
   trunk/testing/CTH/CTH_bash.sh
   trunk/testing/CTH/LGE_CTH.pm
   trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm
   trunk/testing/CTH/LGE_CTH/FileSystem.pm
   trunk/testing/CTH/LGE_CTH/Node.pm
   trunk/testing/CTH/T-001.sh
   trunk/testing/CTH/T-002.sh
   trunk/testing/CTH/T-003.sh
   trunk/testing/CTH/T-004.sh
   trunk/testing/CTH/T-005.sh
   trunk/testing/CTH/T-006.sh
   trunk/testing/CTH/T-007.sh
   trunk/testing/CTH/functions.sh
   trunk/testing/access_and_verify.c
   trunk/testing/uml-screen-debug
Log:
[patch by LGE]
* s/WriteHint/UnplugRemote/g
* new module parameter major_nr to allow "arbitrary" major numbers
  * adjusted CTH to cope with that
* fix copy'n'paste and conversion errors in initial bitmap handshake
* warn "You should upgrade me!" if the peer speaks protocol version (PRO_VERSION+1)
* drbd_set_in_sync and drbd_set_out_of_sync are now macros that call
  __drbd_set_in_sync and __drbd_set_out_of_sync, passing the caller's
  file and line information, so that the causes of "strange state"
  reports can be tracked easily.
* rs_total is now != 0 only while we actually ARE syncing.
  It is reset to 0
  * when sync is done
  * when connection is lost
  * when storage is lost on either node
  This way we can optimize and call drbd_set_in_sync only if rs_total != 0
  (and it feels somewhat cleaner, too).
* makefile adjusted to recognize svn revision and date tags
* updates and fixes to the test helpers and bash test cases


Modified: trunk/drbd/drbd_actlog.c
===================================================================
--- trunk/drbd/drbd_actlog.c	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/drbd/drbd_actlog.c	2004-07-15 18:28:32 UTC (rev 1436)
@@ -671,15 +671,19 @@
  * called by worker on SyncTarget and receiver on SyncSource.
  *
  */
-void drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size)
+void __drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line)
 {
 	/* Is called from worker and receiver context _only_ */
 	unsigned long sbnr,ebnr,lbnr,bnr;
 	unsigned long count = 0;
 	sector_t esector, nr_sectors;
+	int strange_state;
 
-	if (mdev->cstate < Connected || test_bit(DISKLESS,&mdev->flags)) {
-		ERR("%s:%d: %s flags=0x%02lx\n", __FILE__ , __LINE__ ,
+	strange_state = (mdev->cstate <= Connected) ||
+	                test_bit(DISKLESS,&mdev->flags) ||
+	                test_bit(PARTNER_DISKLESS,&mdev->flags);
+	if (strange_state) {
+		ERR("%s:%d: %s flags=0x%02lx\n", file , line ,
 				cstate_to_name(mdev->cstate), mdev->flags);
 	}
 
@@ -745,13 +749,18 @@
  * called by tl_clear and drbd_send_dblock (==drbd_make_request).
  * so this can be _any_ process.
  */
-void drbd_set_out_of_sync(drbd_dev* mdev, sector_t sector, int size)
+void __drbd_set_out_of_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line)
 {
 	unsigned long sbnr,ebnr,lbnr,bnr;
 	sector_t esector, nr_sectors;
+	int strange_state;
 
-	if (mdev->cstate >= Connected) {
-		ERR("%s:%d: %s flags=0x%02lx\n", __FILE__ , __LINE__ ,
+	strange_state = ( mdev->cstate  > Connected ) ||
+	                ( mdev->cstate == Connected &&
+	                 !(test_bit(DISKLESS,&mdev->flags) ||
+	                   test_bit(PARTNER_DISKLESS,&mdev->flags)) );
+	if (strange_state) {
+		ERR("%s:%d: %s flags=0x%02lx\n", file , line ,
 				cstate_to_name(mdev->cstate), mdev->flags);
 	}
 

Modified: trunk/drbd/drbd_bitmap.c
===================================================================
--- trunk/drbd/drbd_bitmap.c	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/drbd/drbd_bitmap.c	2004-07-15 18:28:32 UTC (rev 1436)
@@ -723,8 +723,16 @@
 	int i;
 	D_BUG_ON(!(b && b->bm));
 
-	if (mdev->cstate >= Connected)
+/*
+ * only called from drbd_set_out_of_sync.
+ * strange_state blubber is already in place there...
+	strange_state = ( mdev->cstate  > Connected ) ||
+	                ( mdev->cstate == Connected &&
+	                 !(test_bit(DISKLESS,&mdev->flags) ||
+	                   test_bit(PARTNER_DISKLESS,&mdev->flags)) );
+	if (strange_state)
 		ERR("%s in drbd_bm_set_bit\n", cstate_to_name(mdev->cstate));
+*/
 
 	spin_lock_irq(&b->bm_lock);
 	BM_PARANOIA_CHECK();
@@ -761,10 +769,13 @@
 	}
 	spin_unlock_irq(&b->bm_lock);
 
-	/* clearing bits should only take place when sync is in progress! */
+	/* clearing bits should only take place when sync is in progress!
+	 * this is only called from drbd_set_in_sync.
+	 * strange_state blubber is already in place there ...
 	if (i && mdev->cstate <= Connected)
 		ERR("drbd_bm_clear_bit: cleared a bitnr=%lu while %s\n",
 				bitnr, cstate_to_name(mdev->cstate));
+	 */
 
 	return i;
 }

Modified: trunk/drbd/drbd_int.h
===================================================================
--- trunk/drbd/drbd_int.h	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/drbd/drbd_int.h	2004-07-15 18:28:32 UTC (rev 1436)
@@ -42,6 +42,7 @@
 // module parameter, defined in drbd_main.c
 extern int minor_count;
 extern int disable_io_hints;
+extern int major_nr;
 
 /* Using the major_nr of the network block device
    prevents us from deadlocking with no request entries
@@ -61,16 +62,20 @@
 # define DEVICE_NR(device) (MINOR(device))
 # define LOCAL_END_REQUEST
 # include <linux/blk.h>
-# define DRBD_MAJOR NBD_MAJOR
+# define DRBD_MAJOR major_nr
 #else
 # include <linux/blkdev.h>
 # include <linux/bio.h>
-# define MAJOR_NR NBD_MAJOR
+# define MAJOR_NR major_nr
 #endif
 
 #undef DEVICE_NAME
 #define DEVICE_NAME "drbd"
 #define DEVFS_NAME "nbd"    // This make sense as long as we are MAJOR 43
+/* FIXME we are no longer strictly MAJOR 43.
+ * so, should this too become "configurable" ?
+ * or shall we fix it to "drbd" ?
+ */
 
 // XXX do we need this?
 #ifndef TRUE
@@ -93,16 +98,11 @@
  * Cannot use SIGTERM nor SIGKILL, since these
  * are sent out by init on runlevel changes
  * I choose SIGHUP for now.
+ *
+ * FIXME btw, we should register some reboot notifier.
  */
 #define DRBD_SIGKILL SIGHUP
 
-/* To temporarily block signals during network operations.
- * as long as we send directly from make_request, I'd like to
- * allow KILL, so the user can kill -9 hanging write processes.
- * If it does not succeed, it _should_ timeout anyways, but...
- */
-#define DRBD_SHUTDOWNSIGMASK sigmask(DRBD_SIG)|sigmask(DRBD_SIGKILL)
-
 #define ID_SYNCER (-1LL)
 #define ID_VACANT 0     // All EEs on the free list should have this value
                         // freshly allocated EEs get !ID_VACANT (== 1)
@@ -341,7 +341,7 @@
 	ReportBitMap,
 	BecomeSyncTarget,
 	BecomeSyncSource,
-	WriteHint,     // Used in protocol C to hint the secondary to hurry up
+	UnplugRemote,  // Used at various times to hint the peer to hurry up
 	DataRequest,   // Used to ask for a data block
 	RSDataRequest, // Used to ask for a data block
 	SyncParam,
@@ -376,7 +376,7 @@
 		[ReportBitMap]     = "ReportBitMap",
 		[BecomeSyncTarget] = "BecomeSyncTarget",
 		[BecomeSyncSource] = "BecomeSyncSource",
-		[WriteHint]        = "WriteHint",
+		[UnplugRemote]     = "UnplugRemote",
 		[DataRequest]      = "DataRequest",
 		[RSDataRequest]    = "RSDataRequest",
 		[SyncParam]        = "SyncParam",
@@ -420,7 +420,7 @@
  *   PingAck
  *   BecomeSyncTarget
  *   BecomeSyncSource
- *   WriteHint
+ *   UnplugRemote
  */
 
 /*
@@ -649,7 +649,7 @@
 	STOP_SYNC_TIMER,	// tell timer to cancel itself
 	DO_NOT_INC_CONCNT,	// well, don't ...
 	UNPLUG_QUEUED,		// only relevant with kernel 2.4
-	UNPLUG_REMOTE,		// whether sending a "WriteHint" makes sense
+	UNPLUG_REMOTE,		// whether sending a "UnplugRemote" makes sense
 	DISKLESS,		// no local disk
 	PARTNER_DISKLESS,	// partner has no storage
 	PARTNER_CONSISTENT,	// partner has consistent data
@@ -1032,8 +1032,12 @@
 extern int drbd_rs_begin_io(struct Drbd_Conf *mdev, sector_t sector);
 extern void drbd_rs_cancel_all(drbd_dev* mdev);
 extern void drbd_al_read_log(struct Drbd_Conf *mdev);
-extern void drbd_set_in_sync(drbd_dev* mdev, sector_t sector,int blk_size);
-extern void drbd_set_out_of_sync(drbd_dev* mdev, sector_t sector,int blk_size);
+extern void __drbd_set_in_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line);
+#define drbd_set_in_sync(mdev,sector,size) \
+	__drbd_set_in_sync(mdev,sector,size, __FILE__, __LINE__ )
+extern void __drbd_set_out_of_sync(drbd_dev* mdev, sector_t sector, int size, const char* file, const unsigned int line);
+#define drbd_set_out_of_sync(mdev,sector,size) \
+	__drbd_set_out_of_sync(mdev,sector,size, __FILE__, __LINE__ )
 extern void drbd_al_apply_to_bm(struct Drbd_Conf *mdev);
 extern void drbd_al_to_on_disk_bm(struct Drbd_Conf *mdev);
 extern void drbd_al_shrink(struct Drbd_Conf *mdev);
@@ -1382,7 +1386,7 @@
 	case PingAck:
 	case BecomeSyncTarget:
 	case BecomeSyncSource:
-	case WriteHint:
+	case UnplugRemote:
 
 	case SyncParam:
 	case ReportParams:

Modified: trunk/drbd/drbd_main.c
===================================================================
--- trunk/drbd/drbd_main.c	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/drbd/drbd_main.c	2004-07-15 18:28:32 UTC (rev 1436)
@@ -90,9 +90,11 @@
 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, Lars Ellenberg <lars@linbit.com>");
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_LICENSE("GPL");
+MODULE_PARM_DESC(major_nr, "Major nr to use -- default 43 (NBD_MAJOR)");
 MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
 MODULE_PARM_DESC(disable_io_hints, "Necessary if the loopback network device is used for DRBD" );
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+MODULE_PARM(major_nr,"i");
 MODULE_PARM(minor_count,"i");
 MODULE_PARM(disable_io_hints,"i");
 #else
@@ -108,14 +110,16 @@
  */
 
 /* thanks to these macros, if compiled into the kernel (not-module),
- * these become boot parameters: drbd.minor_count and
+ * these become boot parameters: drbd.major_nr, drbd.minor_count and
  * drbd.disable_io_hints
  */
+module_param(major_nr,        int,0);
 module_param(minor_count,     int,0);
 module_param(disable_io_hints,int,0);
 #endif
 
 // module parameter, defined
+int major_nr = NBD_MAJOR;
 #ifdef MODULE
 int minor_count = 2;
 #else
@@ -369,6 +373,7 @@
 	if(mdev->cstate > Connected ) {
 		WARN("Resync aborted.\n");
 		set_cstate(mdev,Connected);
+		mdev->rs_total = 0;
 	}
 	if ( wait_event_interruptible_timeout(mdev->cstate_wait,
 		     atomic_read(&mdev->local_cnt) == 0 , HZ ) <= 0) {
@@ -806,12 +811,6 @@
 	p.block_id = e->block_id;
 	p.blksize  = cpu_to_be32(drbd_ee_get_size(e));
 
-	// YES, this happens. There is some race with the syncer!
-	if ((unsigned long)e->block_id <= 1) {
-		ERR("%s: e->block_id == %lx\n",__func__,(long)e->block_id);
-		return FALSE;
-	}
-
 	if (!mdev->meta.socket || mdev->cstate < Connected) return FALSE;
 	ok = drbd_send_cmd(mdev,mdev->meta.socket,cmd,(Drbd_Header*)&p,sizeof(p));
 	return ok;
@@ -1635,6 +1634,17 @@
 		return -EINVAL;
 	}
 
+	/* FIXME maybe allow only certain ranges? */
+	if (1 > major_nr||major_nr > 254) {
+		printk(KERN_ERR DEVICE_NAME
+			": invalid major_nr (%d)\n",major_nr);
+#ifdef MODULE
+		return -EINVAL;
+#else
+		major_nr = NBD_MAJOR;
+#endif
+	}
+
 	if (1 > minor_count||minor_count > 255) {
 		printk(KERN_ERR DEVICE_NAME
 			": invalid minor_count (%d)\n",minor_count);
@@ -1794,6 +1804,7 @@
 	printk(KERN_INFO DEVICE_NAME ": initialised. "
 	       "Version: " REL_VERSION " (api:%d/proto:%d)\n",
 	       API_VERSION,PRO_VERSION);
+	printk(KERN_INFO DEVICE_NAME": registered as block device major %d\n", major_nr);
 
 	return 0; // Success!
 

Modified: trunk/drbd/drbd_receiver.c
===================================================================
--- trunk/drbd/drbd_receiver.c	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/drbd/drbd_receiver.c	2004-07-15 18:28:32 UTC (rev 1436)
@@ -881,8 +881,17 @@
 	int ok;
 
 	drbd_rs_complete_io(mdev,sector); // before set_in_sync() !
-	if(likely(drbd_bio_uptodate(&e->private_bio))) {
-		drbd_set_in_sync(mdev, sector, drbd_ee_get_size(e));
+	if (likely( drbd_bio_uptodate(&e->private_bio) )) {
+		ok = !test_bit(DISKLESS,&mdev->flags) &&
+		     !test_bit(PARTNER_DISKLESS,&mdev->flags);
+		if (likely( ok )) {
+			drbd_set_in_sync(mdev, sector, drbd_ee_get_size(e));
+			/* THINK maybe don't send ack either
+			 * when we are suddenly diskless?
+			 * Dropping it here should do no harm,
+			 * since peer has no structs referencing this.
+			 */
+		}
 		ok = drbd_send_ack(mdev,WriteAck,e);
 	} else {
 		ok = drbd_send_ack(mdev,NegAck,e);
@@ -1004,10 +1013,14 @@
 	if(mdev->conf.wire_protocol == DRBD_PROT_C) {
 		if(likely(drbd_bio_uptodate(&e->private_bio))) {
 			ok=drbd_send_ack(mdev,WriteAck,e);
-			if(ok && mdev->rs_total) drbd_set_in_sync(mdev,sector,drbd_ee_get_size(e));
+			if (ok && mdev->rs_total)
+				drbd_set_in_sync(mdev,sector,drbd_ee_get_size(e));
 		} else {
 			ok = drbd_send_ack(mdev,NegAck,e);
 			ok&= drbd_io_error(mdev);
+			/* we expect it to be marked out of sync anyways...
+			 * maybe assert this?
+			 */
 		}
 		dec_unacked(mdev,HERE);
 
@@ -1281,7 +1294,7 @@
 		}
 	} else {
 		set_cstate(mdev,Connected);
-		if(mdev->rs_total) {
+		if(drbd_bm_total_weight(mdev)) {
 			if (drbd_md_test_flag(mdev,MDF_Consistent)) {
 				/* We are not going to do a resync but there
 				   are marks in the bitmap.
@@ -1290,7 +1303,7 @@
 				   Clean the bitmap...
 				 */
 				INFO("No resync -> clearing bit map.\n");
-				drbd_bm_set_all(mdev);
+				drbd_bm_clear_all(mdev);
 				drbd_bm_write(mdev);
 			} else {
 				WARN("I am inconsistent, but there is no sync? BOTH nodes inconsistent!\n");
@@ -1450,6 +1463,7 @@
 			 * on the next _drbd_send_bitmap this will be done.
 			 */
 			WARN("PARTNER DISKLESS\n");
+			mdev->rs_total = 0;
 		}
 		if(mdev->cstate >= Connected ) {
 			if(mdev->state == Primary) tl_clear(mdev);
@@ -1465,8 +1479,10 @@
 			set_cstate(mdev,Connected);
 		}
 	} else {
-		if (test_and_clear_bit(PARTNER_DISKLESS, &mdev->flags))
+		if (test_and_clear_bit(PARTNER_DISKLESS, &mdev->flags)) {
 			WARN("Partner no longer diskless\n");
+			D_ASSERT(consider_sync);
+		}
 	}
 
 	if (be32_to_cpu(p->gen_cnt[Flags]) & MDF_Consistent) {
@@ -1649,7 +1665,7 @@
 	return TRUE; // cannot fail ?
 }
 
-STATIC int receive_WriteHint(drbd_dev *mdev, Drbd_Header *h)
+STATIC int receive_UnplugRemote(drbd_dev *mdev, Drbd_Header *h)
 {
 	if (!test_bit(DISKLESS,&mdev->flags)) drbd_kick_lo(mdev);
 	return TRUE; // cannot fail.
@@ -1671,7 +1687,7 @@
 	[PingAck]          = NULL, //receive_PingAck,
 	[BecomeSyncTarget] = receive_BecomeSyncTarget,
 	[BecomeSyncSource] = receive_BecomeSyncSource,
-	[WriteHint]        = receive_WriteHint,
+	[UnplugRemote]     = receive_UnplugRemote,
 	[DataRequest]      = receive_DataRequest,
 	[RSDataRequest]    = receive_DataRequest, //receive_RSDataRequest,
 	[SyncParam]        = receive_SyncParam,
@@ -1767,6 +1783,7 @@
 	D_ASSERT(list_empty(&mdev->done_ee)); // done here
 
 	mdev->epoch_size=0;
+	mdev->rs_total=0;
 
 	if(atomic_read(&mdev->unacked_cnt)) {
 		ERR("unacked_cnt = %d\n",atomic_read(&mdev->unacked_cnt));
@@ -1888,6 +1905,11 @@
 
 	if ( p->protocol_version == PRO_VERSION ||
 	     p->protocol_version == (PRO_VERSION+1) ) {
+		if (p->protocol_version == (PRO_VERSION+1)) {
+			WARN( "You should upgrade me! "
+			      "Peer wants protocol version: %u\n",
+			      p->protocol_version );
+		}
 		INFO( "Handshake successful: DRBD Protocol version %u\n",
 		      PRO_VERSION );
 	} /* else if ( p->protocol_version == (PRO_VERSION-1) ) {
@@ -1991,9 +2013,8 @@
 
 			drbd_end_req(req, RQ_DRBD_SENT, 1, sector);
 
-			/* TODO maybe optimize: don't do the set_in_sync
-			 * if not neccessary */
-			if(mdev->conf.wire_protocol == DRBD_PROT_C)
+			if (mdev->rs_total && 
+			    mdev->conf.wire_protocol == DRBD_PROT_C)
 				drbd_set_in_sync(mdev,sector,blksize);
 		}
 	}

Modified: trunk/drbd/drbd_worker.c
===================================================================
--- trunk/drbd/drbd_worker.c	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/drbd/drbd_worker.c	2004-07-15 18:28:32 UTC (rev 1436)
@@ -647,7 +647,7 @@
 {
 	if (cancel) return 1;
 	NOT_IN_26(clear_bit(UNPLUG_QUEUED,&mdev->flags));
-	return drbd_send_short_cmd(mdev,WriteHint);
+	return drbd_send_short_cmd(mdev,UnplugRemote);
 }
 
 STATIC void drbd_global_lock(void)

Modified: trunk/testing/CTH/CTH_bash.helpers
===================================================================
--- trunk/testing/CTH/CTH_bash.helpers	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/CTH_bash.helpers	2004-07-15 18:28:32 UTC (rev 1436)
@@ -21,11 +21,16 @@
 	  H_Disk=(name Node real_dev usize)
 	Fmt_Link="\n#\t(%-8s %-15s %-6s)"
 	  H_Link=(Node ip nic)
-	Fmt_Drbd="(%-8s %-5s %6s %-8s %7s %2s\n#\t  %-8s %-15s %-24s %-8s\n#\t  %-8s %-15s %-24s %-8s)"
+	Fmt_Drbd="(%-8s %-5s %6s %-8s %7s %8s\n#\t  %-8s %-15s %-24s %-8s\n#\t  %-8s %-15s %-24s %-8s)"
 	  H_Drbd=(name minor port link usize sync_group
 		left left_ip left_real_dev left_disk
 		right right_ip right_real_dev right_disk
 		conf)
+	DRBD_MAJOR=43
+	DRBD_DEVNAME="nb"	# /dev/ nb X
+		#   ="nbd/"
+		#   ="drbd"
+		#   ="drbd/"
 }
 link_for_each_node()
 {
@@ -86,7 +91,7 @@
 	shift 6;  left=$1  left_ip=$2  left_real_dev=$3  left_disk=$4
 	shift 4; right=$1 right_ip=$2 right_real_dev=$3 right_disk=$4
 	   conf=$5
-	DEV=/dev/nb$minor
+	DEV=/dev/$DRBD_DEVNAME$minor
 }
 new_bdev()
 {
@@ -103,12 +108,12 @@
 		else
 			usize=""
 		fi
-		let ID_DISK++
+		let ++ID_DISK
 		disk="Disk_$ID_DISK"
 		eval "$disk=( $name $node  $real_dev \"${usize}\")"
 		bdev="$bdev $node $real_dev"
 	done
-	let ID_BDEV++
+	let ++ID_BDEV
 	eval "Bdev_$ID_BDEV=($bdev)"
 }	
 new_drbd()
@@ -132,10 +137,10 @@
 	node_to_env $left  ;  left_hostname=$hostname
 	node_to_env $right ; right_hostname=$hostname
 
-	let ID_DISK++
+	let ++ID_DISK
 	left_disk="Disk_$ID_DISK"
 	eval "$left_disk=( $name $left  $left_real_dev \"${usize:+$[usize+128*1024]}\" )"
-	let ID_DISK++
+	let ++ID_DISK
 	right_disk="Disk_$ID_DISK"
        	eval "$right_disk=( $name $right $right_real_dev \"${usize:+$[usize+128*1024]}\" )"
 
@@ -146,13 +151,13 @@
 		    protocol        C;
 		    incon-degr-cmd "reboot -f";
 		    on $left_hostname {
-		        device         /dev/nb$minor;
+		        device         /dev/$DRBD_DEVNAME$minor;
 		        disk           /dev/mapper/$name;
 		        address        $left_ip:$port;
 		        meta-disk      internal;
 		    }
 		    on $right_hostname {
-		        device         /dev/nb$minor;
+		        device         /dev/$DRBD_DEVNAME$minor;
 		        disk           /dev/mapper/$name;
 		        address        $right_ip:$port;
 		        meta-disk      internal;
@@ -180,7 +185,7 @@
 		___
 	)
 
-	let ID_DRBD++
+	let ++ID_DRBD
 	eval "Drbd_$ID_DRBD=(
 		$name
 		$minor
@@ -230,7 +235,7 @@
 	blocks=$( grep ^DEVICE_SIZE < md5sum-${!right}-$name | cut -f 2 )
 	echo "NOTE: internal meta data may differ..."
 	echo "md probably starts at blocknr $[blocks-(128<<8)]"
-	diff -u md5sum-${!left}-$name md5sum-${!right}-$name
+	diff -u md5sum-${!left}-$name md5sum-${!right}-$name || true
 }
 
 dd_single_block()
@@ -321,15 +326,19 @@
 	# reset and up all DRBDs
 	for d in ${!Drbd_*} ; do
 		drbd_to_env $d
-		echo "$conf" | on ${!left}:  drbd_append_config USIZE=$USIZE NAME=$name RES=$name LO_DEV=$left_real_dev
-		echo "$conf" | on ${!right}: drbd_append_config USIZE=$USIZE NAME=$name RES=$name LO_DEV=$right_real_dev
+		echo "$conf" | on ${!left}:  drbd_append_config USIZE=$USIZE \
+			NAME=$name RES=$name LO_DEV=$left_real_dev \
+			START_CLEAN=$DRBD_SKIP_INITIAL_SYNC
+		echo "$conf" | on ${!right}: drbd_append_config USIZE=$USIZE \
+			NAME=$name RES=$name LO_DEV=$right_real_dev \
+			START_CLEAN=$DRBD_SKIP_INITIAL_SYNC
 	done
 
 	# wait for connect and initial sync (which should be skipped automatically)
 	for d in ${!Drbd_*} ; do
 		drbd_to_env $d
-		on ${!left}:  drbd_wait_sync minor=$minor
-		# on ${!right}: drbd_wait_sync minor=$minor
+		on ${!left}:  drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor
+		# on ${!right}: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor
 	done
 
 	# make file systems
@@ -353,7 +362,7 @@
 		on ${!n}: mkfs_$TYPE    DEV=$DEV
 		if [[ $BDEV == Drbd_* ]] ; then
 			on ${!n}: drbdadm_sec   name=$name
-			on ${!n}: drbd_wait_sync minor=$minor
+			on ${!n}: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor
 		fi
 	done
 ) }
@@ -362,7 +371,9 @@
 {
 	rs=$1 nn=$2
 	i=${rs#RS_}
-	which=${!rs}; [[ $which == *CN=* ]] && cn=${which#*CN=} || cn=''
+	which=${!rs};
+	: ${which:?no such resource $rs}
+	[[ $which == *CN=* ]] && cn=${which#*CN=} || cn=''
 	fs=FS_$i; CN=""
 	eval ${!fs}
 
@@ -394,14 +405,13 @@
 }
 
 Stop()
-{ ( set -e
+{
 	local rs=$1 i fs cn CN which
 	i=${rs#RS_}
 	which=${!rs}; [[ $which == *CN=* ]] && cn=${which#*CN=} || cn=''
 	which=${which%% *}
 	fs=FS_$i; CN=""
 	eval "${!fs}"
-
 	[[ -z $CN || -z $cn ]] && return 101
 	[[ $CN == $cn ]]       || return 102
 	[[ $cn == Node_* ]]    || return 103
@@ -418,7 +428,6 @@
 	else
 		: "do something to make $BDEV unavailable?"
 	fi
-) || return
 	eval "$fs=\${$fs% CN=*}"   # forget current node
 	eval "$rs=\${$rs% CN=*}"   # forget current node
 	Dump_RS

Modified: trunk/testing/CTH/CTH_bash.sh
===================================================================
--- trunk/testing/CTH/CTH_bash.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/CTH_bash.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -28,6 +28,9 @@
 	clear_env
 	source $CONF              || return
 
+	: ${Node_1:?no Node_1 defined...}
+	: ${Node_2:?no Node_2 defined...}
+
 	# verify
 	# Dump_All
 	# exit 0
@@ -48,13 +51,12 @@
 	trap 'ex=$?; echo "exit_code: $ex"' ERR # show exit codes != 0
 	if [[ -e $CASE ]] ; then
 		echo "now run CASE=$CASE"
-		source $CASE || return
+		( set -e; source $CASE ) || return
 	fi
 
 	return
 }
 
-set +e
 if Run; then
 	cat <<-___
 	#--- $CASE ----

Modified: trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm	2004-07-15 18:28:32 UTC (rev 1436)
@@ -42,7 +42,7 @@
 	} elsif ($node->{_busy} and $node->{_busy} !~ /^wait_sync/) {
 		warn "$node->{_id} busy: $node->{_busy}\n";
 	} else {
-		$cmd = "on $ip: drbd_wait_sync minor=$minor";
+		$cmd = "on $ip: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor";
 		$node->{_busy} = "wait_sync" unless $node->{_busy};
 		$node->{_busy} .= " $name ";
 		$LGE_CTH::FAILED += 1000;
@@ -109,7 +109,7 @@
 	return if $event ne 'heal';
 	return if $node->{_status}->{status} ne 'up';
 
-	$cmd = "on $ip: drbd_reattach minor=$minor name=$name";
+	$cmd = "on $ip: drbd_reattach DEV=/dev/$DRBD_DEVNAME$minor name=$name";
 	_spawn( "drbd_reattach $name on $hostname", $cmd, 'SYNC');
 	$me->wait_sync("attach");
 }
@@ -228,7 +228,7 @@
 		my $ip = $link->{_config}->{_nodes}->{$n->{node}->id}->{ip};
 		$s .= <<___ ;
     on $n->{node}->{_config}->{hostname} {
-        device         /dev/nb$c->{minor};
+        device         /dev/$DRBD_DEVNAME$c->{minor};
         disk           /dev/mapper/$c->{name};
         address        $ip:$n->{port};
         meta-disk      $n->{'meta-disk'}@{[ $n->{'meta-disk'} eq "internal" ? ";" : "[$n->{'meta-index'}];" ]}

Modified: trunk/testing/CTH/LGE_CTH/FileSystem.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/FileSystem.pm	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/LGE_CTH/FileSystem.pm	2004-07-15 18:28:32 UTC (rev 1436)
@@ -41,7 +41,7 @@
 sub env {
 	my $me = shift;
 	my $minor = $me->{_config}->{bdev}->{_config}->{minor};
-	return "TYPE=$me->{_config}->{type} DEV=/dev/nb$minor MNT=$me->{_config}->{mount_point}";
+	return "TYPE=$me->{_config}->{type} DEV=/dev/$DRBD_DEVNAME$minor MNT=$me->{_config}->{mount_point}";
 }
 
 sub DRBD_Resource_changed {

Modified: trunk/testing/CTH/LGE_CTH/Node.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/Node.pm	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/LGE_CTH/Node.pm	2004-07-15 18:28:32 UTC (rev 1436)
@@ -118,6 +118,7 @@
 	my $initial = $me->{_status}->{status} eq '__UNDEF__' ? "true" : "false";
 	my $have_drbd = scalar(grep { /^DRBD/ } keys %{$me->{_users}}) ? "true" : "false";
 	my $cmd = "ip=$ip\nhostname=$hostname\ntimeout=$timeout\ninitial=$initial\nhave_drbd=$have_drbd\n"
+		. "DRBD_MAJOR=$DRBD_MAJOR\nDRBD_DEVNAME=$DRBD_DEVNAME\nMINOR_COUNT=$MINOR_COUNT\n"
 		. $me->{_config}->{boot_script};
 
 	$me->{_busy} = "wait_for_boot";

Modified: trunk/testing/CTH/LGE_CTH.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH.pm	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/LGE_CTH.pm	2004-07-15 18:28:32 UTC (rev 1436)
@@ -9,7 +9,10 @@
 our @EXPORT = (qw{ Configure Run Log });
 our @EXPORT_OK = (qw{ mytimestr _spawn pid_is_pending kill_if_pending });
 our %EXPORT_TAGS = (
-	util => [qw{ Log mytimestr _spawn pid_is_pending kill_if_pending }], # for internal use only
+	util => [qw{
+		Log mytimestr _spawn pid_is_pending kill_if_pending
+		DRBD_DEVNAME DRBD_MAJOR MINOR_COUNT
+	}], # for internal use only
 ); 
 
 use POSIX;
@@ -36,6 +39,13 @@
 my  $clean_exit = 0;
 sub clean_exit { $clean_exit = 1; exit @_; };
 
+our $MINOR_COUNT =  4;
+our $DRBD_MAJOR  = 43;
+our $DRBD_DEVNAME = "nb";   # the part between /dev/ and the minor number.
+		# = "nbd/";
+		# = "drbd";
+		# = "drbd/";
+
 ##
 ## private
 ##

Modified: trunk/testing/CTH/T-001.sh
===================================================================
--- trunk/testing/CTH/T-001.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-001.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -1,6 +1,9 @@
 #!/usr/bin/env - /bin/bash
 # $Id: T-001.sh,v 1.1.2.3 2004/06/01 09:36:55 lars Exp $
 
+: ${RS_1:?no RS_1 defined...}
+: ${RS_2:?no RS_2 defined...}
+
 echo "START"
 Start RS_1 Node_1
 Start RS_2 Node_2

Modified: trunk/testing/CTH/T-002.sh
===================================================================
--- trunk/testing/CTH/T-002.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-002.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -6,6 +6,8 @@
 # does work.
 # 
 
+: ${RS_1:?no RS_1 defined...}
+
 Start RS_1 Node_1
 
 sleep 10
@@ -17,9 +19,9 @@
 sleep 5
 
 Heal_Disk Disk_1
-on $Node_1: drbd_reattach minor=0 name=r0
+on $Node_1: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 sleep 10
-on $Node_1: drbd_wait_sync minor=0
+on $Node_1: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 Reloc RS_1 Node_1
 sleep 10

Modified: trunk/testing/CTH/T-003.sh
===================================================================
--- trunk/testing/CTH/T-003.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-003.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -6,6 +6,8 @@
 # does work.
 #
 
+: ${RS_1:?no RS_1 defined...}
+
 Start RS_1 Node_1
 
 echo "FAIL Secondary DISK"
@@ -15,9 +17,9 @@
 echo "HEAL Secondary DISK"
 Heal_Disk Disk_2
 echo "REATTACH Secondary DISK"
-on $Node_2: drbd_reattach minor=0 name=r0
+on $Node_2: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 echo "WAIT_SYNC"
-on $Node_2: drbd_wait_sync minor=0
+on $Node_2: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 Reloc RS_1 Node_2
 sleep 10

Modified: trunk/testing/CTH/T-004.sh
===================================================================
--- trunk/testing/CTH/T-004.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-004.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -10,6 +10,9 @@
 #
 # does work.
 #
+
+: ${RS_1:?no RS_1 defined...}
+
 Start RS_1 Node_1
 
 sleep 2
@@ -27,11 +30,11 @@
 # bitmap handshake... currently it may even cause both nodes to hang!
 # # not yet. Heal_Disk Disk_1
 # # see what happens:
-# on $Node_1: drbd_reattach minor=0 name=r0
+# on $Node_1: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 
 # attaching a good disk *does* work
 Heal_Disk Disk_1
-on $Node_1: drbd_reattach minor=0 name=r0
+on $Node_1: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 
 sleep 4
 
@@ -43,7 +46,7 @@
 sleep 10
 
 Heal_Disk Disk_1
-on $Node_1: drbd_reattach minor=0 name=r0
-on $Node_1: drbd_wait_sync minor=0
+on $Node_1: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
+on $Node_1: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 Stop RS_1

Modified: trunk/testing/CTH/T-005.sh
===================================================================
--- trunk/testing/CTH/T-005.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-005.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -6,12 +6,12 @@
 #
 # in a loop. does work.
 #
-# not exactly "real world" since actually we'd need a "invalidate",
-# because we put in a new and clean disk. 
-# but this tests shows how drbd behaves when the primary disk fails
+# this tests shows how drbd behaves when the Primary disk fails
 # and you configured "on-io-error Detach;"
 #
 
+: ${RS_1:?no RS_1 defined...}
+
 # start it.
 Start RS_1 Node_1
 sleeptime=30
@@ -26,12 +26,12 @@
 	sleep $sleeptime
 
 	Heal_Disk Disk_1
-	on $Node_1: drbd_reattach minor=0 name=r0
+	on $Node_1: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 	sleep $sleeptime
 	# now wait for sync,
 	# I don't want to bail out of the test early
 	# because I fail the only good copy of the data ...
-	on $Node_1: drbd_wait_sync minor=0
+	on $Node_1: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 	# and reverse
 	
@@ -42,8 +42,8 @@
 	sleep $sleeptime
 
 	Heal_Disk Disk_2
-	on $Node_2: drbd_reattach minor=0 name=r0
+	on $Node_2: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 	sleep $sleeptime
-	on $Node_2: drbd_wait_sync minor=0
+	on $Node_2: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 done

Modified: trunk/testing/CTH/T-006.sh
===================================================================
--- trunk/testing/CTH/T-006.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-006.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -6,12 +6,12 @@
 #
 # in a loop. does work.
 #
-# not exactly "real world" since actually we'd need a "invalidate",
-# because we put in a new and clean disk. 
-# but this tests shows how drbd behaves when the primary disk fails
+# this tests shows how drbd behaves when the Secondary disk fails
 # and you configured "on-io-error Detach;"
 #
 
+: ${RS_1:?no RS_1 defined...}
+
 # start it.
 Start RS_1 Node_1
 sleeptime=30
@@ -23,12 +23,12 @@
 	sleep $sleeptime
 
 	Heal_Disk Disk_2
-	on $Node_2: drbd_reattach minor=0 name=r0
+	on $Node_2: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 	sleep $sleeptime
 	# now wait for sync,
 	# I don't want to bail out of the test early
 	# because I fail the only good copy of the data ...
-	on $Node_2: drbd_wait_sync minor=0
+	on $Node_2: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 	Reloc RS_1 Node_2
 	sleep $sleeptime
@@ -39,9 +39,9 @@
 	sleep $sleeptime
 
 	Heal_Disk Disk_1
-	on $Node_1: drbd_reattach minor=0 name=r0
+	on $Node_1: drbd_reattach DEV=/dev/${DRBD_DEVNAME}0 name=r0
 	sleep $sleeptime
-	on $Node_1: drbd_wait_sync minor=0
+	on $Node_1: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 
 	Reloc RS_1 Node_1
 	sleep $sleeptime

Modified: trunk/testing/CTH/T-007.sh
===================================================================
--- trunk/testing/CTH/T-007.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/T-007.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -3,10 +3,13 @@
 
 #
 # Fail Link; Heal Link; wait for sync; Relocate service.
+# every 10th iteration, compare md5sums of lower level devices.
 #
 # in a loop. does work.
 #
 
+: ${RS_1:?no RS_1 defined...}
+
 sleeptime=30
 
 # start it.
@@ -21,7 +24,7 @@
 
 	Heal_Link Link_1
 	SECONDS=0
-	on $Node_1: drbd_wait_sync minor=0
+	on $Node_1: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 	if (( sleeptime - SECONDS > 0)) ; then
 		sleep $(( sleeptime - SECONS ))
 	fi
@@ -34,7 +37,7 @@
 
 	Heal_Link Link_1
 	SECONDS=0
-	on $Node_2: drbd_wait_sync minor=0
+	on $Node_2: drbd_wait_sync DEV=/dev/${DRBD_DEVNAME}0
 	if (( sleeptime - SECONDS > 0)) ; then
 		sleep $(( sleeptime - SECONS ))
 	fi

Modified: trunk/testing/CTH/functions.sh
===================================================================
--- trunk/testing/CTH/functions.sh	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/CTH/functions.sh	2004-07-15 18:28:32 UTC (rev 1436)
@@ -106,9 +106,12 @@
 do_initial_sanity_check()
 {
 	: ${hostname:?unknown hostname} 
+	: ${DRBD_MAJOR:?missing DRBD_MAJOR}
+	: ${DRBD_DEVNAME:?missing DRBD_DEVNAME}
+	: ${MINOR_COUNT:?missing MINOR_COUNT}
 	[[ `uname -n` == $hostname ]]
 	if [ -e /proc/drbd ] ; then
-		for d in `grep -o "^/dev/nb[^ ]\+" /proc/mounts` ; do
+		for d in `grep -o "^/dev/$DRBD_DEVNAME[^ ]\+" /proc/mounts` ; do
 			fuser -vmk $d || true
 			umount $d
 		done
@@ -120,7 +123,16 @@
 		fi
 	fi
 	> /etc/drbd-07.conf # no more drbd-07.conf hehehe...
-	[ -e /proc/drbd ] || modprobe drbd minor_count=4 || exit 1
+	ls_line=$(LANG= ls -l /dev/${DRBD_DEVNAME}0)
+	egrep_pat="^brw-..---- +1 root +root +$DRBD_MAJOR, +0 [A-Za-z0-9-: ]*/dev/${DRBD_DEVNAME}0\$"
+	if ! echo $ls_line | grep -E "$egrep_pat" ; then
+		echo "unexpected drbd device settings"
+		echo " $ls_line"
+		echo "    does not match"
+		echo "$egrep_pat"
+		exit 1
+	fi
+	[ -e /proc/drbd ] || modprobe drbd minor_count=$MINOR_COUNT major_nr=$DRBD_MAJOR || exit 1
 	echo "$hostname just forgot its configuration..."
 	# FIXME more paranoia
 }
@@ -148,7 +160,12 @@
 {
 	: ${ip:?unknown admin ip} 
 	: ${hostname:?unknown hostname} 
+	# You can override these in your config
+	: ${DRBD_MAJOR:=43}
+	: ${DRBD_DEVNAME:=nb}
+	: ${MINOR_COUNT:=4}
 
+
 	: ${initial:=false} ${have_drbd:=true}
 	[[ $initial   == true ]] || [[ $initial   == false ]] || return 1
 	[[ $have_drbd == true ]] || [[ $have_drbd == false ]] || return 1
@@ -171,13 +188,18 @@
 	while (( retry-- )) ; do
 		if $initial; then
 			if $have_drbd ; then
-				on $ip: do_initial_sanity_check hostname=$hostname && break
+				on $ip: do_initial_sanity_check \
+					hostname=$hostname \
+					DRBD_DEVNAME=$DRBD_DEVNAME \
+					DRBD_MAJOR=$DRBD_MAJOR \
+					MINOR_COUNT=$MINOR_COUNT && break
 			else
 				# fixme sanity check *no drbd*
-				on $ip: do_sanity_check         hostname=$hostname && break
+				on $ip: do_sanity_check \
+					hostname=$hostname && break
 			fi
 		else
-			on $ip: do_sanity_check         hostname=$hostname && break
+			on $ip: do_sanity_check hostname=$hostname && break
 		fi
 		echo "admin connect failed, retrying $retry times"
 		sleep 5
@@ -263,6 +285,9 @@
 	: ${RES:?unknown resource name}
 	: ${LO_DEV:?unknown lo level device}
 	: ${NAME:?unknown dm name}
+
+	: START_CLEAN=${START_CLEAN:+true}
+	: ${START_CLEAN:=false}
 	# : ${USIZE:?unknown device size} # TODO
 
 	# FIXME support external meta data
@@ -275,9 +300,19 @@
 	: ${USIZE:=$RSIZE}
 	(( USIZE <= RSIZE )) # assert USIZE <= RSIZE
 	let "MLOC=(USIZE & ~3) -128*1024"
-	echo -n "Wipeout GC and AL area on $HOSTNAME:$LO_DEV via /dev/mapper/$NAME for resource $RES"
-	# drbdadm down $RES
-	dd if=/dev/zero bs=4k seek=$[MLOC/4] count=$[128*256] of=/dev/mapper/$NAME
+	if $START_CLEAN ; then
+		perl -e 'print pack "N8", 0,0, 1,1,1,1,1, 0x8374026a;
+			 print "\x00" x (4096 - 32);
+			 # and, just to see how the bitmap behaves:
+			 print "\xff" x (4096 * 19);
+			' | dd bs=4k seek=$[MLOC/4] of=/dev/mapper/$NAME
+	else
+		echo "Wipeout GC and AL area on $HOSTNAME:$LO_DEV via /dev/mapper/$NAME for resource $RES"
+		# drbdadm down $RES
+		# dd if=/dev/zero bs=4k seek=$[MLOC/4] count=$[128*256] of=/dev/mapper/$NAME
+		# well, killing the first 80k of meta data should be enough for now...
+		dd if=/dev/zero bs=4k seek=$[MLOC/4] count=20 of=/dev/mapper/$NAME
+	fi
 	sync
 	echo .
 	drbdadm up $RES
@@ -303,9 +338,9 @@
 
 drbd_wait_sync()							# {{{3
 {
-	: ${minor:?unknown minor number} 
-	drbdsetup /dev/nb$minor wait_connect -d 0 -t 0
-	drbdsetup /dev/nb$minor wait_sync -t 0
+	: ${DEV:?unknown device name} 
+	drbdsetup $DEV wait_connect -d 0 -t 0
+	drbdsetup $DEV wait_sync -t 0
 	# cat /proc/drbd
 }
 
@@ -323,9 +358,9 @@
 
 drbd_reattach()								# {{{3
 {
-	: ${minor:?unknown minor number} 
+	: ${DEV:?unknown device name} 
 	: ${name:?unknown resource name} 
-	if drbdsetup /dev/nb$minor show | grep -q "^Lower device:.*null"; then
+	if drbdsetup $DEV show | grep -q "^Lower device:.*null"; then
 		# NO. drbdadm attach $name
 		# But rather:
 		drbdadm down $name
@@ -339,11 +374,6 @@
 	: ${name:?unknown resource name} 
 	: ${force:=}
 	drbdadm $force primary $name
-	# FIXME should not be neccessary!
-	# patch already done, needs to be checked in...
-	# if [[ $force ]] ; then
-	# 	drbdadm invalidate_remote $name || true
-	# fi
 	echo "$name now Primary on $HOSTNAME"
 }
 

Modified: trunk/testing/access_and_verify.c
===================================================================
--- trunk/testing/access_and_verify.c	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/access_and_verify.c	2004-07-15 18:28:32 UTC (rev 1436)
@@ -22,6 +22,8 @@
 
  */
 
+#define _GNU_SOURCE /* want lseek64 to be declared */
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
@@ -31,7 +33,6 @@
 #include <time.h>
 #include <linux/fs.h>
 #include <unistd.h>
-#define _GNU_SOURCE
 #include <getopt.h>
 #include <string.h>
 #include <signal.h>

Modified: trunk/testing/uml-screen-debug
===================================================================
--- trunk/testing/uml-screen-debug	2004-07-15 17:25:50 UTC (rev 1435)
+++ trunk/testing/uml-screen-debug	2004-07-15 18:28:32 UTC (rev 1436)
@@ -48,10 +48,12 @@
 # use -hub if you want to be able to tcpdump on the host.
 # uml_switch -hub -tap tap0 -unix /tmp/uml0.ctl
 # uml_switch -hub -tap tap1 -unix /tmp/uml1.ctl
-fuser /tmp/uml0.ctl &> /dev/null ||
+if ! ps x | grep -v grep | grep -q "uml_switch.* /tmp/uml0.ctl" ; then
 	uml_switch -tap tap0 -unix /tmp/uml0.ctl </dev/null &> /dev/null &
-fuser /tmp/uml1.ctl &> /dev/null ||
+fi
+if ! ps x | grep -v grep | grep -q "uml_switch.* /tmp/uml1.ctl" ; then
 	uml_switch -tap tap1 -unix /tmp/uml1.ctl </dev/null &> /dev/null &
+fi
 
 #
 # now, start both umls.
@@ -60,9 +62,11 @@
 # this starts a SCREEN and an xterm attached to that screen.
 start_uml() {
 	local XPID SPID STY UMLPID LOGDIR COMMAND NAME=$1 EXTRA_ARG=$2
-	local i fg bg
+	local fg bg ubd0
 
-	fuser -v ~/.uml/$NAME/mconsole 2>/dev/null && {
+	ubd0=UBDs_$i; ubd0=${!ubd0}; ubd0=${ubd0#*ubd0=}; ubd0=${ubd0%% *}
+
+	fuser -v ${ubd0:-~/.uml/$NAME/mconsole} && {
 		echo >&2 "$NAME seems to be still running, stop it first!"
 		return 1
 	}
@@ -70,24 +74,28 @@
 	LOGDIR="/tmp/$(date +%Ft%H%M).$NAME"
 	mkdir -p "$LOGDIR"
 	cd "$LOGDIR"
+	echo "LOGDIR=$LOGDIR"
 
 	COMMAND="sleep 1; screen -t \"console ($NAME)\" $LINUX $CMDLINE umid=$NAME $EXTRA_ARG
 		echo 'waiting for uml to come up, then attaching gdb and mconsole...'
-		sleep 2;
+
+		sleep 2
+		screen -X caption always
+		screen -X bindkey -k k1 select 4 # F1 -> vc/1
+		screen -X bindkey -k k2 select 5 # F2 -> vc/2
+		screen -X bindkey -k k3 select 1 # F3 -> console
+		screen -X bindkey -k k4 select 0 # F4 -> gdb
+		screen -X bindkey -k k5 select 2 # F5 -> mconsole
+		screen -X bindkey -k k6 select 3 # F6 -> console log
+
 		screen -t '$NAME mconsole' uml_mconsole $NAME
+		screen -t 'less +F $NAME console' -- less +F screenlog.1
 		screen -X title gdb; exec gdb $LINUX \$(<~/.uml/$NAME/pid)"
-	screen -e ^^^^ -S "$NAME" -L -m -D -t $NAME -- bash -c "$COMMAND" &
+	echo "LINUX: $LINUX $CMDLINE umid=$NAME $EXTRA_ARG"
+	screen -fn -e ^^^^ -S "$NAME" -L -m -D -t $NAME -- bash -c "$COMMAND" &
 	SPID=$!
 	
 	export STY="$SPID.$NAME"
-	screen -X caption always
-   
-	# quote "select", it confuses syntax highlighting ...
-	screen -X bindkey -k k1 "select" 3 # F1 -> vc/1
-	screen -X bindkey -k k2 "select" 4 # F2 -> vc/2
-	screen -X bindkey -k k3 "select" 1 # F3 -> console
-	screen -X bindkey -k k4 "select" 0 # F4 -> gdb
-	screen -X bindkey -k k5 "select" 2 # F5 -> mconsole
 
 	(
 		sleep 4
@@ -96,22 +104,22 @@
 handle SIGSEGV pass nostop noprint
 handle SIGUSR1 pass nostop noprint
 continue
-
 "
 			screen -X -p gdb paste g
-			(
-				# when this uml has died, there is no more mconsole.
-				# reap gdb again.
-				while test -e ~/.uml/$NAME/mconsole ; do sleep 5 ; done 
-				screen -X register g $'quit\n'; screen -X -p gdb paste g
-				screen -X -p 2 paste g  # quit mconsole
-			) &
+			#(
+			#	# when this uml has died, there is no more mconsole.
+			#	# reap gdb again.
+			#	while test -e ~/.uml/$NAME/mconsole ; do sleep 5 ; done 
+			#	screen -X register g $'quit\n'
+			#       	screen -X -p 0 paste g  # quit gdb
+			#	screen -X -p 2 paste g  # quit mconsole
+			#) &
 			screen -X "select" 1		# switch to console
 			# wait for vc/2 to show up
 			i=true
 			while [[ -e ~/.uml/$NAME/pid ]] ; do
-				[[ -e screenlog.3 ]] && break
-				[[ -e screenlog.2 ]] && $i &&
+				[[ -e screenlog.5 ]] && break
+				[[ -e screenlog.4 ]] && $i &&
 					i=false && screen -X "select" 1
 				[[ $SECONDS -ge 30 ]] && break
 				sleep 1
@@ -120,11 +128,12 @@
 			# screen -X detach
 		else
 			screen -X register g $'quit\n'
-			screen -X -p gdb paste g
+			screen -X -p 0 paste g  # quit gdb
 			screen -X -p 2 paste g  # quit mconsole
 		fi
 	) &
 
+	sleep 1
 	screen -r "$SPID.$NAME" -p =
 
 	#i=${NAME##*[^0-9]}
@@ -146,7 +155,7 @@
 # where you reattach, thats your business!
 #
 
-# in case this runs withing ksonsole or xterm: set tab and window title
+# in case this runs within konsole or xterm: set tab and window title
 # konsole tab title:
 echo -n $'\e]30;'$UML_WHICH$'\a'
 # window title