[DRBD-cvs] r1452 - in trunk/testing/CTH: . LGE_CTH

drbd-user@lists.linbit.com drbd-user@lists.linbit.com
Fri, 23 Jul 2004 16:38:55 +0200 (CEST)


Author: lars
Date: 2004-07-23 16:38:54 +0200 (Fri, 23 Jul 2004)
New Revision: 1452

Modified:
   trunk/testing/CTH/CTH_bash.helpers
   trunk/testing/CTH/CTH_bash.sh
   trunk/testing/CTH/LGE_CTH.pm
   trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm
   trunk/testing/CTH/LGE_CTH/Node.pm
   trunk/testing/CTH/T-007.sh
   trunk/testing/CTH/functions.sh
   trunk/testing/CTH/uml-minna.conf
Log:

- allow skipping the initial sync
- allow resources to be moved while resync is running 
  (still needs some work)
- in case the internal signal magic gets confused, trigger an extra SIGCHLD
  (ugly; needs work)
- integrate new wbtest options (wbtest was rewritten and committed yesterday)
- fix several issues with the bash errexit option (set -e).
  I should not rely too much on these internal semantics; it keeps biting me :-/


Modified: trunk/testing/CTH/CTH_bash.helpers
===================================================================
--- trunk/testing/CTH/CTH_bash.helpers	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/CTH_bash.helpers	2004-07-23 14:38:54 UTC (rev 1452)
@@ -235,7 +235,7 @@
 	blocks=$( grep ^DEVICE_SIZE < md5sum-${!right}-$name | cut -f 2 )
 	echo "NOTE: internal meta data may differ..."
 	echo "md probably starts at blocknr $[blocks-(128<<8)]"
-	diff -u md5sum-${!left}-$name md5sum-${!right}-$name || true
+	diff -u md5sum-${!left}-$name md5sum-${!right}-$name
 }
 
 dd_single_block()
@@ -306,8 +306,7 @@
 
 # initial boot
 boot_and_setup_nodes()
-{ (
-	set -e
+{
 	initial=true
 	[[ ${!Drbd_*} ]] && have_drbd=true || have_drbd=false # FIXME should be node specific...
 	for n in ${!Node_*}; do
@@ -350,7 +349,7 @@
 		bdev_to_env $BDEV
 		if [[ $BDEV == Drbd_* ]] ; then
 			n=$left
-			on ${!n}: drbdadm_pri force="-- -d" name=$name
+			on ${!n}: drbdadm_pri force="-- --do-what-I-say" name=$name
 		else
 			n=Node_1
 		fi
@@ -365,8 +364,15 @@
 			on ${!n}: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor
 		fi
 	done
-) }
+}
 
+on_all_nodes()
+{
+	for n in ${!Node_*} ; do
+		on ${!n}: "$@"
+	done
+}
+
 Start()
 {
 	rs=$1 nn=$2

Modified: trunk/testing/CTH/CTH_bash.sh
===================================================================
--- trunk/testing/CTH/CTH_bash.sh	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/CTH_bash.sh	2004-07-23 14:38:54 UTC (rev 1452)
@@ -40,7 +40,9 @@
 	__I_MEAN_IT__=__YES__
 	source ./functions.sh     || return
 
-	boot_and_setup_nodes      || return
+	set +e
+	( set -e; boot_and_setup_nodes )
+	err=$?; [[ $err == 0 ]]   || return $err
 
 	cat <<-___
 	#
@@ -51,20 +53,23 @@
 	trap 'ex=$?; echo "exit_code: $ex"' ERR # show exit codes != 0
 	if [[ -e $CASE ]] ; then
 		echo "now run CASE=$CASE"
-		( set -e; source $CASE ) || return
+		on_all_nodes to_syslog MSG="now run CASE=$CASE"
+		( set -e; source $CASE )
+		err=$?; [[ $err == 0 ]]   || return $err
 	fi
 
 	return
 }
 
-if Run; then
+Run; err=$?
+if [[ $err == 0 ]]; then
 	cat <<-___
 	#--- $CASE ----
 	#     PASSED
 	#-----------------
 	___
 else
-	echo "something went wrong. exit_code: $?"
+	echo "something went wrong. exit_code: $err"
 fi
 
 if $INTERACTIVE ; then

Modified: trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm	2004-07-23 14:38:54 UTC (rev 1452)
@@ -45,14 +45,14 @@
 		$cmd = "on $ip: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor";
 		$node->{_busy} = "wait_sync" unless $node->{_busy};
 		$node->{_busy} .= " $name ";
-		$LGE_CTH::FAILED += 1000;
+		$LGE_CTH::FAILED += 0x1000;
 		_spawn( "wait_sync $name on $hostname after $event", $cmd,
 			sub {
 				my $ex = $_[0];
 				$node->say("wait_sync $name on $hostname done: $ex");
 				$node->{_busy} =~ s/ $name / /;
 				$node->{_busy} = ""  if $node->{_busy} =~ /^wait_sync\s*$/;
-				$LGE_CTH::FAILED -= 1000;
+				$LGE_CTH::FAILED -= 0x1000;
 				# TODO update state and so on.
 			}
 		);
@@ -68,8 +68,10 @@
 
 	_spawn("$me->{_id}: initial_setup on $node->{_config}->{hostname}",
 		sub {
-			my $cmd = ". ./functions.sh; on $node->{_config}->{admin_ip}: drbd_append_config "
-			     . "RES=$mr->{_config}->{name} LO_DEV=$disk->{_config}->{dev} NAME=$disk->{_config}->{name}";
+			my $cmd = ". ./functions.sh; on $node->{_config}->{admin_ip}: drbd_append_config"
+			     . " RES=$mr->{_config}->{name} LO_DEV=$disk->{_config}->{dev}"
+			     . " NAME=$disk->{_config}->{name}"
+			     . " START_CLEAN=$::DRBD_SKIP_INITIAL_SYNC";
 			open (DRBD_CONF,"|$cmd")
 				or die "$cmd $node->{_id}:drbd.conf: $!";
 			print DRBD_CONF $mr->as_conf_string
@@ -274,7 +276,7 @@
 	_spawn("$me->{_id}: wait for $hostname to recognize ... ",$cmd,'SYNC');
 
        	$cmd = "on $ip: drbdadm_pri name=$name";
-        $cmd .=	' "force=-- -d"' if $force;
+        $cmd .=	' "force=-- --do-what-I-say"' if $force;
 	_spawn("$me->{_id}: Primary $name on $node->{_config}->{hostname}",$cmd,'SYNC');
 	if ($force) {
 		for my $i (@{$c->{_instances}}) {

Modified: trunk/testing/CTH/LGE_CTH/Node.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/Node.pm	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/LGE_CTH/Node.pm	2004-07-23 14:38:54 UTC (rev 1452)
@@ -98,7 +98,7 @@
 				$me->say("unexpected crash of $me->{_config}->{hostname}!\n")
 					unless $me->{_busy} eq "fail";
 				$me->status("down","fail","done");
-				$LGE_CTH::FAILED -= 499;
+				$LGE_CTH::FAILED -= 0x800;
 				$me->wait_for_boot;
 			} else {
 				$me->status("shutdown","END_OF_TEST","done");
@@ -153,7 +153,7 @@
 	$me->say("no point in crashing me, I'm down!"), return
 		if $me->{_status}->{status} ne "up";
 
-	$LGE_CTH::FAILED += 500;
+	$LGE_CTH::FAILED += 0x801;
 	$me->{_busy} = "fail";
 	$me->{_status}->{status} = "crashing"; # don't propagate yet, failing heartbeat will propagate it
 	Log("FAIL $me->{_id}");

Modified: trunk/testing/CTH/LGE_CTH.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH.pm	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/LGE_CTH.pm	2004-07-23 14:38:54 UTC (rev 1452)
@@ -49,6 +49,10 @@
 		# = "drbd";
 		# = "drbd/";
 
+# set this to something ne '' to skip the initial full sync
+# NOTE: "false" is also true. only "" is false :-)
+$::DRBD_SKIP_INITIAL_SYNC="";
+
 ##
 ## private
 ##
@@ -226,10 +230,14 @@
 	warn("\n.\n#\n#\tINITIAL SETUP\n#\n");
 
 	$CRM->start_all;
-	wait_for_pending_events;
+	# wait_for_pending_events;
 
+	kill 'CHLD' => $$;
+	# effect: trigger an other SIGCHLD...
+	# this whole signal magic is a little bit messy :(
+
 	warn("\n.\n#\n#\tENTER MAINLOOP\n#\n");
-	warn "Failed: $FAILED\n";
+	warn("Failed: $FAILED\n") if $::LGE_IS_DEBUGGING;
 
 
 	my ($part,$event,$lasttime,@obj);
@@ -257,10 +265,13 @@
 		last if $pending <= 0;
 		$did_something = 0;
 
-		@obj = active_objects;
-		if ($FAILED == 0 and int(rand(100)) < $move_resource_prob) {
+		# wait_sync and wait_for_boot increase $FAILED by 0x800 or 0x1000,
+		# so if the hardware is up, we still may move resources.
+		if ( ($FAILED & 0xff) == 0 and
+		     int(rand(100)) < $move_resource_prob ) {
 			$part = choose(@Resource);
 		} else {
+			@obj = active_objects;
 			$part = choose(@obj);
 		}
 		if ($part and $part->isa('LGE_CTH::Resource')) {
@@ -269,7 +280,9 @@
 			$cn = $part->{_current_node};
 			if ($cn) {
 				$nn = ( sort { $a->{_resources} <=> $b->{_resources} } 
-				        grep { $_ != $cn and not $_->{_busy} } @LGE_CTH::Node )[0];
+				        grep { $_ != $cn and $_->{_busy} =~ /^$|^wait_sync/ }
+					@LGE_CTH::Node
+				)[0];
 				$part->relocate_to($nn) if $nn;
 				$did_something = 1;
 				$last_time = time;
@@ -285,6 +298,9 @@
 			}
 		} else {
 			print STDERR "cannot do anything, FAILED=$FAILED\n";
+			kill 'CHLD' => $$;
+			# effect: trigger an other SIGCHLD...
+			# this whole signal magic is a little bit messy :(
 		}
 	}
 
@@ -377,11 +393,13 @@
 				: $ex >> 8;
 		}
 
+		# print "exit with $ex from: $script";
+
 		# make sure the logging sub process has flushed and
 		# exited, too ...
 		close STDERR;
 		close STDOUT;
-		
+
 		exit $ex;
 	}
 	die "NOT REACHED. ??";

Modified: trunk/testing/CTH/T-007.sh
===================================================================
--- trunk/testing/CTH/T-007.sh	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/T-007.sh	2004-07-23 14:38:54 UTC (rev 1452)
@@ -12,6 +12,26 @@
 
 sleeptime=30
 
+# FIXME incorporate properly into Drbd_MD5_diff in CTH_bash.helpers
+Compare()
+{
+	out=md5sum.r0.diff.$iter
+	echo "==> compare checksums of lower level devices"
+	Drbd_MD5_diff Drbd_1 > $out || true
+	last_line=$(sed -ne 's/^md probably starts at blocknr //p;2q' < $out)
+	first_chunk=$(sed -ne '1,4d;s/^@@ -\([0-9]\+\),.*/\1/p;5q' < $out)
+	if (( first_chunk > last_line )) ; then
+		echo "no block differences in data section."
+	else
+		echo -n "number of block differences in data section: "
+		if sed -e "1,4d;/^. *$last_line\t/q" $out | grep -c "^[+-]" ; then
+			echo "oops. stopping here."
+			echo "you want to have a look at '$out' yourself"
+			exit 1
+		fi
+	fi
+}
+
 # start it.
 Start RS_1 Node_1
 sleep 10
@@ -44,7 +64,7 @@
 
 	Stop RS_1
 	if (( iter % 10 == 0 )) ; then
-		Drbd_MD5_diff Drbd_1 > md5sum.r0.diff.$iter
+		Compare
 	fi
 	Start RS_1 Node_1
 

Modified: trunk/testing/CTH/functions.sh
===================================================================
--- trunk/testing/CTH/functions.sh	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/functions.sh	2004-07-23 14:38:54 UTC (rev 1452)
@@ -42,7 +42,7 @@
 # wbtest            {{{2
 ########################
 
-wbtest_start()
+wbtest_old_start()
 {
 	: ${MNT:?unknown mount point}
 	WBTLOG=~/wbtest.log
@@ -63,6 +63,21 @@
 	wbtest -p 0 -c 5 -m 16384 -M 102400 -s checkpoint -t data -l $WBTLOG
 }
 
+wbtest_start()
+{
+	: ${MNT:?unknown mount point}
+	WBTLOG=~/wbtest.log
+	cd $MNT
+	mkdir -p data
+	echo RESTART >> $WBTLOG
+	date >> $WBTLOG
+
+	wbtest -d data -l $WBTLOG -V
+# CHANGE, but be aware that -c 20,
+# and two resources, you will have a load of ~40 :)
+	wbtest -v -p 0 -c 5 -m 16384 -M 102400 -d data -l $WBTLOG > /dev/null
+}
+
 #
 # dummy            {{{2
 ########################
@@ -93,12 +108,21 @@
 PATH=/root/bin:/usr/bin:/bin:/usr/sbin:/sbin
 $(printf '%q\n' "${@:3}")
 "
+	local err
 	: ${host:?unknown host}
 	: ${cmd:?no command}
 	# printf "%s " ssh -2 -4 -o BatchMode=yes -o KeepAlive=yes -xl root $host -- "$env$cmd; $2"
 	ssh -2 -4 -o BatchMode=yes -o KeepAlive=yes -xl root $host -- "$env$cmd; $2"
+	err=$?
+	[[ $err == 0 ]] || echo "$2: $err"
+	return $err
 }
 
+to_syslog()
+{
+	logger "$MSG"
+}
+
 #
 # Node              {{{2
 ########################
@@ -109,7 +133,7 @@
 	: ${DRBD_MAJOR:?missing DRBD_MAJOR}
 	: ${DRBD_DEVNAME:?missing DRBD_DEVNAME}
 	: ${MINOR_COUNT:?missing MINOR_COUNT}
-	[[ `uname -n` == $hostname ]]
+	test `uname -n` = $hostname
 	if [ -e /proc/drbd ] ; then
 		for d in `grep -o "^/dev/$DRBD_DEVNAME[^ ]\+" /proc/mounts` ; do
 			fuser -vmk $d || true
@@ -298,7 +322,7 @@
 	RSIZE=$(fdisk -s /dev/mapper/$NAME)
 	USIZE=${USIZE:+$[USIZE+128*1024]}
 	: ${USIZE:=$RSIZE}
-	(( USIZE <= RSIZE )) # assert USIZE <= RSIZE
+	(( USIZE <= RSIZE )) || return 1 # assert USIZE <= RSIZE
 	let "MLOC=(USIZE & ~3) -128*1024"
 	if $START_CLEAN ; then
 		perl -e 'print pack "N8", 0,0, 1,1,1,1,1, 0x8374026a;

Modified: trunk/testing/CTH/uml-minna.conf
===================================================================
--- trunk/testing/CTH/uml-minna.conf	2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/uml-minna.conf	2004-07-23 14:38:54 UTC (rev 1452)
@@ -4,6 +4,9 @@
 # or at the end of this file
 # 
 package main;
+
+$::DRBD_SKIP_INITIAL_SYNC="true"; # "false" is also true. only "" is false :-)
+
 $left = new LGE_CTH::Node {
 	hostname  => 'uml-1',
 	admin_ip  => '192.168.200.1',
@@ -133,4 +136,4 @@
 
 # if you like, override the sync group here to be the same
 $r0->{_config}->{syncer}->{group} = 1;
-$r1->{_config}->{syncer}->{group} = 1;
+$r1->{_config}->{syncer}->{group} = 2;