[DRBD-cvs] r1452 - in trunk/testing/CTH: . LGE_CTH
drbd-user@lists.linbit.com
drbd-user@lists.linbit.com
Fri, 23 Jul 2004 16:38:55 +0200 (CEST)
Author: lars
Date: 2004-07-23 16:38:54 +0200 (Fri, 23 Jul 2004)
New Revision: 1452
Modified:
trunk/testing/CTH/CTH_bash.helpers
trunk/testing/CTH/CTH_bash.sh
trunk/testing/CTH/LGE_CTH.pm
trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm
trunk/testing/CTH/LGE_CTH/Node.pm
trunk/testing/CTH/T-007.sh
trunk/testing/CTH/functions.sh
trunk/testing/CTH/uml-minna.conf
Log:
- allow skipping the initial sync
- allow resources to be moved while resync is running
(still needs some work)
- in case internal signal magic gets confused, trigger an extra SIGCHLD
(ugly; needs work)
- integrate new wbtest options (wbtest was rewritten and committed yesterday)
- fix several issues with the bash errexit option (set -e)
I should not rely too much on these internal semantics; they keep biting me :-/
Modified: trunk/testing/CTH/CTH_bash.helpers
===================================================================
--- trunk/testing/CTH/CTH_bash.helpers 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/CTH_bash.helpers 2004-07-23 14:38:54 UTC (rev 1452)
@@ -235,7 +235,7 @@
blocks=$( grep ^DEVICE_SIZE < md5sum-${!right}-$name | cut -f 2 )
echo "NOTE: internal meta data may differ..."
echo "md probably starts at blocknr $[blocks-(128<<8)]"
- diff -u md5sum-${!left}-$name md5sum-${!right}-$name || true
+ diff -u md5sum-${!left}-$name md5sum-${!right}-$name
}
dd_single_block()
@@ -306,8 +306,7 @@
# initial boot
boot_and_setup_nodes()
-{ (
- set -e
+{
initial=true
[[ ${!Drbd_*} ]] && have_drbd=true || have_drbd=false # FIXME should be node specific...
for n in ${!Node_*}; do
@@ -350,7 +349,7 @@
bdev_to_env $BDEV
if [[ $BDEV == Drbd_* ]] ; then
n=$left
- on ${!n}: drbdadm_pri force="-- -d" name=$name
+ on ${!n}: drbdadm_pri force="-- --do-what-I-say" name=$name
else
n=Node_1
fi
@@ -365,8 +364,15 @@
on ${!n}: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor
fi
done
-) }
+}
+on_all_nodes()
+{
+ for n in ${!Node_*} ; do
+ on ${!n}: "$@"
+ done
+}
+
Start()
{
rs=$1 nn=$2
Modified: trunk/testing/CTH/CTH_bash.sh
===================================================================
--- trunk/testing/CTH/CTH_bash.sh 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/CTH_bash.sh 2004-07-23 14:38:54 UTC (rev 1452)
@@ -40,7 +40,9 @@
__I_MEAN_IT__=__YES__
source ./functions.sh || return
- boot_and_setup_nodes || return
+ set +e
+ ( set -e; boot_and_setup_nodes )
+ err=$?; [[ $err == 0 ]] || return $err
cat <<-___
#
@@ -51,20 +53,23 @@
trap 'ex=$?; echo "exit_code: $ex"' ERR # show exit codes != 0
if [[ -e $CASE ]] ; then
echo "now run CASE=$CASE"
- ( set -e; source $CASE ) || return
+ on_all_nodes to_syslog MSG="now run CASE=$CASE"
+ ( set -e; source $CASE )
+ err=$?; [[ $err == 0 ]] || return $err
fi
return
}
-if Run; then
+Run; err=$?
+if [[ $err == 0 ]]; then
cat <<-___
#--- $CASE ----
# PASSED
#-----------------
___
else
- echo "something went wrong. exit_code: $?"
+ echo "something went wrong. exit_code: $err"
fi
if $INTERACTIVE ; then
Modified: trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/LGE_CTH/DRBD_Resource.pm 2004-07-23 14:38:54 UTC (rev 1452)
@@ -45,14 +45,14 @@
$cmd = "on $ip: drbd_wait_sync DEV=/dev/$DRBD_DEVNAME$minor";
$node->{_busy} = "wait_sync" unless $node->{_busy};
$node->{_busy} .= " $name ";
- $LGE_CTH::FAILED += 1000;
+ $LGE_CTH::FAILED += 0x1000;
_spawn( "wait_sync $name on $hostname after $event", $cmd,
sub {
my $ex = $_[0];
$node->say("wait_sync $name on $hostname done: $ex");
$node->{_busy} =~ s/ $name / /;
$node->{_busy} = "" if $node->{_busy} =~ /^wait_sync\s*$/;
- $LGE_CTH::FAILED -= 1000;
+ $LGE_CTH::FAILED -= 0x1000;
# TODO update state and so on.
}
);
@@ -68,8 +68,10 @@
_spawn("$me->{_id}: initial_setup on $node->{_config}->{hostname}",
sub {
- my $cmd = ". ./functions.sh; on $node->{_config}->{admin_ip}: drbd_append_config "
- . "RES=$mr->{_config}->{name} LO_DEV=$disk->{_config}->{dev} NAME=$disk->{_config}->{name}";
+ my $cmd = ". ./functions.sh; on $node->{_config}->{admin_ip}: drbd_append_config"
+ . " RES=$mr->{_config}->{name} LO_DEV=$disk->{_config}->{dev}"
+ . " NAME=$disk->{_config}->{name}"
+ . " START_CLEAN=$::DRBD_SKIP_INITIAL_SYNC";
open (DRBD_CONF,"|$cmd")
or die "$cmd $node->{_id}:drbd.conf: $!";
print DRBD_CONF $mr->as_conf_string
@@ -274,7 +276,7 @@
_spawn("$me->{_id}: wait for $hostname to recognize ... ",$cmd,'SYNC');
$cmd = "on $ip: drbdadm_pri name=$name";
- $cmd .= ' "force=-- -d"' if $force;
+ $cmd .= ' "force=-- --do-what-I-say"' if $force;
_spawn("$me->{_id}: Primary $name on $node->{_config}->{hostname}",$cmd,'SYNC');
if ($force) {
for my $i (@{$c->{_instances}}) {
Modified: trunk/testing/CTH/LGE_CTH/Node.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH/Node.pm 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/LGE_CTH/Node.pm 2004-07-23 14:38:54 UTC (rev 1452)
@@ -98,7 +98,7 @@
$me->say("unexpected crash of $me->{_config}->{hostname}!\n")
unless $me->{_busy} eq "fail";
$me->status("down","fail","done");
- $LGE_CTH::FAILED -= 499;
+ $LGE_CTH::FAILED -= 0x800;
$me->wait_for_boot;
} else {
$me->status("shutdown","END_OF_TEST","done");
@@ -153,7 +153,7 @@
$me->say("no point in crashing me, I'm down!"), return
if $me->{_status}->{status} ne "up";
- $LGE_CTH::FAILED += 500;
+ $LGE_CTH::FAILED += 0x801;
$me->{_busy} = "fail";
$me->{_status}->{status} = "crashing"; # don't propagate yet, failing heartbeat will propagate it
Log("FAIL $me->{_id}");
Modified: trunk/testing/CTH/LGE_CTH.pm
===================================================================
--- trunk/testing/CTH/LGE_CTH.pm 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/LGE_CTH.pm 2004-07-23 14:38:54 UTC (rev 1452)
@@ -49,6 +49,10 @@
# = "drbd";
# = "drbd/";
+# set this to something ne '' to skip the initial full sync
+# NOTE: "false" is also true. only "" is false :-)
+$::DRBD_SKIP_INITIAL_SYNC="";
+
##
## private
##
@@ -226,10 +230,14 @@
warn("\n.\n#\n#\tINITIAL SETUP\n#\n");
$CRM->start_all;
- wait_for_pending_events;
+ # wait_for_pending_events;
+ kill 'CHLD' => $$;
+ # effect: trigger an other SIGCHLD...
+ # this whole signal magic is a little bit messy :(
+
warn("\n.\n#\n#\tENTER MAINLOOP\n#\n");
- warn "Failed: $FAILED\n";
+ warn("Failed: $FAILED\n") if $::LGE_IS_DEBUGGING;
my ($part,$event,$lasttime,@obj);
@@ -257,10 +265,13 @@
last if $pending <= 0;
$did_something = 0;
- @obj = active_objects;
- if ($FAILED == 0 and int(rand(100)) < $move_resource_prob) {
+ # wait_sync and wait_for_boot increase $FAILED by 0x800 or 0x1000,
+ # so if the hardware is up, we still may move resources.
+ if ( ($FAILED & 0xff) == 0 and
+ int(rand(100)) < $move_resource_prob ) {
$part = choose(@Resource);
} else {
+ @obj = active_objects;
$part = choose(@obj);
}
if ($part and $part->isa('LGE_CTH::Resource')) {
@@ -269,7 +280,9 @@
$cn = $part->{_current_node};
if ($cn) {
$nn = ( sort { $a->{_resources} <=> $b->{_resources} }
- grep { $_ != $cn and not $_->{_busy} } @LGE_CTH::Node )[0];
+ grep { $_ != $cn and $_->{_busy} =~ /^$|^wait_sync/ }
+ @LGE_CTH::Node
+ )[0];
$part->relocate_to($nn) if $nn;
$did_something = 1;
$last_time = time;
@@ -285,6 +298,9 @@
}
} else {
print STDERR "cannot do anything, FAILED=$FAILED\n";
+ kill 'CHLD' => $$;
+ # effect: trigger an other SIGCHLD...
+ # this whole signal magic is a little bit messy :(
}
}
@@ -377,11 +393,13 @@
: $ex >> 8;
}
+ # print "exit with $ex from: $script";
+
# make sure the logging sub process has flushed and
# exited, too ...
close STDERR;
close STDOUT;
-
+
exit $ex;
}
die "NOT REACHED. ??";
Modified: trunk/testing/CTH/T-007.sh
===================================================================
--- trunk/testing/CTH/T-007.sh 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/T-007.sh 2004-07-23 14:38:54 UTC (rev 1452)
@@ -12,6 +12,26 @@
sleeptime=30
+# FIXME incorporate properly into Drbd_MD5_diff in CTH_bash.helpers
+Compare()
+{
+ out=md5sum.r0.diff.$iter
+ echo "==> compare checksums of lower level devices"
+ Drbd_MD5_diff Drbd_1 > $out || true
+ last_line=$(sed -ne 's/^md probably starts at blocknr //p;2q' < $out)
+ first_chunk=$(sed -ne '1,4d;s/^@@ -\([0-9]\+\),.*/\1/p;5q' < $out)
+ if (( first_chunk > last_line )) ; then
+ echo "no block differences in data section."
+ else
+ echo -n "number of block differences in data section: "
+ if sed -e "1,4d;/^. *$last_line\t/q" $out | grep -c "^[+-]" ; then
+ echo "oops. stopping here."
+ echo "you want to have a look at '$out' yourself"
+ exit 1
+ fi
+ fi
+}
+
# start it.
Start RS_1 Node_1
sleep 10
@@ -44,7 +64,7 @@
Stop RS_1
if (( iter % 10 == 0 )) ; then
- Drbd_MD5_diff Drbd_1 > md5sum.r0.diff.$iter
+ Compare
fi
Start RS_1 Node_1
Modified: trunk/testing/CTH/functions.sh
===================================================================
--- trunk/testing/CTH/functions.sh 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/functions.sh 2004-07-23 14:38:54 UTC (rev 1452)
@@ -42,7 +42,7 @@
# wbtest {{{2
########################
-wbtest_start()
+wbtest_old_start()
{
: ${MNT:?unknown mount point}
WBTLOG=~/wbtest.log
@@ -63,6 +63,21 @@
wbtest -p 0 -c 5 -m 16384 -M 102400 -s checkpoint -t data -l $WBTLOG
}
+wbtest_start()
+{
+ : ${MNT:?unknown mount point}
+ WBTLOG=~/wbtest.log
+ cd $MNT
+ mkdir -p data
+ echo RESTART >> $WBTLOG
+ date >> $WBTLOG
+
+ wbtest -d data -l $WBTLOG -V
+# CHANGE, but be aware that -c 20,
+# and two resources, you will have a load of ~40 :)
+ wbtest -v -p 0 -c 5 -m 16384 -M 102400 -d data -l $WBTLOG > /dev/null
+}
+
#
# dummy {{{2
########################
@@ -93,12 +108,21 @@
PATH=/root/bin:/usr/bin:/bin:/usr/sbin:/sbin
$(printf '%q\n' "${@:3}")
"
+ local err
: ${host:?unknown host}
: ${cmd:?no command}
# printf "%s " ssh -2 -4 -o BatchMode=yes -o KeepAlive=yes -xl root $host -- "$env$cmd; $2"
ssh -2 -4 -o BatchMode=yes -o KeepAlive=yes -xl root $host -- "$env$cmd; $2"
+ err=$?
+ [[ $err == 0 ]] || echo "$2: $err"
+ return $err
}
+to_syslog()
+{
+ logger "$MSG"
+}
+
#
# Node {{{2
########################
@@ -109,7 +133,7 @@
: ${DRBD_MAJOR:?missing DRBD_MAJOR}
: ${DRBD_DEVNAME:?missing DRBD_DEVNAME}
: ${MINOR_COUNT:?missing MINOR_COUNT}
- [[ `uname -n` == $hostname ]]
+ test `uname -n` = $hostname
if [ -e /proc/drbd ] ; then
for d in `grep -o "^/dev/$DRBD_DEVNAME[^ ]\+" /proc/mounts` ; do
fuser -vmk $d || true
@@ -298,7 +322,7 @@
RSIZE=$(fdisk -s /dev/mapper/$NAME)
USIZE=${USIZE:+$[USIZE+128*1024]}
: ${USIZE:=$RSIZE}
- (( USIZE <= RSIZE )) # assert USIZE <= RSIZE
+ (( USIZE <= RSIZE )) || return 1 # assert USIZE <= RSIZE
let "MLOC=(USIZE & ~3) -128*1024"
if $START_CLEAN ; then
perl -e 'print pack "N8", 0,0, 1,1,1,1,1, 0x8374026a;
Modified: trunk/testing/CTH/uml-minna.conf
===================================================================
--- trunk/testing/CTH/uml-minna.conf 2004-07-23 14:18:42 UTC (rev 1451)
+++ trunk/testing/CTH/uml-minna.conf 2004-07-23 14:38:54 UTC (rev 1452)
@@ -4,6 +4,9 @@
# or at the end of this file
#
package main;
+
+$::DRBD_SKIP_INITIAL_SYNC="true"; # "false" is also true. only "" is false :-)
+
$left = new LGE_CTH::Node {
hostname => 'uml-1',
admin_ip => '192.168.200.1',
@@ -133,4 +136,4 @@
# if you like, override the sync group here to be the same
$r0->{_config}->{syncer}->{group} = 1;
-$r1->{_config}->{syncer}->{group} = 1;
+$r1->{_config}->{syncer}->{group} = 2;