Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.
Hi All:
I set up a cluster based on DRBD and ran the following test:
On node A, I set DRBD to Primary, ran a MySQL database on it with
some load, and ran the following script:
while [ 0 ] ; do
drbdadm connect drbd0 >/dev/null 2>&1
sleep 10;
done
On node B, I ran the following test to simulate the failover action
after node A crashes:
while [ 0 ] ; do
drbdadm disconnect drbd0
drbdadm primary drbd0
mount & start mysql
sleep 1
stop mysql & umount
drbdadm secondary drbd0
drbdadm -- --discard-my-data connect drbd0
wait until sync completes
sleep 1;
done
After several loops, the DRBD state on A becomes Unconnected and does
not change any more.
When I run a drbdadm command such as drbdadm disconnect drbd0, it
blocks for a while and the following kernel message is printed:
INFO: task cqueue:2423 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
cqueue D 0000000000000000 0 2423 2 0x00000080
ffff880037ad7a20 0000000000000046 0000000000000000 ffff880000000000
ffff880037ad7980 ffff880037ad7980 0000000000000000 0000000000000000
ffff8800374f3038 ffff880037ad7fd8 000000000000f4e8 ffff8800374f3038
Call Trace:
[<ffffffffa0651ec5>] _drbd_request_state+0xb5/0x130 [drbd]
[<ffffffff81091570>] ? autoremove_wake_function+0x0/0x40
[<ffffffffa06582f6>] drbd_nl_disconnect+0x176/0x5c0 [drbd]
[<ffffffff81064d74>] ? enqueue_task_fair+0x64/0x100
[<ffffffff81059507>] ? enqueue_task+0x77/0x90
[<ffffffff8105f1da>] ? try_to_wake_up+0xca/0x420
[<ffffffff81271849>] ? cpumask_next_and+0x29/0x50
[<ffffffff81054754>] ? find_busiest_group+0x244/0xb20
[<ffffffff81190396>] ? pollwake+0x56/0x60
[<ffffffff81055c83>] ? perf_event_task_sched_out+0x33/0x80
[<ffffffff810096f0>] ? __switch_to+0xd0/0x320
[<ffffffff814f6085>] ? thread_return+0x53/0x7be
[<ffffffff8116400c>] ? __kmalloc+0x20c/0x220
[<ffffffff8133f441>] ? cn_queue_wrapper+0x31/0x50
[<ffffffffa065965f>] drbd_connector_callback+0x13f/0x2b0 [drbd]
[<ffffffff8133f410>] ? cn_queue_wrapper+0x0/0x50
[<ffffffff8133f438>] cn_queue_wrapper+0x28/0x50
[<ffffffff8108bb90>] worker_thread+0x170/0x2b0
[<ffffffff81091570>] ? autoremove_wake_function+0x0/0x40
[<ffffffff8108ba20>] ? worker_thread+0x0/0x2b0
[<ffffffff81091206>] kthread+0x96/0xa0
[<ffffffff8100c14a>] child_rip+0xa/0x20
[<ffffffff81091170>] ? kthread+0x0/0xa0
[<ffffffff8100c140>] ? child_rip+0x0/0x20
# drbdadm --version
DRBDADM_BUILDTAG=GIT-hash:\ bbf851ee755a878a495cfd93e1a76bf90dc79442\
Makefile.in\ build\ by\ local at myhost.com\,\ 2012-06-07\ 16:03:04
DRBDADM_API_VERSION=88
DRBD_KERNEL_VERSION_CODE=0x08030d
DRBDADM_VERSION_CODE=0x08030d
DRBDADM_VERSION=8.3.13
# uname -a
Linux nodeb 2.6.32-220.el6.x86_64 #1 SMP Thu May 31 22:47:14 EDT 2012
x86_64 x86_64 x86_64 GNU/Linux
# drbdadm dump drbd0
resource drbd0 {
protocol A;
on nodeb {
device /dev/drbd0 minor 0;
disk /dev/vg_node/oracle_drbd;
address ipv4 192.168.8.84:7700;
meta-disk internal;
}
on node {
device /dev/drbd0 minor 0;
disk /dev/vg_node/oracle_drbd;
address ipv4 192.168.8.91:7700;
meta-disk internal;
}
net {
ping-timeout 30;
ping-int 30;
data-integrity-alg crc32c;
ko-count 6;
max-epoch-size 20000;
max-buffers 32000;
unplug-watermark 16;
}
disk {
on-io-error pass_on;
}
syncer {
rate 800M;
csums-alg crc32c;
al-extents 3833;
}
handlers {
before-resync-target
"/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 5 -- -c 16k";
after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
}