Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.
Hi,
I'm doing tests on a new DRBD setup, so I'm hammering the DRBD system
with reads and writes (3 VMs writing with dd and three VMs reading with
dd). The test maxes out my 2x1GigE bonded links (both data and sync)
and maxes out my hard drives (5 7200 RPM SATA, RAID6). I share the drbd
disks to Proxmox (KVM based) via NFS v3.
1. I tested the system all night, and both DRBD servers handled
everything fine.
2. I rebooted the primary.
3. Failover of the IP and NFS worked, and the secondary became primary.
4. The rebooted server came back up and entered split-brain.
I use uCarp for the failover instead of heartbeat/pacemaker.
I've used iSCSI over DRBD/heartbeat before, but not NFS. Any ideas why
I hit split brain?
Gerald
drbd.conf
# cat /etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example
include "drbd.d/global_common.conf";
# include "drbd.d/*.res";
# Resource "target.0": /dev/drbd0 on top of /dev/md0 on both filers,
# replicated between 192.168.10.1 and 192.168.10.2 on TCP port 7789.
resource target.0 {
    # Protocol C = fully synchronous replication.
    protocol C;

    handlers {
        # The next three handlers all run the same command: sysrq "o"
        # (immediate power-off) followed by a forced halt.
        pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
        pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
        # NOTE(review): the disk section uses "on-io-error detach"; this
        # handler is presumably only invoked with "call-local-io-error" - confirm.
        local-io-error "echo o > /proc/sysrq-trigger ; halt -f";

        # NOTE(review): drbd-peer-outdater is the Heartbeat (dopd) helper, but
        # this cluster is described as using uCarp rather than heartbeat, and
        # no "fencing" policy is set in the disk section below, so this handler
        # is presumably never effective - confirm. "outdate-peer" also appears
        # to be the older name of "fence-peer".
        outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";

        # Site-local scripts run on the sync target around a resync.
        before-resync-target /usr/local/bin/resync-start-RAID6.sh;
        after-resync-target /usr/local/bin/resync-end-RAID6.sh;
    }

    startup {
        # Seconds to wait for the peer when a previously degraded node boots.
        degr-wfc-timeout 120;
    }

    disk {
        # On a lower-level I/O error, drop the backing device and go diskless.
        on-io-error detach;
    }

    net {
        # Peer authentication (HMAC-SHA1 challenge/response).
        # NOTE(review): "password" is presumably a placeholder used for the
        # posting - confirm a strong secret is used in production.
        cram-hmac-alg sha1;
        shared-secret "password";

        # No automatic split-brain recovery: whatever the number of primaries
        # at detection time, the nodes just disconnect and wait for an admin.
        after-sb-0pri disconnect;
        after-sb-1pri disconnect;
        after-sb-2pri disconnect;
        rr-conflict disconnect;

        # 0 presumably selects send-buffer auto-tuning - confirm for this version.
        sndbuf-size 0;
    }

    syncer {
        # c-plan-ahead 0 turns the dynamic resync controller off, so the
        # fixed "rate" below is what applies.
        c-plan-ahead 0;
        rate 30M;
        # Digest used by online verification (drbdadm verify).
        verify-alg sha1;
        # Activity-log size; the smaller value was tried/kept for reference.
        # al-extents 257;
        al-extents 3389;
    }

    # Per-host sections: identical layout on both nodes, with DRBD metadata
    # kept on a separate device (/dev/md3).
    on iscsi-filer-1 {
        device /dev/drbd0;
        disk /dev/md0;
        address 192.168.10.1:7789;
        flexible-meta-disk /dev/md3;
    }

    on iscsi-filer-2 {
        device /dev/drbd0;
        disk /dev/md0;
        address 192.168.10.2:7789;
        flexible-meta-disk /dev/md3;
    }
}
# Resource "target.2": /dev/drbd2 on top of /dev/md2 on both filers,
# replicated between 192.168.10.1 and 192.168.10.2 on TCP port 7790.
resource target.2 {
    # Protocol C = fully synchronous replication.
    protocol C;

    handlers {
        # The next three handlers all run the same command: sysrq "o"
        # (immediate power-off) followed by a forced halt.
        pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
        pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
        # NOTE(review): the disk section uses "on-io-error detach"; this
        # handler is presumably only invoked with "call-local-io-error" - confirm.
        local-io-error "echo o > /proc/sysrq-trigger ; halt -f";

        # NOTE(review): drbd-peer-outdater is the Heartbeat (dopd) helper, but
        # this cluster is described as using uCarp rather than heartbeat, and
        # no "fencing" policy is set in the disk section below, so this handler
        # is presumably never effective - confirm. "outdate-peer" also appears
        # to be the older name of "fence-peer".
        outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";

        # Site-local scripts run on the sync target around a resync.
        before-resync-target /usr/local/bin/resync-start-RAID5.sh;
        after-resync-target /usr/local/bin/resync-end-RAID5.sh;
    }

    startup {
        # Seconds to wait for the peer when a previously degraded node boots.
        degr-wfc-timeout 120;
    }

    disk {
        # On a lower-level I/O error, drop the backing device and go diskless.
        on-io-error detach;
    }

    net {
        # Peer authentication (HMAC-SHA1 challenge/response).
        # NOTE(review): "password" is presumably a placeholder used for the
        # posting - confirm a strong secret is used in production.
        cram-hmac-alg sha1;
        shared-secret "password";

        # No automatic split-brain recovery: whatever the number of primaries
        # at detection time, the nodes just disconnect and wait for an admin.
        after-sb-0pri disconnect;
        after-sb-1pri disconnect;
        after-sb-2pri disconnect;
        rr-conflict disconnect;

        # 0 presumably selects send-buffer auto-tuning - confirm for this version.
        sndbuf-size 0;
    }

    syncer {
        # c-plan-ahead 0 turns the dynamic resync controller off, so the
        # fixed "rate" below is what applies.
        c-plan-ahead 0;
        rate 30M;
        # Digest used by online verification (drbdadm verify).
        verify-alg sha1;
        # Activity-log size; the smaller value was tried/kept for reference.
        # al-extents 257;
        al-extents 3389;
    }

    # Per-host sections: identical layout on both nodes, with DRBD metadata
    # kept on a separate device (/dev/md4).
    on iscsi-filer-1 {
        device /dev/drbd2;
        disk /dev/md2;
        address 192.168.10.1:7790;
        flexible-meta-disk /dev/md4;
    }

    on iscsi-filer-2 {
        device /dev/drbd2;
        disk /dev/md2;
        address 192.168.10.2:7790;
        flexible-meta-disk /dev/md4;
    }
}
ucarp-up
#!/bin/sh
# ucarp-up: take over the NFS service on this node.
#
# Usage: ucarp-up <interface>
#   $1  network interface; the alias "<interface>:ucarp" is brought up.
#
# Sequence: promote all DRBD resources, bring up the alias interface,
# mount the two DRBD devices, restart the NFS server and raise the nfsd
# thread count to 256.
#
# Exits non-zero, without touching NFS, if the DRBD resources cannot be
# promoted or a filesystem cannot be mounted; restarting NFS in that state
# would export the empty mount-point directories.

iface="$1"

die() {
    echo "ucarp-up: $*" >&2
    exit 1
}

promote_all() {
    /sbin/drbdadm primary all
}

# Mount device $1 on directory $2 unless something is already mounted there.
mount_export() {
    if mountpoint -q "$2"; then
        return 0
    fi
    mount -o defaults,noatime,nodiratime "$1" "$2" || die "cannot mount $1 on $2"
}

[ -n "$iface" ] || die "usage: ucarp-up <interface>"

# First promotion attempt happens before the address comes up.
promote_all
promoted=$?

/sbin/ifup "$iface:ucarp"

# Up to three further attempts, replacing the three unconditional repeats;
# stop as soon as one succeeds.
tries=0
while [ "$promoted" -ne 0 ] && [ "$tries" -lt 3 ]; do
    promote_all
    promoted=$?
    tries=$((tries + 1))
done
[ "$promoted" -eq 0 ] || die "could not promote DRBD resources to Primary; not mounting or starting NFS"

mount_export /dev/drbd0 /nfs-exported/raid6
mount_export /dev/drbd2 /nfs-exported/raid5

/etc/init.d/nfs-kernel-server restart
# Give nfsd a moment to start before resizing its thread pool.
sleep 2
echo 256 > /proc/fs/nfsd/threads
ucarp-down
#!/bin/sh
# ucarp-down: release the NFS service on this node.
#
# Usage: ucarp-down <interface>
#   $1  network interface; the alias "<interface>:ucarp" is brought down.
#
# Sequence: stop the NFS server, unmount both exported filesystems, demote
# all DRBD resources to Secondary, then bring the alias interface down.
# Every step is always attempted; any failure is reported on stderr and
# reflected in a non-zero exit status instead of being silently ignored.

iface="$1"
rc=0

warn() {
    echo "ucarp-down: $*" >&2
    rc=1
}

/etc/init.d/nfs-kernel-server stop || warn "stopping nfs-kernel-server failed"

for mnt in /nfs-exported/raid6 /nfs-exported/raid5; do
    umount "$mnt" || warn "could not unmount $mnt"
done

# Demotion fails while a device is still mounted or open, which leaves this
# node Primary - make that visible.
/sbin/drbdadm secondary all || warn "DRBD demotion failed; this node may still be Primary"

/sbin/ifdown "$iface:ucarp" || warn "could not bring down $iface:ucarp"

exit "$rc"
--
Gerald Brandt
Majentis Technologies
gbr at majentis.com
204-229-6595
www.majentis.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20140228/49bf75cb/attachment.htm>