Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.
Hi, I'm doing tests on a new DRBD setup, so I'm hammering the DRBD system with reads and writes (3 VMs writing with dd and three VMs reading with dd). The test maxes out my 2x1GigE bonded links (both data and sync) and maxes out my hard drives (5 7200 RPM SATA, RAID6). I share the drbd disks to Proxmox (KVM based) via NFS v3. 1. I tested the system all night, and both DRBD servers handled everything fine. 2. I rebooted the primary. 3. Failover of the IP and NFS worked, and the secondary became primary. 4. The rebooted server came back up and entered split-brain. I use uCarp for the failover instead of heartbeat/pacemaker. I've used iSCSI over DRBD/heartbeat before, but not NFS. Any ideas why I hit split brain? Gerald drbd.conf # cat /etc/drbd.conf # You can find an example in /usr/share/doc/drbd.../drbd.conf.example include "drbd.d/global_common.conf"; # include "drbd.d/*.res"; resource target.0 { protocol C; handlers { pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f"; pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f"; local-io-error "echo o > /proc/sysrq-trigger ; halt -f"; outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5"; before-resync-target /usr/local/bin/resync-start-RAID6.sh; after-resync-target /usr/local/bin/resync-end-RAID6.sh; } startup { degr-wfc-timeout 120; } disk { on-io-error detach; } net { cram-hmac-alg sha1; shared-secret "password"; after-sb-0pri disconnect; after-sb-1pri disconnect; after-sb-2pri disconnect; rr-conflict disconnect; sndbuf-size 0; } syncer { c-plan-ahead 0; rate 30M; verify-alg sha1; # al-extents 257; al-extents 3389; } on iscsi-filer-1 { device /dev/drbd0; disk /dev/md0; address 192.168.10.1:7789; flexible-meta-disk /dev/md3; } on iscsi-filer-2 { device /dev/drbd0; disk /dev/md0; address 192.168.10.2:7789; flexible-meta-disk /dev/md3; } } resource target.2 { protocol C; handlers { pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f"; pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f"; local-io-error
"echo o > /proc/sysrq-trigger ; halt -f"; outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5"; before-resync-target /usr/local/bin/resync-start-RAID5.sh; after-resync-target /usr/local/bin/resync-end-RAID5.sh; } startup { degr-wfc-timeout 120; } disk { on-io-error detach; } net { cram-hmac-alg sha1; shared-secret "password"; after-sb-0pri disconnect; after-sb-1pri disconnect; after-sb-2pri disconnect; rr-conflict disconnect; sndbuf-size 0; } syncer { c-plan-ahead 0; rate 30M; verify-alg sha1; # al-extents 257; al-extents 3389; } on iscsi-filer-1 { device /dev/drbd2; disk /dev/md2; address 192.168.10.1:7790; flexible-meta-disk /dev/md4; } on iscsi-filer-2 { device /dev/drbd2; disk /dev/md2; address 192.168.10.2:7790; flexible-meta-disk /dev/md4; } } ucarp-up #!/bin/sh /sbin/drbdadm primary all /sbin/ifup $1:ucarp /sbin/drbdadm primary all /sbin/drbdadm primary all /sbin/drbdadm primary all mount -o defaults,noatime,nodiratime /dev/drbd0 /nfs-exported/raid6 mount -o defaults,noatime,nodiratime /dev/drbd2 /nfs-exported/raid5 /etc/init.d/nfs-kernel-server restart sleep 2 echo 256 > /proc/fs/nfsd/threads ucarp-down #!/bin/sh /etc/init.d/nfs-kernel-server stop umount /nfs-exported/raid6 umount /nfs-exported/raid5 /sbin/drbdadm secondary all /sbin/ifdown $1:ucarp -- Gerald Brandt Majentis Technologies gbr at majentis.com 204-229-6595 www.majentis.com -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20140228/49bf75cb/attachment.htm>