[DRBD-user] failover resulted in split brain

Fri Feb 28 13:24:01 CET 2014

Hi,

I'm doing tests on a new DRBD setup, so I'm hammering the DRBD system 
with reads and writes (3 VMs writing with dd and three VMs reading with 
dd).  The test maxes out my 2x1GigE bonded links (both data and sync) 
and maxes out my hard drives (5 7200 RPM SATA, RAID6).  I share the drbd 
disks to Proxmox (KVM based) via NFS v3.

1. I tested the system all night, and both DRBD servers handled 
everything fine.
2. I rebooted the primary.
3. Failover of the IP and NFS worked, and the secondary became primary.
4. The rebooted server came back up and entered split-brain.

I use uCarp for the failover instead of heartbeat/pacemaker.

I've used iSCSI over DRBD/heartbeat before, but not NFS.  Any ideas why 
I hit split brain?

Gerald

drbd.conf
# cat /etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example

include "drbd.d/global_common.conf";
# include "drbd.d/*.res";

# Resource target.0: device /dev/drbd0 backed by /dev/md0 on both hosts
# (iscsi-filer-1 at 192.168.10.1, iscsi-filer-2 at 192.168.10.2), TCP port
# 7789, with flexible-meta-disk set to /dev/md3 on each side.
resource target.0 {
         protocol C;

         handlers {
         # The first three handlers run the same command: write "o" to
         # /proc/sysrq-trigger and then force a halt of the node.
         pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
         pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
         local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
         # NOTE(review): this handler calls a heartbeat helper, while the post
         # says uCarp is used instead of heartbeat -- confirm it can work here.
         outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
         # Site-local scripts run around a resync on the sync target.
         # NOTE(review): these two values are unquoted, unlike the handlers
         # above -- confirm the parser accepts them as written.
         before-resync-target /usr/local/bin/resync-start-RAID6.sh;
         after-resync-target /usr/local/bin/resync-end-RAID6.sh;
         }

         startup {
         degr-wfc-timeout 120;
         }

         disk {
         on-io-error detach;
         }

         net {
         cram-hmac-alg sha1;
         shared-secret "password";
         # All three after-split-brain policies are "disconnect".
         # NOTE(review): per the DRBD documentation this presumably means no
         # automatic split-brain recovery is attempted -- confirm.
         after-sb-0pri disconnect;
         after-sb-1pri disconnect;
         after-sb-2pri disconnect;
         rr-conflict disconnect;
         sndbuf-size 0;
         }

         syncer {
         c-plan-ahead 0;
         rate 30M;
         verify-alg sha1;
         # Previous al-extents value kept for reference; 3389 is the one in use.
#        al-extents 257;
         al-extents 3389;
         }

         on iscsi-filer-1 {
         device  /dev/drbd0;
         disk    /dev/md0;
         address 192.168.10.1:7789;
         flexible-meta-disk /dev/md3;
         }

         on iscsi-filer-2 {
         device  /dev/drbd0;
         disk    /dev/md0;
         address 192.168.10.2:7789;
         flexible-meta-disk /dev/md3;
         }
}

# Resource target.2: device /dev/drbd2 backed by /dev/md2 on both hosts
# (iscsi-filer-1 at 192.168.10.1, iscsi-filer-2 at 192.168.10.2), TCP port
# 7790, with flexible-meta-disk set to /dev/md4 on each side.
# Settings match resource target.0 except for the devices, the port and the
# RAID5 (rather than RAID6) resync scripts.
resource target.2 {
         protocol C;

         handlers {
         # The first three handlers run the same command: write "o" to
         # /proc/sysrq-trigger and then force a halt of the node.
         pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
         pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
         local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
         # NOTE(review): this handler calls a heartbeat helper, while the post
         # says uCarp is used instead of heartbeat -- confirm it can work here.
         outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
         # Site-local scripts run around a resync on the sync target.
         # NOTE(review): these two values are unquoted, unlike the handlers
         # above -- confirm the parser accepts them as written.
         before-resync-target /usr/local/bin/resync-start-RAID5.sh;
         after-resync-target /usr/local/bin/resync-end-RAID5.sh;
         }

         startup {
         degr-wfc-timeout 120;
         }

         disk {
         on-io-error detach;
         }

         net {
         cram-hmac-alg sha1;
         shared-secret "password";
         # All three after-split-brain policies are "disconnect".
         # NOTE(review): per the DRBD documentation this presumably means no
         # automatic split-brain recovery is attempted -- confirm.
         after-sb-0pri disconnect;
         after-sb-1pri disconnect;
         after-sb-2pri disconnect;
         rr-conflict disconnect;
         sndbuf-size 0;
         }

         syncer {
         c-plan-ahead 0;
         rate 30M;
         verify-alg sha1;
         # Previous al-extents value kept for reference; 3389 is the one in use.
#        al-extents 257;
         al-extents 3389;
         }

         on iscsi-filer-1 {
         device  /dev/drbd2;
         disk    /dev/md2;
         address 192.168.10.1:7790;
         flexible-meta-disk /dev/md4;
         }

         on iscsi-filer-2 {
         device  /dev/drbd2;
         disk    /dev/md2;
         address 192.168.10.2:7790;
         flexible-meta-disk /dev/md4;
         }
}

ucarp-up
#!/bin/sh
# ucarp-up: take over service on this node -- promote all DRBD resources,
# bring up the <$1>:ucarp interface alias, mount both DRBD devices and
# restart the NFS server.
#
# $1 is used as the base name of the interface alias (presumably the
# interface name passed in by ucarp -- confirm).
#
# NOTE(review): no exit status is checked anywhere in this script, so the
# mounts and the NFS restart run even if "drbdadm primary all" failed.

# Promote every DRBD resource once before the address comes up ...
/sbin/drbdadm primary all
/sbin/ifup $1:ucarp
# ... and three more times afterwards (presumably a crude retry -- confirm
# with the author why the promotion is repeated).
/sbin/drbdadm primary all
/sbin/drbdadm primary all
/sbin/drbdadm primary all
# Mount the two DRBD devices at their NFS export directories.
mount -o defaults,noatime,nodiratime /dev/drbd0 /nfs-exported/raid6
mount -o defaults,noatime,nodiratime /dev/drbd2 /nfs-exported/raid5
/etc/init.d/nfs-kernel-server restart
# Wait two seconds after the restart, then set the nfsd thread count to 256.
sleep 2
echo 256 > /proc/fs/nfsd/threads

ucarp-down
#!/bin/sh
# ucarp-down: release service on this node -- stop the NFS server, unmount
# both export directories, demote all DRBD resources and take down the
# <$1>:ucarp interface alias.
#
# $1 is used as the base name of the interface alias (presumably the
# interface name passed in by ucarp -- confirm).
#
# NOTE(review): no exit status is checked, so "drbdadm secondary all" is
# attempted even if stopping NFS or an umount failed.

# Stop NFS before unmounting the exported filesystems.
/etc/init.d/nfs-kernel-server stop
umount /nfs-exported/raid6
umount /nfs-exported/raid5
# Demote the DRBD resources once the filesystems have been unmounted.
/sbin/drbdadm secondary all
/sbin/ifdown $1:ucarp

-- 
Gerald Brandt
Majentis Technologies
gbr at majentis.com
204-229-6595
www.majentis.com

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20140228/49bf75cb/attachment.htm>