[DRBD-user] failover resulted in split brain

Gerald Brandt gbr at majentis.com
Fri Feb 28 13:24:01 CET 2014

Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.


Hi,

I'm doing tests on a new DRBD setup, so I'm hammering the DRBD system
with reads and writes (3 VMs writing with dd and three VMs reading with
dd).  The test maxes out my 2x1GigE bonded links (both data and sync)
and maxes out my hard drives (5 7200 RPM SATA, RAID6).  I share the drbd
disks to Proxmox (KVM based) via NFS v3.

1. I tested the system all night, and both DRBD servers handled 
everything fine.
2. I rebooted the primary.
3. failover of the IP and NFS worked, and secondary became primary.
4. The rebooted server came back up and entered split-brain.

I use uCarp for the failover instead of heartbeat/pacemaker.

I've used iSCSI over DRBD/heartbeat before, but not NFS.  Any ideas why 
I hit split brain?

Gerald


drbd.conf
# cat /etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example

include "drbd.d/global_common.conf";
# include "drbd.d/*.res";

# Resource "target.0": exposes /dev/drbd0, backed by /dev/md0 with metadata on
# /dev/md3, between hosts iscsi-filer-1 and iscsi-filer-2 on TCP port 7789.
resource target.0 {
         protocol C;

         handlers {
         # The next three handlers write "o" to /proc/sysrq-trigger and then
         # run "halt -f", i.e. they take the node down when triggered.
         pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
         pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
         local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
         # NOTE(review): this helper lives under /usr/lib/heartbeat, but the
         # cluster described here fails over with uCarp, and no "fencing"
         # policy is set in the disk section below -- confirm this handler is
         # ever invoked and can work without heartbeat.
         outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
         # Site-local scripts run around a resync on the sync target
         # (contents not shown here).
         before-resync-target /usr/local/bin/resync-start-RAID6.sh;
         after-resync-target /usr/local/bin/resync-end-RAID6.sh;
         }

         startup {
         degr-wfc-timeout 120;
         }

         disk {
         on-io-error detach;
         }

         net {
         cram-hmac-alg sha1;
         # NOTE(review): presumably a placeholder for the posting; use a real
         # secret in production.
         shared-secret "password";
         # All three split-brain policies and the role-resolution conflict
         # policy are set to "disconnect"; per the DRBD documentation this
         # means no automatic split-brain recovery -- manual resolution needed.
         after-sb-0pri disconnect;
         after-sb-1pri disconnect;
         after-sb-2pri disconnect;
         rr-conflict disconnect;
         sndbuf-size 0;
         }

         syncer {
         # NOTE(review): c-plan-ahead 0 presumably disables the dynamic resync
         # controller so the fixed "rate" below applies -- confirm for the
         # DRBD version in use.
         c-plan-ahead 0;
         rate 30M;
         verify-alg sha1;
         # Previous activity-log size, kept for reference:
#        al-extents 257;
         al-extents 3389;
         }

         # Per-host sections: identical devices on both nodes, differing only
         # in the replication address.
         on iscsi-filer-1 {
         device  /dev/drbd0;
         disk    /dev/md0;
         address 192.168.10.1:7789;
         flexible-meta-disk /dev/md3;
         }

         on iscsi-filer-2 {
         device  /dev/drbd0;
         disk    /dev/md0;
         address 192.168.10.2:7789;
         flexible-meta-disk /dev/md3;
         }
}

# Resource "target.2": exposes /dev/drbd2, backed by /dev/md2 with metadata on
# /dev/md4, between hosts iscsi-filer-1 and iscsi-filer-2 on TCP port 7790.
resource target.2 {
         protocol C;

         handlers {
         # The next three handlers write "o" to /proc/sysrq-trigger and then
         # run "halt -f", i.e. they take the node down when triggered.
         pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
         pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
         local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
         # NOTE(review): this helper lives under /usr/lib/heartbeat, but the
         # cluster described here fails over with uCarp, and no "fencing"
         # policy is set in the disk section below -- confirm this handler is
         # ever invoked and can work without heartbeat.
         outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
         # Site-local scripts run around a resync on the sync target
         # (contents not shown here).
         before-resync-target /usr/local/bin/resync-start-RAID5.sh;
         after-resync-target /usr/local/bin/resync-end-RAID5.sh;
         }

         startup {
         degr-wfc-timeout 120;
         }

         disk {
         on-io-error detach;
         }

         net {
         cram-hmac-alg sha1;
         # NOTE(review): presumably a placeholder for the posting; use a real
         # secret in production.
         shared-secret "password";
         # All three split-brain policies and the role-resolution conflict
         # policy are set to "disconnect"; per the DRBD documentation this
         # means no automatic split-brain recovery -- manual resolution needed.
         after-sb-0pri disconnect;
         after-sb-1pri disconnect;
         after-sb-2pri disconnect;
         rr-conflict disconnect;
         sndbuf-size 0;
         }

         syncer {
         # NOTE(review): c-plan-ahead 0 presumably disables the dynamic resync
         # controller so the fixed "rate" below applies -- confirm for the
         # DRBD version in use.
         c-plan-ahead 0;
         rate 30M;
         verify-alg sha1;
         # Previous activity-log size, kept for reference:
#        al-extents 257;
         al-extents 3389;
         }

         # Per-host sections: identical devices on both nodes, differing only
         # in the replication address.
         on iscsi-filer-1 {
         device  /dev/drbd2;
         disk    /dev/md2;
         address 192.168.10.1:7790;
         flexible-meta-disk /dev/md4;
         }

         on iscsi-filer-2 {
         device  /dev/drbd2;
         disk    /dev/md2;
         address 192.168.10.2:7790;
         flexible-meta-disk /dev/md4;
         }
}


ucarp-up
#!/bin/sh
# ucarp-up: take over service on this node.
#
# Promotes all DRBD resources to Primary, brings up the "<iface>:ucarp"
# interface, mounts the two exported filesystems, restarts the NFS server and
# sets the nfsd thread count.
#
# Arguments:   $1 - network interface name; "$1:ucarp" is brought up.
# Exit status: non-zero if promotion, a mount or the NFS restart fails. In
#              that case later steps are skipped, so unmounted (empty)
#              directories are never exported over NFS.

iface=$1

# Print an error to stderr and abort.
fail() {
  echo "ucarp-up: $*" >&2
  exit 1
}

# Try to make every DRBD resource Primary; returns drbdadm's exit status.
promote() {
  /sbin/drbdadm primary all
}

# Mount device $1 on directory $2 unless something is already mounted there.
mount_export() {
  if grep -qF -- " $2 " /proc/mounts; then
    return 0
  fi
  mount -o defaults,noatime,nodiratime "$1" "$2" ||
    fail "cannot mount $1 on $2"
}

# First promotion attempt happens before the address comes up, as before;
# its result is re-checked by the retry loop below.
promote
/sbin/ifup "${iface}:ucarp"

# Retry promotion a bounded number of times instead of repeating the command
# blindly, and stop as soon as it succeeds.
attempts=3
until promote; do
  attempts=$((attempts - 1))
  if [ "$attempts" -le 0 ]; then
    fail "could not promote DRBD resources to Primary"
  fi
  sleep 1
done

mount_export /dev/drbd0 /nfs-exported/raid6
mount_export /dev/drbd2 /nfs-exported/raid5

/etc/init.d/nfs-kernel-server restart || fail "NFS server restart failed"
# Give nfsd a moment to start before adjusting its thread count.
sleep 2
echo 256 > /proc/fs/nfsd/threads


ucarp-down
#!/bin/sh
# ucarp-down: release service on this node.
#
# Stops the NFS server, unmounts the two exported filesystems, demotes all
# DRBD resources to Secondary and takes down the "<iface>:ucarp" interface.
#
# Every step is always attempted, in the same order, even if an earlier one
# fails; failures are reported on stderr instead of being silently ignored.
#
# Arguments:   $1 - network interface name; "$1:ucarp" is taken down.
# Exit status: 0 if every step succeeded, 1 otherwise.

iface=$1
status=0

# Report a failed step on stderr and remember that something went wrong.
warn() {
  echo "ucarp-down: $*" >&2
  status=1
}

/etc/init.d/nfs-kernel-server stop || warn "failed to stop the NFS server"
umount /nfs-exported/raid6 || warn "failed to unmount /nfs-exported/raid6"
umount /nfs-exported/raid5 || warn "failed to unmount /nfs-exported/raid5"

# A failed demotion leaves this node Primary after it has given up the
# service address, so make that case visible.
/sbin/drbdadm secondary all ||
  warn "failed to demote DRBD resources; node may still be Primary"

/sbin/ifdown "${iface}:ucarp" || warn "failed to bring down ${iface}:ucarp"

exit "$status"



-- 
Gerald Brandt
Majentis Technologies
gbr at majentis.com
204-229-6595
www.majentis.com

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20140228/49bf75cb/attachment.htm>


More information about the drbd-user mailing list