[DRBD-user] dual primary + pacemaker issue

Fri Nov 9 15:30:54 CET 2012

Hi,

We are setting up a brand-new cluster with dual-primary DRBD + Pacemaker + Xen. Here's the current configuration:

- global_common.conf
global {
   dialog-refresh  	 1;
   minor-count  5;
}
common {
}

- drbd0.res
resource drbd0 {
  protocol      C;
   disk {
	  on-io-error       detach;
	  fencing resource-and-stonith;

   }
   syncer {
	  rate      33M;
	    al-extents 3389;
   }

   handlers {
	    fence-peer "/usr/lib/drbd/stonith_admin-fence-peer.sh";
		   }

   net {
	  allow-two-primaries yes; # Enable this *after* initial testing
	    cram-hmac-alg sha1;
	    shared-secret "a6a0680c40bca2439dbe48343ddddcf4";
	    after-sb-0pri discard-zero-changes;
	    after-sb-1pri discard-secondary;
	    after-sb-2pri disconnect;
   }
   startup {
#	   become-primary-on both;
   }
   on xs02 {
	  disk      /dev/sdb;
	  device    /dev/drbd0;
	  meta-disk internal;
	  address   10.1.1.136:7780;
   }
   on xs01 {
	  disk      /dev/sdb;
	  device    /dev/drbd0;
	  meta-disk internal;
	  address   10.1.1.135:7780;
   }
}

- crm configuration
node xs01
node xs02
primitive dlm ocf:pacemaker:controld \
	    operations $id="dlm-operations" \
	    op monitor interval="10" timeout="20" start-delay="0"
primitive drbd0 ocf:linbit:drbd \
	    operations $id="drbd0-operations" \
	    op monitor interval="20" role="Slave" timeout="20" \
	    op monitor interval="10" role="Master" timeout="20" \
	    params drbd_resource="drbd0"
primitive o2cb ocf:ocfs2:o2cb \
	    operations $id="o2cb-operations" \
	    op monitor interval="10" timeout="20" \
	    meta target-role="Started"
primitive stonith-ipmi-xs01 stonith:external/ipmi \
	    meta target-role="Started" is-managed="true" \
	    operations $id="stonith-ipmi-xs01-operations" \
	    op monitor interval="3600" timeout="20" \
	    params hostname="xs01" ipaddr="125.1.254.135" userid="radmin" passwd="xxxxxx" interface="lan"
primitive stonith-ipmi-xs02 stonith:external/ipmi \
	    meta target-role="Started" is-managed="true" \
	    operations $id="stonith-ipmi-xs02-operations" \
	    op monitor interval="3600" timeout="20" \
	    params hostname="xs02" ipaddr="125.1.254.136" userid="radmin" passwd="xxxxx" interface="lan"
primitive vmdisk-pri ocf:heartbeat:Filesystem \
	    operations $id="vmdisk-pri-operations" \
	    op monitor interval="20" timeout="40" \
	    params device="/dev/drbd/by-disk/sdb" directory="/vmdisk" fstype="ocfs2" options="rw,noatime"
group init dlm o2cb \
	    meta is-managed="true"
ms ms_drbd0 drbd0 \
	    meta master-max="2" clone-max="2" notify="true" target-role="Started"
clone init-clone init \
	    meta interleave="true" target-role="Started" is-managed="true"
clone vmdisk-clone vmdisk-pri \
	    meta target-role="Started"
location fence-xs01 stonith-ipmi-xs01 -inf: xs01
location fence-xs02 stonith-ipmi-xs02 -inf: xs02
colocation colocacion : init-clone vmdisk-clone ms_drbd0:Master
order ordenamiento : ms_drbd0:promote init-clone:start vmdisk-clone:start
property $id="cib-bootstrap-options" \
	    dc-version="1.1.7-77eeb099a504ceda05d648ed161ef8b1582c7daf" \
	    cluster-infrastructure="openais" \
	    expected-quorum-votes="2" \
	    batch-limit="1" \
	    no-quorum-policy="ignore" \
	    last-lrm-refresh="1352468954" \
	    default-resource-stickiness="1000"
op_defaults $id="op_defaults-options" \
	    record-pending="false"

The problem is that when we try to start the master drbd0 resource in Pacemaker, it fails and then xs02 powers off. Soon after that, the resource is promoted to Master on xs01. When I then go to the xs02 server and boot it up, the resource is promoted to Master there as well, so both nodes end up in the Master state.

We have already tuned the eth1 card, setting the MTU to 9000 as documented on the site.

Is this the normal behaviour?

Regards,
Daniel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20121109/89755ad5/attachment.htm>