Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.
Hi
I'm having problems configuring an active/active setup. I've done
several searches, but they led only to a few howtos and pieces of
documentation, all very similar to each other.
The main problem is that I cannot handle split-brain in case of a network
failure. I think I've correctly set up both drbd and heartbeat to do so,
but drbd always remains in an inconsistent state after a split-brain (sb).
In case of sb I can have a master node which can be the syncsource, so
I've set after-sb-0pri accordingly.
I also have another big problem: in case of a short network outage,
BOTH nodes get stonith'ed! What am I doing wrong? Both nodes' drbd.conf
must be identical, right?
They're connected only via network.
Any help really appreciated. Thanks.
These are the relevant parts of drbd.conf:
resource r0 {
protocol C;
handlers {
pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater";
pri-lost "echo pri-lost. Have a look at the log files. | mail -s
'DRBD Alert' dontspamme at spamme.com";
# Notify someone in case DRBD split brained.
split-brain "echo split-brain. drbdadm -- --discard-my-data connect
$DRBD_RESOURCE ? | mail -s 'DRBD Alert' dontspamme at spamme.com";
}
startup {
degr-wfc-timeout 120;
wfc-timeout 120;
become-primary-on both;
}
disk {
on-io-error detach;
fencing resource-and-stonith;
}
net {
allow-two-primaries;
after-sb-0pri discard-node-node2;
after-sb-1pri discard-secondary;
after-sb-2pri disconnect;
rr-conflict disconnect;
syncer {
rate 50M;
al-extents 257;
}
I'm omitting the "on" statements, as they're fine.
This is heartbeat's cib.xml, for stonith:
<cib generated="true" admin_epoch="0" have_quorum="1" num_peers="2"
cib_feature_revision="1.3" ccm_transition="1" ignore_dtd="false"
crm_feature_set="2.1" epoch="795"
dc_uuid="6259bf18-2678-4fe5-834f-d1e4bb63c9e8" num_updates="17"
cib-last-written="Wed Jun 11 11:01:57 2008">
<configuration>
<crm_config>
<cluster_property_set id="bootstrap">
<attributes>
<nvpair id="bootstrap01" name="transition-idle-timeout"
value="60"/>
<nvpair id="bootstrap02" name="default-resource-stickiness"
value="INFINITY"/>
<nvpair id="bootstrap03"
name="default-resource-failure-stickiness" value="-500"/>
<nvpair id="bootstrap04" name="stonith-enabled"
value="true"/>
<nvpair id="bootstrap05" name="stonith-action"
value="reboot"/>
<nvpair id="bootstrap06" name="symmetric-cluster"
value="true"/>
<nvpair id="bootstrap07" name="no-quorum-policy"
value="stop"/>
<nvpair id="bootstrap08" name="stop-orphan-resources"
value="true"/>
<nvpair id="bootstrap09" name="stop-orphan-actions"
value="true"/>
<nvpair id="bootstrap10" name="is-managed-default"
value="true"/>
</attributes>
</cluster_property_set>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair id="cib-bootstrap-options-dc-version"
name="dc-version" value="0.6.4-node:
226d8e35692415c56dbd05258a85905414938ce9"/>
</attributes>
</cluster_property_set>
</crm_config>
<nodes>
<node id="6259bf18-2678-4fe5-834f-d1e4bb63c9e8" uname="host1"
type="normal"/>
<node id="52f4f725-21b0-49e8-b4d8-a7124637ca6f" uname="host2"
type="normal"/>
</nodes>
<resources>
<clone id="stonithcloneGroup" globally_unique="false">
<instance_attributes id="stonithcloneGroup">
<attributes>
<nvpair id="stonithclone01" name="clone_node_max"
value="1"/>
</attributes>
</instance_attributes>
<primitive id="stonithclone" class="stonith"
type="external/ssh" provider="heartbeat">
<operations>
<op name="monitor" interval="5s" timeout="20s"
prereq="nothing" id="stonithclone-op01"/>
<op name="start" timeout="20s" prereq="nothing"
id="stonithclone-op02"/>
</operations>
<instance_attributes id="stonithclone">
<attributes>
<nvpair id="stonithclone01attr" name="hostlist"
value="host1,host2"/>
</attributes>
</instance_attributes>
</primitive>
</clone>
</resources>
<constraints/>
</configuration>
</cib>
--
Lorenzo Milesi - lorenzo.milesi at yetopen.it
YetOpen S.r.l. - http://www.yetopen.it/
C.so E. Filiberto, 74 23900 Lecco - ITALY -
Tel 0341 220 205 - Fax 178 607 8199
GPG/PGP Key-Id: 0xE704E230 - http://keyserver.linux.it