Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.
Hi All,
I posted about this edge case in #drbd last night and it was recommended
that I bring it to the attention of the devs.
I have a two-node DRBD setup running in primary/secondary mode.
The following test case presents a problem -
1. The secondary goes diskless due to a broken array (as expected)
   while the primary is being written to.
2. The primary then dies (power failure).
3. The secondary gets rebooted, or dies and comes back online, etc.
The secondary will be promoted to primary - and since it was diskless
before the restart, it holds stale, out-of-date data.
When the 'real' primary comes back online, we then have our split brain.
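Recovery at that point is manual split-brain resolution, discarding the
stale node's data - roughly the following (assuming san3 is the stale
node; the standard DRBD 8.3 procedure as I understand it):

# on the stale node (san3): demote and throw away its changes
drbdadm secondary data0b
drbdadm -- --discard-my-data connect data0b
# on the surviving node (san2): reconnect if it went StandAlone
drbdadm connect data0b

This is exactly the situation I would like the cluster to avoid getting
into in the first place.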
What I think needs to happen is a way to mark the 'diskless' state as
outdated, so that Pacemaker will not attempt to promote this node to
primary.
Alternatively, a constraint in Pacemaker could keep a previously
diskless node from being promoted until a re-sync has completed.
As this node is diskless and uses internal metadata, this state cannot
be recorded in the DRBD metadata itself - it would have to live
somewhere else, e.g. in the CIB.
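Something along these lines is what I have in mind - an untested
sketch; the attribute name drbd-outdated-data0b and the hook points are
my own invention:

# on detach (e.g. from a local-io-error or custom handler), flag the
# node as holding stale data; a plain node attribute persists across
# reboots:
crm node attribute san3 set drbd-outdated-data0b 1

# in crm configure: never promote a flagged node
location l_data0b_no_stale_master ms_drbd_data0b \
        rule $role="Master" -inf: drbd-outdated-data0b eq 1

# once a full resync has completed (e.g. from after-resync-target),
# clear the flag:
crm node attribute san3 delete drbd-outdated-data0b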
Any suggestions?
To reproduce:
echo 1 > /sys/block/sdb/device/delete (drop the backing disk; DRBD
goes diskless on I/O error)
echo b > /proc/sysrq-trigger (to simulate an unclean restart)
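After the first step, the detach is visible on the secondary - the
disk state should drop to Diskless:

drbdadm dstate data0b   # prints something like Diskless/UpToDate
cat /proc/drbd          # the ds: field shows the same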
I have included my configuration below:
drbd version: 8.3.11 (api:88/proto:86-96)
ii  heartbeat  1:3.0.3-2          Subsystem for High-Availability Linux
ii  pacemaker  1.0.9.1+hg15626-1  HA cluster resource manager
drbd configuration:
data0b.res
resource data0b {
        protocol C;
        device /dev/drbd0;
        disk /dev/sdb;
        meta-disk internal;

        disk {
                on-io-error detach;
                # barriers enabled - safe defaults; only disable them
                # once a BBU is in place and known good
                #disk-barrier no;
                #disk-flushes no;
        }

        syncer {
                al-extents 3389;
                rate 99M;
                verify-alg sha1;
        }

        net {
                sndbuf-size 512k;
                max-buffers 8000;
                max-epoch-size 8000;
                timeout 60;     # 6 secs * ko-count
                ko-count 2;
                # network integrity check - turn off in production
                data-integrity-alg crc32c;
        }

        on san2 {
                address 172.29.202.152:3389;
        }
        on san3 {
                address 172.29.202.153:3389;
        }
}
global-common.conf
common {
        protocol C;

        handlers {
                pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh;
                        /usr/lib/drbd/notify-emergency-reboot.sh;
                        echo b > /proc/sysrq-trigger ; reboot -f";
                pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh;
                        /usr/lib/drbd/notify-emergency-reboot.sh;
                        echo b > /proc/sysrq-trigger ; reboot -f";
                local-io-error "/usr/lib/drbd/notify-io-error.sh;
                        /usr/lib/drbd/notify-emergency-shutdown.sh;
                        echo o > /proc/sysrq-trigger ; halt -f";
                # fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
                # split-brain "/usr/lib/drbd/notify-split-brain.sh root";
                # out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
                # before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
                # after-resync-target "/usr/lib/drbd/unsnapshot-resync-target-lvm.sh";
                fence-peer "/usr/lib/drbd/crm-fence-peer.sh --timeout 60 --dc-timeout 90";
                after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh --timeout 60 --dc-timeout 90";
        }
}
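One thing I am not sure about: as I understand it, crm-fence-peer.sh
only gets called if a fencing policy is set on the resource, which I do
not currently have - something like this in the disk section:

        disk {
                on-io-error detach;
                fencing resource-only;  # or resource-and-stonith
        }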
crm configuration:
crm options editor nano
crm configure
property stonith-enabled="false"
property no-quorum-policy="ignore"
primitive p_drbd_data0b \
ocf:linbit:drbd \
params drbd_resource="data0b" \
op monitor interval="29s" role="Master" \
op monitor interval="31s" role="Slave" \
op start interval="0" timeout="240" \
op promote interval="0" timeout="90" \
op demote interval="0" timeout="90" \
op stop interval="0" timeout="100"
ms ms_drbd_data0b p_drbd_data0b \
meta master-max="1" master-node-max="1" clone-max="2" \
clone-node-max="1" notify="true"
primitive p_lvm_data0b \
ocf:heartbeat:LVM \
params volgrpname="data0b" \
op monitor interval="120s" timeout="60s" \
op start timeout="60s" \
op stop timeout="60s"
primitive p_ip_data0b_1 \
ocf:heartbeat:IPaddr2 \
params ip="172.29.101.201" cidr_netmask="24" nic="eth2" \
op monitor interval="10s"
primitive p_ip_data0b_2 \
ocf:heartbeat:IPaddr2 \
params ip="172.29.102.201" cidr_netmask="24" nic="eth4" \
op monitor interval="10s"
primitive p_ip_data0b_3 \
ocf:heartbeat:IPaddr2 \
params ip="172.29.103.201" cidr_netmask="24" nic="eth6" \
op monitor interval="10s"
primitive p_ip_data0b_4 \
ocf:heartbeat:IPaddr2 \
params ip="172.29.104.201" cidr_netmask="24" nic="eth8" \
op monitor interval="10s"
primitive p_ip_data0b_5 \
ocf:heartbeat:IPaddr2 \
params ip="172.29.105.201" cidr_netmask="24" nic="eth5" \
op monitor interval="10s"
primitive p_ip_data0b_6 \
ocf:heartbeat:IPaddr2 \
params ip="172.29.106.201" cidr_netmask="24" nic="eth9" \
op monitor interval="10s"
primitive p_target_data0b \
ocf:heartbeat:iSCSITarget \
params iqn="iqn.2012-04.com.test:data0b" tid="1" portals="" \
implementation="iet" \
op monitor interval="40s"
primitive p_lun_data0b_test \
ocf:heartbeat:iSCSILogicalUnit \
params target_iqn="iqn.2012-04.com.test:data0b" \
lun="1" path="/dev/data0b/test" \
scsi_id="data0b.test" scsi_sn="data0b.test" \
additional_parameters="type=blockio" \
op monitor interval="40s"
primitive p_lun_data0b_test2 \
ocf:heartbeat:iSCSILogicalUnit \
params target_iqn="iqn.2012-04.com.test:data0b" \
lun="2" path="/dev/data0b/test2" \
scsi_id="data0b.test2" scsi_sn="data0b.test2" \
additional_parameters="type=blockio" \
op monitor interval="40s"
primitive iscsi_lsb lsb:iscsi-target \
op monitor interval="5s" timeout="15s" \
op start timeout="60s" \
op stop timeout="60s"
clone iscsi_init iscsi_lsb \
meta clone-max="2" clone-node-max="1" globally-unique="false"
group p_data0b \
p_lvm_data0b p_target_data0b p_lun_data0b_test p_lun_data0b_test2
group p_data0b_ip \
p_ip_data0b_1 p_ip_data0b_2 p_ip_data0b_3 p_ip_data0b_4 \
p_ip_data0b_5 p_ip_data0b_6
order o_drbd_before_d0b_services \
inf: ms_drbd_data0b:promote p_data0b:start
order o_d0b_services_before_ip \
inf: p_data0b:start p_data0b_ip:start
colocation c_d0b_services_on_drbd \
inf: p_data0b ms_drbd_data0b:Master
colocation c_ip_on_d0b_services \
inf: p_data0b_ip p_data0b
property stonith-enabled="true"
primitive p_stonith_san3 stonith:external/ipmi \
params hostname=san3 ipaddr=10.2.1.236 userid=* passwd=* \
interface=lan stonith-timeout="60" \
op monitor timeout=60s interval=120m \
op start timeout="60s" \
op stop timeout="60s"
primitive p_stonith_san2 stonith:external/ipmi \
params hostname=san2 ipaddr=10.2.1.234 userid=* passwd=* \
interface=lan stonith-timeout="60" \
op monitor timeout=60s interval=120m \
op start timeout="60s" \
op stop timeout="60s"
location stonith_san3_not_on_san3 p_stonith_san3 -inf: san3
location stonith_san2_not_on_san2 p_stonith_san2 -inf: san2
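All of the above is entered in the interactive crm configure session
and then applied with:

verify
commit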
ha.cf
keepalive 5
deadtime 30
warntime 10
initdead 120