[DRBD-user] drbd pacemaker scst/srp 2 node active/passive question

Fri Mar 1 03:49:32 CET 2013

First time posting to a mailing list hope I get this right.

I have a 2 node DRBD backed SCST/SRP single target(ib_srpt) setup working great using pacemaker/corosync.  I am using this for the data store for a mail server.  Where I am running into an issue is the initiator's are running on vmware ESXi 4.1 hosts, when a fail over occurs on the target the vm host initiators go dead and you have to rescan to pick up the target via the new path causing the vm guest to go down until the new path is discovered.

Hope that makes sense.

What I see as the potential problem is lvm and scst are only active on the primary node thus the secondary node is un-discoverable by ESXi host until it fails over.  I am not sure what the answer is but my thought process is I am trying to figure out if it is possible to have: 

1. on the node1 (primary node) drbd(primary), lvm, scst with the target in read/write mode 
2. on the node2 (secondary node) drbd(secondary), lvm, scst with the target in read mode 

and when the node1 fails over, node1 scst target goes ready only and node2 scst target would switch to read/write.  What I am trying to achieve is the the vm host seeing the target and paths at all times.

Hopefully there is an easier solution to this and that I am not making things more difficult.  I have been researching this for weeks and at the point of frustration.  Any guidance would be appreciated.

Side note: I modified SCSTTarget RA to work with ib_srpt as it was not written for it originally and did not find another RA out there specifically for my setup.

Thank you for any help you may be able to provide.

Setup:
Initiator machines vmware ESXi 4.1

Target machines
2 nodes running CentOS 2.6.32-279.19.1.el6.x86_64

DRBD:
kmod-drbd84-8.4.2-1.el6_3.elrepo.x86_64

Pacemaker/Corosync:
pacemaker-libs-1.1.7-6.el6.x86_64
pacemaker-cli-1.1.7-6.el6.x86_64
pacemaker-1.1.7-6.el6.x86_64
pacemaker-cluster-libs-1.1.7-6.el6.x86_64
corosync-1.4.1-7.el6_3.1.x86_64
corosynclib-1.4.1-7.el6_3.1.x86_64

SCST/SRPT:
scst-tools-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
kernel-module-scst-iscsi-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
kernel-module-scst-core-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64
kernel-module-scst-srpt-2.6.32-279.19.1.el6-2.2.1-1.ab.x86_64

scst config:

HANDLER vdisk_fileio {

        DEVICE disk00 {
                filename /dev/drbd-stor/mail-stor
                nv_cache 1
        }
}

TARGET_DRIVER ib_srpt {
        TARGET 0002:c902:0020:2020 {
                enabled 1
                cpu_mask ff
                rel_tgt_id 1

                GROUP data {
                        LUN 0 disk00

                        INITIATOR 0x8102c902002020210002c903000f2bf3
                        INITIATOR 0x8102c902002020220002c903000f2bf3
                        INITIATOR 0x8102c902002020210002c903000c67bd
                        INITIATOR 0x8102c902002020220002c903000c67bd

                        INITIATOR 0x8102c902002008ed0002c903000c67bd
                        INITIATOR 0x8102c902002008ee0002c903000c67bd
                        INITIATOR 0x8102c902002008ee0002c903000f2bf3
                        INITIATOR 0x8102c902002008ed0002c903000f2bf3

                        cpu_mask ff
                }

        }
}

drbd config:

resource r0 {
        device    /dev/drbd1;
        disk      /dev/mapper/lun-lun00;
        meta-disk internal;
        net {
                protocol C;
                max-buffers 8000;
                max-epoch-size 8000;
                cram-hmac-alg sha1;
                shared-secret "secret";
        }
        handlers {
                split-brain "/usr/lib/drbd/notify-split-brain.sh admins at domain.com";
        }
        disk {
                resync-rate 10M;
        }
        on mail-stor01.domain.com {
                address 172.24.252.1:7790;
        }
        on mail-stor02.domain.com {
                address 172.24.252.2:7790;
        }
}

crm configure show:

node mail-stor01.domain.com
node mail-stor02.domain.com
primitive drbd-r0 ocf:linbit:drbd \
        params drbd_resource="r0" \
        operations $id="drbd-r0-operations" \
        op monitor start-delay="0" interval="25"
primitive lvm-r0 ocf:heartbeat:LVM \
        params volgrpname="drbd-stor" \
        meta is-managed="true" target-role="Started" \
        op monitor interval="10" timeout="30" depth="0" \
        op start interval="0" timeout="500" \
        op stop interval="0" timeout="500"
primitive node1-stonith stonith:fence_drac5 \
         params action="off" ipaddr="drac-mailstor01" login="cluster" passwd="secret" ipport="22" inet4_only="true" secure="true" verbose="true" debug="/var/log/stonith" pcmk_host_check="static-list" pcmk_host_list="mail-stor01.domain.com" \
        meta target-role="started" is-managed="true"
primitive node2-stonith stonith:fence_drac5 \
         params action="off" ipaddr="drac-mailstor02" login="cluster" passwd="secret" ipport="22" inet4_only="true" secure="true" verbose="true" debug="/var/log/stonith" pcmk_host_check="static-list" pcmk_host_list="mail-stor02.domain.com" \
        meta target-role="started" is-managed="true"
primitive p_ping ocf:pacemaker:ping \
        params host_list="172.24.252.126 172.24.252.254" multiplier="100" dampen="5s" \
        op monitor interval="60" timeout="60" \
        op start interval="0" timeout="60" \
        op stop interval="0" timeout="60"
ms ms-drbd-r0 drbd-r0 \
        meta clone-max="2" notify="true" target-role="Started" resource-stickiness="0"
clone c_ping p_ping \
        meta target-role="Started"
location loc-node1-stonith node1-stonith -inf: mail-stor01.domain.com
location loc-node2-stonith node2-stonith -inf: mail-stor02.domain.com
location pref-ping-lvm-drbd ms-drbd-r0  \
        rule $id="pref-ping-lvm-rule" -inf: not_defined ping or ping lte 100
location pref-drbd-r0 ms-drbd-r0 \
        rule $id="pref-drbd-r0-rule" $role="master" 100: #uname eq mail-stor01.domain.com
colocation lvm-drbd-r0 inf: lvm-r0 ms-drbd-r0:Master
order or-drbd-lvm inf: ms-drbd-r0:promote lvm-r0:start
property $id="cib-bootstrap-options" \
        default-resource-stickiness="200" \
        expected-quorum-votes="2" \
        dc-version="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14" \
        no-quorum-policy="ignore" \
        cluster-infrastructure="openais" \
        last-lrm-refresh="1355877808" \
        stonith-enabled="true" \
        stonith-action="poweroff"

Corosync.conf

totem {
   version: 2
   token: 5000
   token_retransmits_before_loss_const: 10
   join: 1000
   consensus: 2500
   vsftype: none
   max_messages: 20
   send_join: 45
   clear_node_high_bit: yes
   secauth: off
   threads: 0
   # RRP can have three modes (rrp_mode): if set to active, Corosync uses both
   # interfaces actively. If set to passive, Corosync uses the second interface
   # only if the first ring fails. If rrp_mode is set to none, RRP is disabled.
   rrp_mode: active
   interface {
        ringnumber: 0
        bindnetaddr: 172.24.0.0
        mcastaddr: 226.94.1.1
        mcastport: 4000
        ttl: 1
     }
   interface {
        ringnumber: 1
        bindnetaddr: 172.24.16.0
        mcastaddr: 226.94.1.1
        mcastport: 4000
        ttl: 1
     }
}

logging {
        fileline: off
        to_stderr: no
        to_logfile: yes
        to_syslog: no
        logfile: /var/log/cluster/corosync.log
        debug: off
        timestamp: on
        syslog_facility: daemon
}

amf {
        mode: disabled
}

aisexec {
 user: root
 group: root
}

service {
   name: pacemaker
   ver: 0

------
Jason Thomas jthomas at medata.com