[DRBD-user] pacemaker + corosync and postgresql

Fri Oct 11 22:08:04 CEST 2013

I'm trying to build a sample cluster in virtual machines, and afterwards
migrate it to physical machines. However, I am having problems configuring
Pacemaker (crm) to start up the resources and to fail over.

I can mount the device /dev/drbd0 on the primary node and start postgresql
manually, but when I use the crm resources, the device is not mounted and
postgresql is not started.

I rebooted the virtual machines, without success.
DRBD is not promoted to primary, /dev/drbd0 is not mounted, and postgresql
is not started  :-(

DRBD Version: 8.3.11 (api:88)
Corosync Cluster Engine, version '1.4.2'
Pacemaker 1.1.6

**** after rebooting the virtual machines *****

ha-slave:

version: 8.3.13 (api:88/proto:86-96)
srcversion: 697DE8B1973B1D8914F04DB
 0: cs:Connected ro:Secondary/Secondary ds:UpToDate/UpToDate C r-----
    ns:0 nr:28672 dw:28672 dr:0 al:0 bm:5 lo:0 pe:0 ua:0 ap:0 ep:1 wo:n
oos:0

ha-master:
version: 8.3.13 (api:88/proto:86-96)
srcversion: 697DE8B1973B1D8914F04DB
 0: cs:Connected ro:Secondary/Secondary ds:UpToDate/UpToDate C r-----
    ns:28672 nr:0 dw:0 dr:28672 al:0 bm:5 lo:0 pe:0 ua:0 ap:0 ep:1 wo:n
oos:0

crm(live)# configure
crm(live)configure# show
node ha-master
node ha-slave
primitive drbd_postgresql ocf:heartbeat:drbd \
        params drbd_resource="postgresql"
primitive fs_postgresql ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/postgresql" directory="/mnt" fstype="ext4"
primitive postgresqld lsb:postgresql
primitive vip_cluster ocf:heartbeat:IPaddr2 \
        params ip="172.70.65.200" nic="eth0:1"
group postgresql fs_postgresql vip_cluster postgresqld \
        meta target-role="Started"
ms ms_drbd_postgresql drbd_postgresql \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
colocation postgresql_on_drbd inf: postgresql ms_drbd_postgresql:Master
order postgresql_after_drbd inf: ms_drbd_postgresql:promote postgresql:start
property $id="cib-bootstrap-options" \
        dc-version="1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        stonith-enabled="false" \
        no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
        resource-stickiness="100"

crm(live)# resource
crm(live)resource# list
 Master/Slave Set: ms_drbd_postgresql [drbd_postgresql]
     Stopped: [ drbd_postgresql:0 drbd_postgresql:1 ]
 Resource Group: postgresql
     fs_postgresql      (ocf::heartbeat:Filesystem) Stopped
     vip_cluster        (ocf::heartbeat:IPaddr2) Stopped
     postgresqld        (lsb:postgresql) Stopped

============
Last updated: Fri Oct 11 14:22:50 2013
Last change: Fri Oct 11 14:11:06 2013 via cibadmin on ha-slave
Stack: openais
Current DC: ha-slave - partition with quorum
Version: 1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c
2 Nodes configured, 2 expected votes
5 Resources configured.
============

Online: [ ha-slave ha-master ]

Failed actions:
    drbd_postgresql:0_start_0 (node=ha-slave, call=14, rc=1, status=complete): unknown error
    drbd_postgresql:0_start_0 (node=ha-master, call=18, rc=1, status=complete): unknown error

**** this is my DRBD global_common.conf ****

global {
        usage-count yes;
        # minor-count dialog-refresh disable-ip-verification
}

common {
        protocol C;

        handlers {
                pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
                pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
                local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
                fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
                after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
                # split-brain "/usr/lib/drbd/notify-split-brain.sh root";
                # out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
                # before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
                # after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
        }

        startup {
                 # wfc-timeout 15;
                 # degr-wfc-timeout 60;
                 # outdated-wfc-timeout wait-after-sb
        }

        disk {
                # on-io-error fencing use-bmbv no-disk-barrier
no-disk-flushes
                # no-disk-drain no-md-flushes max-bio-bvecs
        }

        net {
                # cram-hmac-alg sha1;
                # shared-secret "secret";
                # sndbuf-size rcvbuf-size timeout connect-int ping-int
ping-timeout max-buffers
                # max-epoch-size ko-count allow-two-primaries cram-hmac-alg
shared-secret
                # after-sb-0pri after-sb-1pri after-sb-2pri
data-integrity-alg no-tcp-cork
        }

        syncer {
                # rate 150M;
                # rate after al-extents use-rle cpu-mask verify-alg
csums-alg
        }
}

**** this is my postgresql.res ****

resource postgresql {
  startup {
    wfc-timeout 15;
    degr-wfc-timeout 60;
  }

  syncer {
    rate 150M;
    verify-alg md5;
 }

 disk {
   on-io-error detach;
   no-disk-barrier;
   no-disk-flushes;
   no-disk-drain;
   fencing resource-only;
 }

 on ha-master {
    device /dev/drbd0;
    disk /dev/sdb1;
    address 172.70.65.210:7788;
    meta-disk internal;
 }

 on ha-slave {
    device /dev/drbd0;
    disk /dev/sdb1;
    address 172.70.65.220:7788;
    meta-disk internal;
 }

}

**** this is my corosync.conf ****

compatibility: whitetank

totem {
        version: 2
        secauth: off
        threads: 0
        interface {
                ringnumber: 0
                bindnetaddr: 172.70.65.200
                mcastaddr: 226.94.1.1
                mcastport: 5405
                ttl: 1
        }
}

logging {
        fileline: off
        to_stderr: yes
        to_logfile: yes
        to_syslog: yes
        logfile: /var/log/cluster/corosync.log
        debug: on
        timestamp: on
        logger_subsys {
                subsys: AMF
                debug: off
        }
}

amf {
        mode: disabled
}

aisexec{
  user : root
  group : root
}

service{
  # Load the Pacemaker Cluster Resource Manager
  name : pacemaker
  ver : 0
}

DRBD and postgresql, started manually:

version: 8.3.13 (api:88/proto:86-96)
srcversion: 697DE8B1973B1D8914F04DB
 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
    ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:n oos:0

version: 8.3.13 (api:88/proto:86-96)
srcversion: 697DE8B1973B1D8914F04DB
 0: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r-----
    ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:n oos:0

root@ha-master:/mnt# df -hT
Sist. Arq.     Tipo      Tam. Usado Disp. Uso% Montado em
/dev/sda1      ext4      4,0G  1,8G  2,1G  47% /
udev           devtmpfs  473M  4,0K  473M   1% /dev
tmpfs          tmpfs     193M  264K  193M   1% /run
none           tmpfs     5,0M  4,0K  5,0M   1% /run/lock
none           tmpfs     482M   17M  466M   4% /run/shm
/dev/drbd0     ext4      2,0G   69M  1,9G   4% /mnt

root@ha-master:/mnt# service postgresql status
Running clusters: 9.1/main

-- 
------------------------------
Thomaz Luiz Santos
Linux User: #359356
http://thomaz.santos.googlepages.com/
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20131011/bb3d90e1/attachment.htm>