Hello,

I'm trying to set up a cluster for shared storage with Pacemaker, DRBD, and NFS. There are two servers, each with two network interfaces: eth0 is connected to the external network, and eth1 is a direct link between the two nodes for DRBD replication.

             eth1: replication link
  --------------            --------------
  |  Server 1  |------------|  Server 2  |
  --------------            --------------
        |                         |
       eth0                      eth0
        |                         |
             External Network

IPs:
- server1 (storage01): eth0: 10.1.1.70, eth1: 192.168.0.2
- server2 (storage02): eth0: 10.1.1.71, eth1: 192.168.0.3

I can migrate the resource between the two nodes, but when I hard power-off server01 (the node where the resource is running), the second server (server02) does not fail over: the resource doesn't start. Could you take a look at my config files below?

Thanks for your help

Thomas Schneider

drbd.conf
------------------------------------------------------------------------
global {
    usage-count no;
}

common {
    syncer { rate 35M; }
}

resource nfs {
    protocol C;

    handlers {
        fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
        after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
    }

    startup {
        degr-wfc-timeout 120;    # 2 minutes.
    }

    disk {
        on-io-error detach;
        fencing resource-only;
    }

    net {
        max-buffers 2048;
        max-epoch-size 2048;
    }

    syncer {
        rate 35M;
        al-extents 257;
    }

    on storage01.myriapulse.local {
        device /dev/drbd0;
        disk /dev/sda4;
        address 192.168.0.2:7788;
        meta-disk /dev/sda3[0];
    }

    on storage02.myriapulse.local {
        device /dev/drbd0;
        disk /dev/sda4;
        address 192.168.0.3:7788;
        meta-disk /dev/sda3[0];
    }
}
------------------------------------------------------------------------

storage01:~# crm configure show
node storage01.myriapulse.local \
        attributes standby="off"
node storage02.myriapulse.local \
        attributes standby="off"
primitive drbd_nfs ocf:linbit:drbd \
        params drbd_resource="nfs" \
        op monitor interval="15s" \
        meta target-role="Started"
primitive fs_nfs ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/nfs" directory="/share" fstype="ext3" \
        meta is-managed="true"
primitive ftp-server lsb:proftpd \
        op monitor interval="1min"
primitive ip_nfs ocf:heartbeat:IPaddr2 \
        params ip="10.1.1.69" nic="eth0"
primitive nfs-kernel-server lsb:nfs-kernel-server \
        op monitor interval="1min"
group nfs fs_nfs ip_nfs nfs-kernel-server \
        meta target-role="Started"
ms ms_drbd_nfs drbd_nfs \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
location cli-standby-nfs nfs \
        rule $id="cli-standby-rule-nfs" -inf: #uname eq storage02.myriapulse.local
colocation ftp_on_nfs inf: ftp-server nfs
colocation nfs_on_drbd inf: nfs ms_drbd_nfs:Master
order ftp_after_nfs inf: nfs ftp-server
order nfs_after_drbd inf: ms_drbd_nfs:promote nfs:start
property $id="cib-bootstrap-options" \
        dc-version="1.0.5-unknown" \
        cluster-infrastructure="openais" \
        expected-quorum-votes="2" \
        no-quorum-policy="ignore" \
        stonith-enabled="false"
storage01:~#
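(Side note: the cli-standby-nfs location constraint above scores the nfs group -INFINITY on storage02.myriapulse.local; as far as I know, that is the kind of constraint "crm resource migrate" leaves behind after a manual migration. If I read the crm shell correctly, it can be cleared with either of the following, but please correct me if the syntax is off:

storage01:~# crm resource unmigrate nfs
storage01:~# crm configure delete cli-standby-nfs    # same effect, by constraint id
)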
----------------------------------------------------------------------

openais.conf

storage02:~# cat /etc/openais/openais.conf
# Please read the openais.conf.5 manual page
totem {
        version: 2

        # How long before declaring a token lost (ms)
        token: 3000

        # How many token retransmits before forming a new configuration
        token_retransmits_before_loss_const: 10

        # How long to wait for join messages in the membership protocol (ms)
        join: 60

        # How long to wait for consensus to be achieved before starting
        # a new round of membership configuration (ms)
        consensus: 1500

        # Turn off the virtual synchrony filter
        vsftype: none

        # Number of messages that may be sent by one processor on receipt of the token
        max_messages: 20

        # Limit generated nodeids to 31-bits (positive signed integers)
        clear_node_high_bit: yes

        # Disable encryption
        secauth: on

        # How many threads to use for encryption/decryption
        threads: 0

        # Optionally assign a fixed node id (integer)
        # nodeid: 1234

        # This specifies the mode of redundant ring, which may be none, active, or passive.
        rrp_mode: passive

        interface {
                # The following values need to be set based on your environment
                ringnumber: 0
                bindnetaddr: 10.1.1.0
                mcastaddr: 226.94.1.1
                mcastport: 5405
        }
        interface {
                # The following values need to be set based on your environment
                ringnumber: 1
                bindnetaddr: 192.168.0.0
                mcastaddr: 226.94.2.1
                mcastport: 5405
        }
}

logging {
        to_stderr: yes
        debug: off
        timestamp: on
        to_file: /var/log/openais/openais.log
        to_syslog: yes
        syslog_facility: daemon
}

amf {
        mode: disabled
}

service {
        # Load the Pacemaker Cluster Resource Manager
        ver: 0
        name: pacemaker
        use_mgmtd: yes
}

aisexec {
        user: root
        group: root
}
-------------------------------
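(For reference, while running the power-off test I watch the takeover from the surviving node with the standard status tools, nothing exotic:

storage02:~# crm_mon            # live cluster status, Ctrl-C to exit
storage02:~# cat /proc/drbd     # DRBD connection state and roles

The log below is from storage02, starting shortly before storage01 was powered off.)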
Log

Oct 31 02:44:17 storage02 crm_attribute: [24179]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:44:32 storage02 crm_attribute: [24207]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:44:47 storage02 crm_attribute: [24234]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:45:01 storage02 /USR/SBIN/CRON[24237]: (root) CMD ([ -x /usr/lib/sysstat/sa1 ] && { [ -r "$DEFAULT" ] && . "$DEFAULT" ; [ "$ENABLED" = "true" ] && exec /usr/lib/sysstat/sa1 $SA1_OPTIONS 1 1 ; })
Oct 31 02:45:02 storage02 crm_attribute: [24264]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:45:17 storage02 crm_attribute: [24291]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:45:32 storage02 crm_attribute: [24318]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:45:47 storage02 crm_attribute: [24345]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:46:02 storage02 crm_attribute: [24372]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:46:17 storage02 crm_attribute: [24399]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:46:32 storage02 crm_attribute: [24426]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:46:47 storage02 crm_attribute: [24453]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:46:58 storage02 kernel: [419448.665572] e1000: eth1: e1000_watchdog: NIC Link is Down
Oct 31 02:47:01 storage02 openais[23815]: [TOTEM] The token was lost in the OPERATIONAL state.
Oct 31 02:47:01 storage02 openais[23815]: [TOTEM] Receive multicast socket recv buffer size (262142 bytes).
Oct 31 02:47:01 storage02 openais[23815]: [TOTEM] Transmit multicast socket send buffer size (262142 bytes).
Oct 31 02:47:01 storage02 openais[23815]: [TOTEM] Receive multicast socket recv buffer size (262142 bytes).
Oct 31 02:47:01 storage02 openais[23815]: [TOTEM] Transmit multicast socket send buffer size (262142 bytes).
Oct 31 02:47:01 storage02 openais[23815]: [TOTEM] entering GATHER state from 2.
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] entering GATHER state from 0.
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] Creating commit token because I am the rep.
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] Saving state aru b0 high seq received b0
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] Storing new sequence id for ring 23c
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] entering COMMIT state.
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] position [0] member 10.1.1.71:
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] previous ring seq 568 rep 10.1.1.70
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] aru b0 high delivered b0 received flag 1
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] Did not need to originate any messages in recovery.
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] Sending initial ORF token572: quorum lost
Oct 31 02:47:02 storage02 cib: [23821]: info: ais_dispatch: Membership 572: quorum still lost
Oct 31 02:47:02 storage02 crmd: [23825]: info: crm_update_peer: Node storage01.myriapulse.local: id=1174470922 state=lost (new) addr=r(0) ip(10.1.1.70) r(1) ip(192.168.0.2) votes=1 born=544 seen=568 proc=00000000000000000000000000053312
Oct 31 02:47:02 storage02 crmd: [23825]: WARN: check_dead_member: Our DC node (storage01.myriapulse.local) left the cluster
Oct 31 02:47:02 storage02 cib: [23821]: info: crm_update_peer: Node storage01.myriapulse.local: id=1174470922 state=lost (new) addr=r(0) ip(10.1.1.70) r(1) ip(192.168.0.2) votes=1 born=544 seen=568 proc=00000000000000000000000000053312
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_state_transition: State transition S_NOT_DC -> S_ELECTION [ input=I_ELECTION cause=C_FSA_INTERNAL origin=check_dead_member ]
Oct 31 02:47:02 storage02 crmd: [23825]: info: update_dc: Unset DC storage01.myriapulse.local
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_state_transition: State transition S_ELECTION -> S_INTEGRATION [ input=I_ELECTION_DC cause=C_FSA_INTERNAL origin=do_election_check ]
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_te_control: Registering TE UUID: df06c9c7-f73a-4af0-962f-e96e6b3c8df6
Oct 31 02:47:02 storage02 crmd: [23825]: WARN: cib_client_add_notify_callback: Callback already present
Oct 31 02:47:02 storage02 crmd: [23825]: info: set_graph_functions: Setting custom graph functions
Oct 31 02:47:02 storage02 crmd: [23825]: info: unpack_graph: Unpacked transition -1: 0 actions in 0 synapses
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_dc_takeover: Taking over DC status for this partition
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_readwrite: We are now in R/W mode
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_master for section 'all' (origin=local/crmd/18, version=0.244.24): ok (rc=0)
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section cib (origin=local/crmd/19, version=0.244.24): ok (rc=0)
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section crm_config (origin=local/crmd/21, version=0.244.24): ok (rc=0)
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] CLM CONFIGURATION CHANGEoffers based on membership 572
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] New Configuration:complete: op cib_modify for section crm_config (origin=local/crmd/23, version=0.244.24): ok (rc=0)
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_dc_join_offer_all: join-1: Waiting on 1 outstanding join acks
Oct 31 02:47:02 storage02 crmd: [23825]: info: ais_dispatch: Membership 572: quorum still lost
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] #011r(0) ip(10.1.1.71) r(1) ip(192.168.0.3)
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] Members Left:Checking for expired actions every 900000ms
Oct 31 02:47:02 storage02 crmd: [23825]: info: update_dc: Set DC to storage02.myriapulse.local (3.0.1)
Oct 31 02:47:02 storage02 crmd: [23825]: info: ais_dispatch: Membership 572: quorum still lost
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] #011r(0) ip(10.1.1.70) r(1) ip(192.168.0.2)
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] Members Joined:Transitional membership event on ring 572: memb=1, new=0, lost=1
Oct 31 02:47:02 storage02 openais[23815]: [crm ] info: pcmk_peer_update: memb: storage02.myriapulse.local 1191248138
Oct 31 02:47:02 storage02 openais[23815]: [crm ] info: pcmk_peer_update: lost: storage01.myriapulse.local 1174470922
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] CLM CONFIGURATION CHANGE
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] #011r(0) ip(10.1.1.71) r(1) ip(192.168.0.3)
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] Members Left:
Oct 31 02:47:02 storage02 openais[23815]: [crm ] notice: pcmk_peer_update: Stable membership event on ring 572: memb=1, new=0, lost=0
Oct 31 02:47:02 storage02 openais[23815]: [crm ] info: pcmk_peer_update: MEMB: storage02.myriapulse.local 1191248138
Oct 31 02:47:02 storage02 openais[23815]: [crm ] info: ais_mark_unseen_peer_dead: Node storage01.myriapulse.local was not seen in the previous transition
Oct 31 02:47:02 storage02 openais[23815]: [MAIN ] info: update_member: Node 1174470922/storage01.myriapulse.local is now: lost
Oct 31 02:47:02 storage02 openais[23815]: [crm ] info: send_member_notification: Sending membership update 572 to 2 children
Oct 31 02:47:02 storage02 openais[23815]: [SYNC ] This node is within the primary component and will provide service.
Oct 31 02:47:02 storage02 openais[23815]: [TOTEM] entering OPERATIONAL state.
Oct 31 02:47:02 storage02 openais[23815]: [CLM ] got nodejoin message 10.1.1.71
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section crm_config (origin=local/crmd/26, version=0.244.24): ok (rc=0)
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_state_transition: State transition S_INTEGRATION -> S_FINALIZE_JOIN [ input=I_INTEGRATED cause=C_FSA_INTERNAL origin=check_join_state ]
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_state_transition: All 1 cluster nodes responded to the join offer.
Oct 31 02:47:02 storage02 crmd: [23825]: info: do_dc_join_finalize: join-1: Syncing the CIB from storage02.myriapulse.local to the rest of the cluster
Oct 31 02:47:02 storage02 crmd: [23825]: info: te_connect_stonith: Attempting connection to fencing daemon...
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section crm_config (origin=local/crmd/29, version=0.244.24): ok (rc=0)
Oct 31 02:47:02 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_sync for section 'all' (origin=local/crmd/30, version=0.244.24): ok (rc=0)
Oct 31 02:47:03 storage02 crm_attribute: [24480]: info: Invoked: crm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_connect_stonith: ConnectedUpdating node state to member for storage02.myriapulse.local
Oct 31 02:47:03 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section nodes (origin=local/crmd/31, version=0.244.24): ok (rc=0)
Oct 31 02:47:03 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_delete for section //node_state[@uname='storage02.myriapulse.local']/lrm (origin=local/crmd/32, version=0.244.25): ok (rc=0)
Oct 31 02:47:03 storage02 crmd: [23825]: info: erase_xpath_callback: Deletion of "//node_state[@uname='storage02.myriapulse.local']/lrm": ok (rc=0)
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_state_transition: State transition S_FINALIZE_JOIN -> S_POLICY_ENGINE [ input=I_FINALIZED cause=C_FSA_INTERNAL origin=check_join_state ]
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_state_transition: All 1 cluster nodes are eligible to run resources.
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_dc_join_final: Ensuring DC, quorum and node attributes are up-to-date
Oct 31 02:47:03 storage02 crmd: [23825]: info: crm_update_quorum: Updating quorum status to false (call=36)
Oct 31 02:47:03 storage02 crmd: [23825]: info: abort_transition_graph: do_te_invoke:190 - Triggered transition abort (complete=1) : Peer Cancelled
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_pe_invoke: Query 37: Requesting the current CIB: S_POLICY_ENGINE
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_local_callback: Sending full refresh (origin=crmd)
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_trigger_update: Sending flush op to all hosts for: probe_complete (true)
Oct 31 02:47:03 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section nodes (origin=local/crmd/34, version=0.244.26): ok (rc=0)
Oct 31 02:47:03 storage02 crmd: [23825]: WARN: match_down_event: No match for shutdown action on storage01.myriapulse.local
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_update_diff: Stonith/shutdown of storage01.myriapulse.local not matched
Oct 31 02:47:03 storage02 crmd: [23825]: info: abort_transition_graph: te_update_diff:191 - Triggered transition abort (complete=1, tag=node_state, id=storage01.myriapulse.local, magic=NA, cib=0.244.27) : Node failure
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_pe_invoke: Query 38: Requesting the current CIB: S_POLICY_ENGINE
Oct 31 02:47:03 storage02 cib: [23821]: info: log_data_element: cib:diff: - <cib have-quorum="1" dc-uuid="storage01.myriapulse.local" admin_epoch="0" epoch="244" num_updates="27" />
Oct 31 02:47:03 storage02 cib: [23821]: info: log_data_element: cib:diff: + <cib have-quorum="0" dc-uuid="storage02.myriapulse.local" admin_epoch="0" epoch="245" num_updates="1" />
Oct 31 02:47:03 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_modify for section cib (origin=local/crmd/36, version=0.245.1): ok (rc=0)
Oct 31 02:47:03 storage02 crmd: [23825]: info: abort_transition_graph: need_abort:59 - Triggered transition abort (complete=1) : Non-status change
Oct 31 02:47:03 storage02 crmd: [23825]: info: need_abort: Aborting on change to have-quorum
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_pe_invoke: Query 39: Requesting the current CIB: S_POLICY_ENGINE
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_pe_invoke_callback: Invoking the PE: ref=pe_calc-dc-1256953623-10, seq=572, quorate=0
Oct 31 02:47:03 storage02 pengine: [23824]: notice: unpack_config: On loss of CCM Quorum: Ignore
Oct 31 02:47:03 storage02 pengine: [23824]: info: unpack_config: Node scores: 'red' = -INFINITY, 'yellow' = 0, 'green' = 0
Oct 31 02:47:03 storage02 pengine: [23824]: info: determine_online_status: Node storage02.myriapulse.local is online
Oct 31 02:47:03 storage02 pengine: [23824]: info: unpack_rsc_op: ftp-server_monitor_0 on storage02.myriapulse.local returned 0 (ok) instead of the expected value: 7 (not running)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: unpack_rsc_op: Operation ftp-server_monitor_0 found resource ftp-server active on storage02.myriapulse.local
Oct 31 02:47:03 storage02 pengine: [23824]: notice: group_print: Resource Group: nfs
Oct 31 02:47:03 storage02 pengine: [23824]: notice: native_print: fs_nfs#011(ocf::heartbeat:Filesystem):#011Stopped
Oct 31 02:47:03 storage02 pengine: [23824]: notice: native_print: ip_nfs#011(ocf::heartbeat:IPaddr2):#011Stopped
Oct 31 02:47:03 storage02 pengine: [23824]: notice: native_print: nfs-kernel-server#011(lsb:nfs-kernel-server):#011Stopped
Oct 31 02:47:03 storage02 pengine: [23824]: notice: clone_print: Master/Slave Set: ms_drbd_nfs
Oct 31 02:47:03 storage02 pengine: [23824]: notice: print_list: #011Slaves: [ storage02.myriapulse.local ]
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_trigger_update: Sending flush op to all hosts for: master-drbd_nfs:0 (10000)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: print_list: #011Stopped: [ drbd_nfs:1 ]
Oct 31 02:47:03 storage02 pengine: [23824]: notice: native_print: ftp-server#011(lsb:proftpd):#011Stopped
Oct 31 02:47:03 storage02 pengine: [23824]: info: native_merge_weights: ms_drbd_nfs: Rolling back scores from fs_nfs
Oct 31 02:47:03 storage02 pengine: [23824]: WARN: native_color: Resource drbd_nfs:1 cannot run anywhere
Oct 31 02:47:03 storage02 pengine: [23824]: info: native_merge_weights: ms_drbd_nfs: Rolling back scores from fs_nfs
Oct 31 02:47:03 storage02 pengine: [23824]: info: master_color: Promoting drbd_nfs:0 (Slave storage02.myriapulse.local)
Oct 31 02:47:03 storage02 pengine: [23824]: info: master_color: ms_drbd_nfs: Promoted 1 instances of a possible 1 to master
Oct 31 02:47:03 storage02 pengine: [23824]: info: native_merge_weights: fs_nfs: Rolling back scores from ip_nfs
Oct 31 02:47:03 storage02 pengine: [23824]: info: native_merge_weights: fs_nfs: Rolling back scores from ftp-server
Oct 31 02:47:03 storage02 pengine: [23824]: WARN: native_color: Resource fs_nfs cannot run anywhere
Oct 31 02:47:03 storage02 pengine: [23824]: info: native_merge_weights: ip_nfs: Rolling back scores from nfs-kernel-server
Oct 31 02:47:03 storage02 pengine: [23824]: WARN: native_color: Resource ip_nfs cannot run anywhere
Oct 31 02:47:03 storage02 pengine: [23824]: WARN: native_color: Resource nfs-kernel-server cannot run anywhere
Oct 31 02:47:03 storage02 pengine: [23824]: info: master_color: ms_drbd_nfs: Promoted 1 instances of a possible 1 to master
Oct 31 02:47:03 storage02 pengine: [23824]: WARN: native_color: Resource ftp-server cannot run anywhere
Oct 31 02:47:03 storage02 pengine: [23824]: info: RecurringOp: Cancelling action drbd_nfs:0_monitor_15000 (Slave vs. Master)
Oct 31 02:47:03 storage02 pengine: [23824]: info: RecurringOp: Cancelling action drbd_nfs:0_monitor_15000 (Slave vs. Master)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: LogActions: Leave resource fs_nfs#011(Stopped)
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_trigger_update: Sending flush op to all hosts for: terminate (<null>)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: LogActions: Leave resource ip_nfs#011(Stopped)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: LogActions: Leave resource nfs-kernel-server#011(Stopped)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: LogActions: Promote drbd_nfs:0#011(Slave -> Master storage02.myriapulse.local)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: LogActions: Leave resource drbd_nfs:1#011(Stopped)
Oct 31 02:47:03 storage02 pengine: [23824]: notice: LogActions: Leave resource ftp-server#011(Stopped)
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_state_transition: State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [ input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ]
Oct 31 02:47:03 storage02 cib: [24481]: info: write_cib_contents: Archived previous version as /var/lib/heartbeat/crm/cib-64.raw
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_trigger_update: Sending flush op to all hosts for: master-drbd_nfs:1 (<null>)
Oct 31 02:47:03 storage02 crmd: [23825]: info: unpack_graph: Unpacked transition 0: 10 actions in 10 synapses
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_te_invoke: Processing graph 0 (ref=pe_calc-dc-1256953623-10) derived from /var/lib/pengine/pe-warn-258.bz2
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_rsc_command: Initiating action 1: cancel drbd_nfs:0_monitor_15000 on storage02.myriapulse.local (local)
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_trigger_update: Sending flush op to all hosts for: pingdnet1 (<null>)
Oct 31 02:47:03 storage02 attrd: [23823]: info: attrd_trigger_update: Sending flush op to all hosts for: shutdown (<null>)
Oct 31 02:47:03 storage02 crmd: [23825]: info: send_direct_ack: ACK'ing resource op drbd_nfs:0_monitor_15000 from 1:0:0:df06c9c7-f73a-4af0-962f-e96e6b3c8df6: lrm_invoke-lrmd-1256953623-12
Oct 31 02:47:03 storage02 crmd: [23825]: info: process_te_message: Processing (N)ACK lrm_invoke-lrmd-1256953623-12 from storage02.myriapulse.local
Oct 31 02:47:03 storage02 crmd: [23825]: info: match_graph_event: Action drbd_nfs:0_monitor_15000 (1) confirmed on storage02.myriapulse.local (rc=0)
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_pseudo_action: Pseudo action 27 fired and confirmed
Oct 31 02:47:03 storage02 crmd: [23825]: info: process_lrm_event: LRM operation drbd_nfs:0_monitor_15000 (call=10, rc=-2, cib-update=0, confirmed=true) Cancelled unknown exec error
Oct 31 02:47:03 storage02 cib: [24481]: info: write_cib_contents: Wrote version 0.245.0 of the CIB to disk (digest: d1cd4b3ddc5831e6b702492a19e5311f)
Oct 31 02:47:03 storage02 pengine: [23824]: WARN: process_pe_message: Transition 0: WARNINGs found during PE processing. PEngine Input stored in: /var/lib/pengine/pe-warn-258.bz2
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_rsc_command: Initiating action 54: notify drbd_nfs:0_pre_notify_promote_0 on storage02.myriapulse.local (local)
Oct 31 02:47:03 storage02 pengine: [23824]: info: process_pe_message: Configuration WARNINGs found during PE processing. Please run "crm_verify -L" to identify issues.
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_lrm_rsc_op: Performing key=54:0:0:df06c9c7-f73a-4af0-962f-e96e6b3c8df6 op=drbd_nfs:0_notify_0 )
Oct 31 02:47:03 storage02 cib: [24481]: info: retrieveCib: Reading cluster configuration from: /var/lib/heartbeat/crm/cib.HQ5vBU (digest: /var/lib/heartbeat/crm/cib.Pn6wgG)
Oct 31 02:47:03 storage02 lrmd: [23822]: info: rsc:drbd_nfs:0:11: notifyoperation drbd_nfs:0_notify_0 (call=11, rc=0, cib-update=41, confirmed=true) complete ok
Oct 31 02:47:03 storage02 crmd: [23825]: info: match_graph_event: Action drbd_nfs:0_pre_notify_promote_0 (54) confirmed on storage02.myriapulse.local (rc=0)
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_pseudo_action: Pseudo action 28 fired and confirmed
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_pseudo_action: Pseudo action 25 fired and confirmed
Oct 31 02:47:03 storage02 crmd: [23825]: info: te_rsc_command: Initiating action 12: promote drbd_nfs:0_promote_0 on storage02.myriapulse.local (local)
Oct 31 02:47:03 storage02 crmd: [23825]: info: do_lrm_rsc_op: Performing key=12:0:0:df06c9c7-f73a-4af0-962f-e96e6b3c8df6 op=drbd_nfs:0_promote_0 )
Oct 31 02:47:03 storage02 lrmd: [23822]: info: rsc:drbd_nfs:0:12: promotenot arrive in time.
Oct 31 02:47:04 storage02 kernel: [419455.436376] block drbd0: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown )
Oct 31 02:47:04 storage02 kernel: [419455.436390] block drbd0: asender terminated
Oct 31 02:47:04 storage02 kernel: [419455.436392] block drbd0: Terminating asender thread
Oct 31 02:47:04 storage02 kernel: [419455.436319] block drbd0: short read expecting header on sock: r=-512
Oct 31 02:47:04 storage02 kernel: [419455.504319] block drbd0: Connection closed
Oct 31 02:47:04 storage02 kernel: [419455.504319] block drbd0: conn( NetworkFailure -> Unconnected )
Oct 31 02:47:04 storage02 kernel: [419455.504319] block drbd0: receiver terminated
Oct 31 02:47:04 storage02 kernel: [419455.504319] block drbd0: Restarting receiver thread
Oct 31 02:47:04 storage02 kernel: [419455.504319] block drbd0: receiver (re)started
Oct 31 02:47:04 storage02 kernel: [419455.536327] block drbd0: helper command: /sbin/drbdadm fence-peer minor-0
Oct 31 02:47:05 storage02 crm-fence-peer.sh[24533]: invoked for nfs
Oct 31 02:47:05 storage02 cibadmin: [24546]: info: Invoked: cibadmin -Q -t 1
Oct 31 02:47:06 storage02 cibadmin: [24558]: info: Invoked: cibadmin -Q -t 1
Oct 31 02:47:06 storage02 cibadmin: [24570]: info: Invoked: cibadmin -C -o constraints -X <rsc_location rsc="ms_drbd_nfs" id="drbd-fence-by-handler-ms_drbd_nfs">#012 <rule role="Master" score="-INFINITY" id="drbd-fence-by-handler-rule-ms_drbd_nfs">#012 <expression attribute="#uname" operation="ne" value="storage02.myriapulse.local" id="drbd-fence-by-handler-expr-ms_drbd_nfs"/>#012 </rule>#012</rsc_location>
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: - <cib admin_epoch="0" epoch="245" num_updates="3" />
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + <cib admin_epoch="0" epoch="246" num_updates="1" >
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + <configuration >
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + <constraints >
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + <rsc_location rsc="ms_drbd_nfs" id="drbd-fence-by-handler-ms_drbd_nfs" __crm_diff_marker__="added:top" >
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + <rule role="Master" score="-INFINITY" id="drbd-fence-by-handler-rule-ms_drbd_nfs" >
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + <expression attribute="#uname" operation="ne" value="storage02.myriapulse.local" id="drbd-fence-by-handler-expr-ms_drbd_nfs" />
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + </rule>
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + </rsc_location>
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + </constraints>
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + </configuration>
Oct 31 02:47:06 storage02 cib: [23821]: info: log_data_element: cib:diff: + </cib>
Oct 31 02:47:06 storage02 crmd: [23825]: info: abort_transition_graph: need_abort:59 - Triggered transition abort (complete=0) : Non-status change
Oct 31 02:47:06 storage02 cib: [23821]: info: cib_process_request: Operation complete: op cib_create for section constraints (origin=local/cibadmin/2, version=0.246.1): ok (rc=0)
Oct 31 02:47:06 storage02 crmd: [23825]: info: update_abort_priority: Abort priority upgraded from 0 to 1000000
Oct 31 02:47:06 storage02 crmd: [23825]: info: update_abort_priority: Abort action done superceeded by restart
Oct 31 02:47:06 storage02 crmd: [23825]: info: need_abort: Aborting on change to admin_epoch
Oct 31 02:47:06 storage02 kernel: [419458.059824] block drbd0: helper command: /sbin/drbdadm fence-peer minor-0 exit code 4 (0x400)
Oct 31 02:47:06 storage02 kernel: [419458.059824] block drbd0: fence-peer helper returned 4 (peer was fenced)
Oct 31 02:47:06 storage02 lrmd: [23822]: info: RA output: (drbd_nfs:0:promote:stderr) 0: State change failed: (-7) Refusing to be Primary while peer is not outdated
Oct 31 02:47:06 storage02 lrmd: [23822]: info: RA output: (drbd_nfs:0:promote:stderr) Command 'drbdsetup 0 primary' terminated with exit code 11
Oct 31 02:47:06 storage02 kernel: [419458.059824] block drbd0: conn( Unconnected -> WFConnection )
Oct 31 02:47:06 storage02 drbd[24504]: ERROR: nfs: Called drbdadm -c /etc/drbd.conf primary nfs
Oct 31 02:47:06 storage02 cib: [24571]: info: write_cib_contents: Archived previous version as /var/lib/heartbeat/crm/cib-65.raw
Oct 31 02:47:06 storage02 lrmd: [23822]: info: RA output: (drbd_nfs:0:promote:stderr) 2009/10/31_02:47:06 ERROR: nfs: Called drbdadm -c /etc/drbd.conf primary nfs
Oct 31 02:47:06 storage02 cib: [24571]: info: write_cib_contents: Wrote version 0.246.0 of the CIB to disk (digest: f4ae8df353f75d942fe9f71d498328ad)
Oct 31 02:47:06 storage02 cib: [24571]: info: retrieveCib: Reading cluster configuration from: /var/lib/heartbeat/crm/cib.WYi97O (digest: /var/lib/heartbeat/crm/cib.0KwMXG)
Oct 31 02:47:06 storage02 drbd[24504]: ERROR: nfs: Exit code 11(drbd_nfs:0:promote:stderr) 2009/10/31_02:47:06 ERROR: nfs: Exit code 11
Oct 31 02:47:06 storage02 drbd[24504]: ERROR: nfs: Command output: (drbd_nfs:0:promote:stderr) 2009/10/31_02:47:06 ERROR: nfs: Command output:
Oct 31 02:47:06 storage02 lrmd: [23822]: info: RA output: (drbd_nfs:0:promote:stdout)
Oct 31 02:47:06 storage02 kernel: [419458.084631] block drbd0: helper command: /sbin/drbdadm fence-peer minor-0
Oct 31 02:47:07 storage02 crm-fence-peer.sh[24594]: invoked for nfs
Oct 31 02:47:07 storage02 kernel: [419459.283186] block drbd0: helper command: /sbin/drbdadm fence-peer minor-0 exit code 4 (0x400)
Oct 31 02:47:07 storage02 kernel: [419459.283186] block drbd0: fence-peer helper returned 4 (peer was fenced)
Oct 31 02:47:07 storage02 kernel: [419459.283186] block drbd0: role( Secondary -> Primary ) pdsk( DUnknown -> Outdated )
Oct 31 02:47:07 storage02 kernel: [419459.343187] block drbd0: Creating new current UUID
Oct 31 02:47:07 storage02 lrmd: [23822]: info: RA output: (drbd_nfs:0:promote:stdout)
Oct 31 02:47:08 storage02 crmd: [23825]: info: process_lrm_event: LRM operation drbd_nfs:0_promote_0 (call=12, rc=0, cib-update=42, confirmed=true) complete ok
Oct 31 02:47:08 storage02 crmd: [23825]: info: match_graph_event: Action drbd_nfs:0_promote_0 (12) confirmed on storage02.myriapulse.local (rc=0)
Oct 31 02:47:08 storage02 crmd: [23825]: info: te_pseudo_action: Pseudo action 26 fired and confirmed
Oct 31 02:47:08 storage02 crmd: [23825]: info: te_pseudo_action: Pseudo action 29 fired and confirmed
Oct 31 02:47:08 storage02 crmd: [23825]: info: te_rsc_command: Initiating action 55: notify drbd_nfs:0_post_notify_promote_0 on storage02.myriapulse.local (local)
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_lrm_rsc_op: Performing key=55:0:0:df06c9c7-f73a-4af0-962f-e96e6b3c8df6 op=drbd_nfs:0_notify_0 )
Oct 31 02:47:08 storage02 lrmd: [23822]: info: rsc:drbd_nfs:0:13: notifycrm_attribute -N storage02.myriapulse.local -n master-drbd_nfs:0 -Q -l reboot -v 10000
Oct 31 02:47:08 storage02 lrmd: [23822]: info: RA output: (drbd_nfs:0:notify:stdout)
Oct 31 02:47:08 storage02 crmd: [23825]: info: process_lrm_event: LRM operation drbd_nfs:0_notify_0 (call=13, rc=0, cib-update=43, confirmed=true) complete ok
Oct 31 02:47:08 storage02 crmd: [23825]: info: match_graph_event: Action drbd_nfs:0_post_notify_promote_0 (55) confirmed on storage02.myriapulse.local (rc=0)
Oct 31 02:47:08 storage02 crmd: [23825]: info: te_pseudo_action: Pseudo action 30 fired and confirmed
Oct 31 02:47:08 storage02 crmd: [23825]: info: run_graph: ====================================================
Oct 31 02:47:08 storage02 crmd: [23825]: notice: run_graph: Transition 0 (Complete=10, Pending=0, Fired=0, Skipped=0, Incomplete=0, Source=/var/lib/pengine/pe-warn-258.bz2): Complete
Oct 31 02:47:08 storage02 crmd: [23825]: info: te_graph_trigger: Transition 0 is now complete
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_state_transition: State transition S_TRANSITION_ENGINE -> S_POLICY_ENGINE [ input=I_PE_CALC cause=C_FSA_INTERNAL origin=notify_crmd ]
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_state_transition: All 1 cluster nodes are eligible to run resources.
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_pe_invoke: Query 44: Requesting the current CIB: S_POLICY_ENGINE
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_pe_invoke_callback: Invoking the PE: ref=pe_calc-dc-1256953628-16, seq=572, quorate=0
Oct 31 02:47:08 storage02 pengine: [23824]: notice: unpack_config: On loss of CCM Quorum: Ignore
Oct 31 02:47:08 storage02 pengine: [23824]: info: unpack_config: Node scores: 'red' = -INFINITY, 'yellow' = 0, 'green' = 0
Oct 31 02:47:08 storage02 pengine: [23824]: info: determine_online_status: Node storage02.myriapulse.local is online
Oct 31 02:47:08 storage02 pengine: [23824]: info: unpack_rsc_op: ftp-server_monitor_0 on storage02.myriapulse.local returned 0 (ok) instead of the expected value: 7 (not running)
Oct 31 02:47:08 storage02 pengine: [23824]: notice: unpack_rsc_op: Operation ftp-server_monitor_0 found resource ftp-server active on storage02.myriapulse.local
Oct 31 02:47:08 storage02 pengine: [23824]: notice: group_print: Resource Group: nfs
Oct 31 02:47:08 storage02 pengine: [23824]: notice: native_print: fs_nfs#011(ocf::heartbeat:Filesystem):#011Stopped
Oct 31 02:47:08 storage02 pengine: [23824]: notice: native_print: ip_nfs#011(ocf::heartbeat:IPaddr2):#011Stopped
Oct 31 02:47:08 storage02 pengine: [23824]: notice: native_print: nfs-kernel-server#011(lsb:nfs-kernel-server):#011Stopped
Oct 31 02:47:08 storage02 pengine: [23824]: notice: clone_print: Master/Slave Set: ms_drbd_nfs
Oct 31 02:47:08 storage02 pengine: [23824]: notice: print_list: #011Masters: [ storage02.myriapulse.local ]
Oct 31 02:47:08 storage02 pengine: [23824]: notice: print_list: #011Stopped: [ drbd_nfs:1 ]
Oct 31 02:47:08 storage02 pengine: [23824]: notice: native_print: ftp-server#011(lsb:proftpd):#011Stopped
Oct 31 02:47:08 storage02 pengine: [23824]: info: native_merge_weights: ms_drbd_nfs: Rolling back scores from fs_nfs
Oct 31 02:47:08 storage02 pengine: [23824]: WARN: native_color: Resource drbd_nfs:1 cannot run anywhere
Oct 31 02:47:08 storage02 pengine: [23824]: info: native_merge_weights: ms_drbd_nfs: Rolling back scores from fs_nfs
Oct 31 02:47:08 storage02 pengine: [23824]: info: master_color: Promoting drbd_nfs:0 (Master storage02.myriapulse.local)
Oct 31 02:47:08 storage02 pengine: [23824]: info: master_color: ms_drbd_nfs: Promoted 1 instances of a possible 1 to master
Oct 31 02:47:08 storage02 pengine: [23824]: info: native_merge_weights: fs_nfs: Rolling back scores from ip_nfs
Oct 31 02:47:08 storage02 pengine: [23824]: info: native_merge_weights: fs_nfs: Rolling back scores from ftp-server
Oct 31 02:47:08 storage02 pengine: [23824]: WARN: native_color: Resource fs_nfs cannot run anywhere
Oct 31 02:47:08 storage02 pengine: [23824]: info: native_merge_weights: ip_nfs: Rolling back scores from nfs-kernel-server
Oct 31 02:47:08 storage02 pengine: [23824]: WARN: native_color: Resource ip_nfs cannot run anywhere
Oct 31 02:47:08 storage02 pengine: [23824]: WARN: native_color: Resource nfs-kernel-server cannot run anywhere
Oct 31 02:47:08 storage02 pengine: [23824]: info: master_color: ms_drbd_nfs: Promoted 1 instances of a possible 1 to master
Oct 31 02:47:08 storage02 pengine: [23824]: WARN: native_color: Resource ftp-server cannot run anywhere
Oct 31 02:47:08 storage02 pengine: [23824]: notice: LogActions: Leave resource fs_nfs#011(Stopped)
Oct 31 02:47:08 storage02 pengine: [23824]: notice: LogActions: Leave resource ip_nfs#011(Stopped)
Oct 31 02:47:08 storage02 pengine: [23824]: notice: LogActions: Leave resource nfs-kernel-server#011(Stopped)
Oct 31 02:47:08 storage02 pengine: [23824]: notice: LogActions: Leave resource drbd_nfs:0#011(Master storage02.myriapulse.local)
Oct 31 02:47:08 storage02 pengine: [23824]: notice: LogActions: Leave resource drbd_nfs:1#011(Stopped)
Oct 31 02:47:08 storage02 pengine: [23824]: notice: LogActions: Leave resource ftp-server#011(Stopped)
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_state_transition: State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [ input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ]
Oct 31 02:47:08 storage02 crmd: [23825]: WARN: destroy_action: Cancelling timer for action 1 (src=44)
Oct 31 02:47:08 storage02 crmd: [23825]: info: unpack_graph: Unpacked transition 1: 0 actions in 0 synapses
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_te_invoke: Processing graph 1 (ref=pe_calc-dc-1256953628-16) derived from /var/lib/pengine/pe-warn-259.bz2
Oct 31 02:47:08 storage02 crmd: [23825]: info: run_graph: ====================================================
Oct 31 02:47:08 storage02 crmd: [23825]: notice: run_graph: Transition 1 (Complete=0, Pending=0, Fired=0, Skipped=0, Incomplete=0, Source=/var/lib/pengine/pe-warn-259.bz2): Complete
Oct 31 02:47:08 storage02 crmd: [23825]: info: te_graph_trigger: Transition 1 is now complete
Oct 31 02:47:08 storage02 crmd: [23825]: info: notify_crmd: Transition 1 status: done - <null>
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_state_transition: State transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS cause=C_FSA_INTERNAL origin=notify_crmd ]
Oct 31 02:47:08 storage02 crmd: [23825]: info: do_state_transition: Starting PEngine Recheck Timer
Oct 31 02:47:08 storage02 pengine: [23824]: WARN: process_pe_message: Transition 1: WARNINGs found during PE processing. PEngine Input stored in: /var/lib/pengine/pe-warn-259.bz2
Oct 31 02:47:08 storage02 pengine: [23824]: info: process_pe_message: Configuration WARNINGs found during PE processing. Please run "crm_verify -L" to identify issues.

Thomas Schneider
Director of Operations
Euskill SARL
Web: www.euskill.com
Mobile: +33 (0)6 19 26 47 76
Mail: thomas.schneider at euskill.com
5 rue de Phalsbourg
F-67000 Strasbourg
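PS: since stonith-enabled is currently "false", I also wonder whether the missing STONITH devices play a role here together with the DRBD fence-peer handler. If real STONITH turns out to be necessary, I assume (untested, and the IPMI addresses and credentials below are made up) it would be configured along these lines:

storage01:~# crm configure primitive st-storage01 stonith:external/ipmi \
        params hostname="storage01.myriapulse.local" ipaddr="10.1.1.80" \
               userid="admin" passwd="secret" interface="lan"
storage01:~# crm configure primitive st-storage02 stonith:external/ipmi \
        params hostname="storage02.myriapulse.local" ipaddr="10.1.1.81" \
               userid="admin" passwd="secret" interface="lan"
storage01:~# crm configure location st01-not-on-01 st-storage01 -inf: storage01.myriapulse.local
storage01:~# crm configure location st02-not-on-02 st-storage02 -inf: storage02.myriapulse.local
storage01:~# crm configure property stonith-enabled="true"

Corrections welcome if that is not the right direction.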