Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.
Hello all,
I finally managed to capture a log during a DRBD crash.
I have a two-node RHEL 5.3 cluster running kernel 2.6.18-164.el5xen and
a self-compiled drbd-8.3.1-3.
Both nodes have a dedicated 1 Gbit Ethernet back-to-back connection over
RTL8169sb/8110sb cards.
When I run applications that constantly read from or write to the disks
(active/active config), DRBD keeps crashing.
Now I have logs that show the reason:
______________________
ON TWEETY1
Oct 20 15:46:52 localhost kernel: drbd2: Digest integrity check FAILED.
Oct 20 15:46:52 localhost kernel: drbd2: Digest integrity check FAILED.
Oct 20 15:46:52 localhost kernel: drbd2: error receiving Data, l: 540!
Oct 20 15:46:52 localhost kernel: drbd2: error receiving Data, l: 540!
Oct 20 15:46:52 localhost kernel: drbd2: peer( Primary -> Unknown )
conn( Connected -> ProtocolError ) pdsk( UpToDate -> DUnknown ) susp( 0
-> 1 )
Oct 20 15:46:52 localhost kernel: drbd2: peer( Primary -> Unknown )
conn( Connected -> ProtocolError ) pdsk( UpToDate -> DUnknown ) susp( 0
-> 1 )
Oct 20 15:46:52 localhost kernel: drbd2: asender terminated
Oct 20 15:46:52 localhost kernel: drbd2: asender terminated
Oct 20 15:46:52 localhost kernel: drbd2: Terminating asender thread
Oct 20 15:46:52 localhost kernel: drbd2: Terminating asender thread
Oct 20 15:46:52 localhost kernel: drbd2: Creating new current UUID
Oct 20 15:46:52 localhost kernel: drbd2: Creating new current UUID
Oct 20 15:46:52 localhost clurgmgrd: [4161]: <info>
Executing /etc/init.d/drbd status
Oct 20 15:46:52 localhost clurgmgrd: [4161]: <info>
Executing /etc/init.d/drbd status
Oct 20 15:46:52 localhost kernel: drbd2: Connection closed
Oct 20 15:46:52 localhost kernel: drbd2: Connection closed
___________________________
ON TWEETY2
Oct 20 15:46:52 localhost kernel: drbd2: sock was reset by peer
Oct 20 15:46:52 localhost kernel: drbd2: peer( Primary -> Unknown )
conn( Connected -> BrokenPipe ) pdsk( UpToDate -> DUnknown ) susp( 0 ->
1 )
Oct 20 15:46:52 localhost kernel: drbd2: short read expecting header on
sock: r=-104
Oct 20 15:46:52 localhost kernel: drbd2: meta connection shut down by
peer.
Oct 20 15:46:52 localhost kernel: drbd2: asender terminated
Oct 20 15:46:52 localhost kernel: drbd2: Terminating asender thread
Oct 20 15:46:52 localhost kernel: drbd2: Creating new current UUID
Oct 20 15:46:52 localhost kernel: drbd2: Connection closed
Oct 20 15:46:52 localhost kernel: drbd2: helper command: /sbin/drbdadm
fence-peer minor-2
____________________
DRBD.CONF
#
# drbd.conf
#
global {
usage-count yes;
}
common {
protocol C;
syncer {
rate 100M;
al-extents 257;
}
handlers {
pri-on-incon-degr "echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
outdate-peer "/sbin/obliterate";
pri-lost "echo pri-lost. Have a look at the log files. | mail -s
'DRBD Alert' root; echo b > /proc/sysrq-trigger ; reboot -f";
split-brain "echo split-brain. drbdadm -- --discard-my-data connect
$DRBD_RESOURCE ? | mail -s 'DRBD Alert' root";
}
startup {
wfc-timeout 60;
degr-wfc-timeout 60; # 1 minute.
become-primary-on both;
}
disk {
fencing resource-and-stonith;
}
net {
sndbuf-size 512k;
timeout 60; # 6 seconds (unit = 0.1 seconds)
connect-int 10; # 10 seconds (unit = 1 second)
ping-int 10; # 10 seconds (unit = 1 second)
ping-timeout 50; # 5 seconds (unit = 0.1 seconds)
max-buffers 2048;
max-epoch-size 2048;
ko-count 10;
allow-two-primaries;
cram-hmac-alg "sha1";
shared-secret "*****";
after-sb-0pri discard-least-changes;
after-sb-1pri violently-as0p;
after-sb-2pri violently-as0p;
rr-conflict call-pri-lost;
data-integrity-alg "crc32c";
}
}
resource r0 {
device /dev/drbd0;
disk /dev/hda4;
meta-disk internal;
on tweety-1 { address 10.254.254.253:7788; }
on tweety-2 { address 10.254.254.254:7788; }
}
resource r1 {
device /dev/drbd1;
disk /dev/hdb4;
meta-disk internal;
on tweety-1 { address 10.254.254.253:7789; }
on tweety-2 { address 10.254.254.254:7789; }
}
resource r2 {
device /dev/drbd2;
disk /dev/sda1;
meta-disk internal;
on tweety-1 { address 10.254.254.253:7790; }
on tweety-2 { address 10.254.254.254:7790; }
}
_________
Also available at http://pastebin.ca/1633173
How can I solve this?
Thank you All for your time.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20091020/dec50695/attachment.htm>