Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.
Hello,
I'm new to linux and drbd.
We have set up primary and secondary machines. Our primary machine
works fine. After I rebooted my secondary machine and tried to start
drbd, I got the following error message.
root at suukyi:~# /etc/init.d/drbd start
Starting DRBD resources: can not open /dev/drbd0: No such file or directory
can not open /dev/drbd1: No such file or directory
can not open /dev/drbd2: No such file or directory
can not open /dev/drbd3: No such file or directory
[ d0 can not open /dev/drbd0: No such file or directory
cmd /sbin/drbdsetup /dev/drbd0 disk /dev/sda10 /dev/sda14 0
--on-io-error=detach failed!
I run the following script manually
for i in $(seq 0 15) ; do
mknod /dev/drbd$i b 147 $i ; done
After I ran the above script, I could start drbd with /etc/init.d/drbd start.
/etc/init.d/drbd start
Starting DRBD resources: [ d0 d1 d2 d3 s0 s1 s2 s3 n0 n1 n2 n3 ].
root at suukyi:~# cat /proc/drbd
version: 0.7.15 (api:77/proto:74)
SVN Revision: 2020 build by root at suukyi, 2006-08-24 16:50:05
0: cs:Connected st:Secondary/Secondary ld:Consistent
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0
1: cs:SyncTarget st:Secondary/Primary ld:Inconsistent
ns:0 nr:27648 dw:27648 dr:0 al:0 bm:2 lo:0 pe:0 ua:0 ap:0
[=>..................] sync'ed: 5.6% (488200/515848)K
finish: 0:00:34 speed: 13,824 (13,824) K/sec
2: cs:PausedSyncT st:Secondary/Primary ld:Inconsistent
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0
3: cs:PausedSyncT st:Secondary/Primary ld:Inconsistent
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0
Attached is our drbd script and .conf.
Please help me.
Thanks
Ganesan
-------------- next part --------------
A non-text attachment was scrubbed...
Name: drbd
Type: application/octet-stream
Size: 2995 bytes
Desc: not available
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20060926/fc495fed/attachment.obj>
-------------- next part --------------
#
# drbd.conf example
#
# parameters you _need_ to change are the hostname, device, disk,
# meta-disk, address and port in the "on <hostname> {}" sections.
#
# you ought to know about the protocol, and the various timeouts.
#
# you probably want to set the rate in the syncer sections
#
# NOTE common pitfall:
# rate is given in units of _byte_ not bit
#
#
# increase timeout and maybe ping-int in net{}, if you see
# problems with "connection lost/connection established"
# (or change your setup to reduce network latency; make sure full
# duplex behaves as such; check average roundtrip times while
# network is saturated; and so on ...)
#
#
# Upgrading from DRBD-0.6.x
#
# Using the size parameter in the disk section (was disk-size) is
# no longer valid. The agreed disk size is now stored
# in DRBD's non volatile meta data files.
#
# NOTE that if you do not have some dedicated partition to use for
# the meta-data, you may use 'internal' meta-data.
#
# THIS HOWEVER WILL DESTROY THE LAST 128M
# OF THE LOWER LEVEL DEVICE.
#
# So you better make sure you shrink the filesystem by 128M FIRST!
# or by 132M just to be sure... :)
#
skip {
As you can see, you can also comment chunks of text
with a 'skip[optional nonsense]{ skipped text }' section.
This comes in handy, if you just want to comment out
some 'resource <some name> {...}' section:
just precede it with 'skip'.
The basic format of option assignment is
<option name><linear whitespace><value>;
It should be obvious from the examples below,
but if you really care to know the details:
<option name> :=
valid options in the respective scope
<value> := <num>|<string>|<choice>|...
depending on the set of allowed values
for the respective option.
<num> := [0-9]+, sometimes with an optional suffix of K,M,G
<string> := (<name>|\"([^\"\\\n]*|\\.)*\")+
<name> := [/_.A-Za-z0-9-]+
}
#
# At most ONE global section is allowed.
# It must precede any resource section.
#
#global {
# use this if you want to define more resources later
# without reloading the module.
# by default we load the module with exactly as many devices
# as are configured in this file.
#
# minor-count 5;
# The user dialog counts and displays the seconds it waited so
# far. You might want to disable this if you have the console
# of your server connected to a serial terminal server with
# limited logging capacity.
# The Dialog will print the count each 'dialog-refresh' seconds,
# set it to 0 to disable redrawing completely. [ default = 1 ]
#
# dialog-refresh 5; # 5 seconds
# You might disable one of drbdadm's sanity checks.
# disable-ip-verification;
#}
#
# this need not be r#, you may use phony resource names,
# like "resource web" or "resource mail", too
#
#===========#
# Resources #
#===========#
# Resource dbdata: mirrors /dev/sda10 between hosts suukyi and xanana as /dev/drbd0.
resource dbdata { # This is for /dbdata
# Protocol C: a write is reported as completed only once it has reached
# both the local and the remote disk.
protocol C;
# Command to run when the cluster starts up in degraded mode but knows
# it has inconsistent data.
incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
startup {
# Connection wait at startup, in seconds; 0 means unlimited.
wfc-timeout 0;
# Wait limit used instead when a degraded (single-node) cluster is rebooted.
degr-wfc-timeout 120; # 2 minutes.
}
disk {
# On a lower-level I/O error the node drops its backing storage device
# and continues in diskless mode.
on-io-error detach;
}
# No net options are set for this resource.
net {
}
syncer {
# Resynchronisation bandwidth limit; the unit is based on bytes, not bits.
rate 10M;
# Sync group number; groups are resynchronised one after another in
# ascending order. Resources on one physical disk go in different groups.
group 1;
# Size of the active set: each extent is 4M, so 257 extents is about 1GB.
al-extents 257;
}
# Per-host settings for host suukyi.
on suukyi {
device /dev/drbd0;
disk /dev/sda10;
address 10.0.0.1:7788;
# External meta-data on /dev/sda14, index 0; one block device can hold
# the meta-data of several resources, one index each.
meta-disk /dev/sda14[0];
}
# Per-host settings for host xanana.
on xanana {
device /dev/drbd0;
disk /dev/sda10;
address 10.0.0.2:7788;
# External meta-data on /dev/sda14, index 0.
meta-disk /dev/sda14[0];
}
}
# Resource home: mirrors /dev/sda11 between hosts suukyi and xanana as /dev/drbd1.
resource home { # This is for /home
# Protocol C: a write is reported as completed only once it has reached
# both the local and the remote disk.
protocol C;
# Command to run when the cluster starts up in degraded mode but knows
# it has inconsistent data.
incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
startup {
# Connection wait at startup, in seconds; 0 means unlimited.
wfc-timeout 0;
# Wait limit used instead when a degraded (single-node) cluster is rebooted.
degr-wfc-timeout 120; # 2 minutes.
}
disk {
# On a lower-level I/O error the node drops its backing storage device
# and continues in diskless mode.
on-io-error detach;
}
# No net options are set for this resource.
net {
}
syncer {
# Resynchronisation bandwidth limit; the unit is based on bytes, not bits.
rate 10M;
# Sync group number; groups are resynchronised one after another in
# ascending order. Resources on one physical disk go in different groups.
group 2;
# Size of the active set: each extent is 4M, so 257 extents is about 1GB.
al-extents 257;
}
# Per-host settings for host suukyi.
on suukyi {
device /dev/drbd1;
disk /dev/sda11;
address 10.0.0.1:7789;
# External meta-data on /dev/sda14, index 1; one block device can hold
# the meta-data of several resources, one index each.
meta-disk /dev/sda14[1];
}
# Per-host settings for host xanana.
on xanana {
device /dev/drbd1;
disk /dev/sda11;
address 10.0.0.2:7789;
# External meta-data on /dev/sda14, index 1.
meta-disk /dev/sda14[1];
}
}
# Resource backup: mirrors /dev/sda12 between hosts suukyi and xanana as /dev/drbd2.
resource backup { # This is for /backup
# Protocol C: a write is reported as completed only once it has reached
# both the local and the remote disk.
protocol C;
# Command to run when the cluster starts up in degraded mode but knows
# it has inconsistent data.
incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
startup {
# Connection wait at startup, in seconds; 0 means unlimited.
wfc-timeout 0;
# Wait limit used instead when a degraded (single-node) cluster is rebooted.
degr-wfc-timeout 120; # 2 minutes.
}
disk {
# On a lower-level I/O error the node drops its backing storage device
# and continues in diskless mode.
on-io-error detach;
}
# No net options are set for this resource.
net {
}
syncer {
# Resynchronisation bandwidth limit; the unit is based on bytes, not bits.
rate 10M;
# Sync group number; groups are resynchronised one after another in
# ascending order. Resources on one physical disk go in different groups.
group 3;
# Size of the active set: each extent is 4M, so 257 extents is about 1GB.
al-extents 257;
}
# Per-host settings for host suukyi.
on suukyi {
device /dev/drbd2;
disk /dev/sda12;
address 10.0.0.1:7790;
# External meta-data on /dev/sda14, index 2; one block device can hold
# the meta-data of several resources, one index each.
meta-disk /dev/sda14[2];
}
# Per-host settings for host xanana.
on xanana {
device /dev/drbd2;
disk /dev/sda12;
address 10.0.0.2:7790;
# External meta-data on /dev/sda14, index 2.
meta-disk /dev/sda14[2];
}
}
# Resource spare: mirrors /dev/sda13 between hosts suukyi and xanana as /dev/drbd3.
resource spare { # This is for /spare
# Protocol C: a write is reported as completed only once it has reached
# both the local and the remote disk.
protocol C;
# Command to run when the cluster starts up in degraded mode but knows
# it has inconsistent data.
incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
startup {
# Connection wait at startup, in seconds; 0 means unlimited.
wfc-timeout 0;
# Wait limit used instead when a degraded (single-node) cluster is rebooted.
degr-wfc-timeout 120; # 2 minutes.
}
disk {
# On a lower-level I/O error the node drops its backing storage device
# and continues in diskless mode.
on-io-error detach;
}
# No net options are set for this resource.
net {
}
syncer {
# Resynchronisation bandwidth limit; the unit is based on bytes, not bits.
rate 10M;
# Sync group number; groups are resynchronised one after another in
# ascending order. Resources on one physical disk go in different groups.
group 4;
# Size of the active set: each extent is 4M, so 257 extents is about 1GB.
al-extents 257;
}
# Per-host settings for host suukyi.
on suukyi {
device /dev/drbd3;
disk /dev/sda13;
address 10.0.0.1:7791;
# External meta-data on /dev/sda14, index 3; one block device can hold
# the meta-data of several resources, one index each.
meta-disk /dev/sda14[3];
}
# Per-host settings for host xanana.
on xanana {
device /dev/drbd3;
disk /dev/sda13;
address 10.0.0.2:7791;
# External meta-data on /dev/sda14, index 3.
meta-disk /dev/sda14[3];
}
}
#==========#
# Examples #
#==========#
#--resource r0 {
# transfer protocol to use.
# C: write IO is reported as completed, if we know it has
# reached _both_ local and remote DISK.
# * for critical transactional data.
# * for most cases.
# B: write IO is reported as completed, if it has reached
# local DISK and remote buffer cache.
# A: write IO is reported as completed, if it has reached
# local DISK and local tcp send buffer. (see also sndbuf-size)
# * for high latency networks
#
#**********
# uhm, benchmarks have shown that C is actually better than B.
# this note shall disappear, when we are convinced that B is
# the right choice "for most cases".
# Until then, always use C unless you have a reason not to.
# --lge
#**********
#
#-- protocol C;
# what should be done in case the cluster starts up in
# degraded mode, but knows it has inconsistent data.
#-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
#-- startup {
# Wait for connection timeout.
# The init script blocks the boot process until the resources
# are connected. This is so when the cluster manager starts later,
# it does not see a resource with internal split-brain.
# In case you want to limit the wait time, do it here.
# Default is 0, which means unlimited. Unit is seconds.
#
# wfc-timeout 0;
# Wait for connection timeout if this node was a degraded cluster.
# In case a degraded cluster (= cluster with only one node left)
# is rebooted, this timeout value is used.
#
#-- degr-wfc-timeout 120; # 2 minutes.
#-- }
#-- disk {
# if the lower level device reports io-error you have the choice of
# "pass_on" -> Report the io-error to the upper layers.
# Primary -> report it to the mounted file system.
# Secondary -> ignore it.
# "panic" -> The node leaves the cluster by doing a kernel panic.
# "detach" -> The node drops its backing storage device, and
# continues in disk less mode.
#
#-- on-io-error detach;
# In case you only want to use a fraction of the available space
# you might use the "size" option here.
#
# size 10G;
#-- }
#-- net {
# this is the size of the tcp socket send buffer
# increase it _carefully_ if you want to use protocol A over a
# high latency network with reasonable write throughput.
# defaults to 2*65535; you might try even 1M, but if your kernel or
# network driver chokes on that, you have been warned.
# sndbuf-size 512k;
# timeout 60; # 6 seconds (unit = 0.1 seconds)
# connect-int 10; # 10 seconds (unit = 1 second)
# ping-int 10; # 10 seconds (unit = 1 second)
# Maximal number of requests (4K) to be allocated by DRBD.
# The minimum is hardcoded to 32 (=128 kByte).
# For high performance installations it might help if you
# increase that number. These buffers are used to hold
# datablocks while they are written to disk.
#
# max-buffers 2048;
# When the number of outstanding requests on a standby (secondary)
# node exceeds unplug-watermark, we start to kick the backing device
# to start its request processing. This is an advanced tuning
# parameter to get more performance out of capable storage controllers.
# Some controllers like to be kicked often, other controllers
# deliver better performance when they are kicked less frequently.
# Set it to the value of max-buffers to get the least possible
# number of run_task_queue_disk() / q->unplug_fn(q) calls.
#
# unplug-watermark 128;
# The highest number of data blocks between two write barriers.
# If you set this < 10 you might decrease your performance.
# max-epoch-size 2048;
# if some block send times out this many times, the peer is
# considered dead, even if it still answers ping requests.
# ko-count 4;
# if the connection to the peer is lost you have the choice of
# "reconnect" -> Try to reconnect (AKA WFConnection state)
# "stand_alone" -> Do not reconnect (AKA StandAlone state)
# "freeze_io" -> Try to reconnect but freeze all IO until
# the connection is established again.
# [ lge: oops. freeze_io is not implemented yet... ]
# [ at least not in drbd 0.7.x; but nobody wanted to use ]
# [ that anyway, otherwise we would have noticed earlier :-) ]
# on-disconnect reconnect;
#-- }
#-- syncer {
# Limit the bandwidth used by the resynchronisation process.
# default unit is kByte/sec; optional suffixes K,M,G are allowed.
#
# Even though this is a network setting, the units are based
# on _byte_ (octet for our french friends) not bit.
# We are storage guys.
#
# Note that on 100Mbit ethernet, you cannot expect more than
# 12.5 MByte total transfer rate.
# Consider using GigaBit Ethernet.
#
#-- rate 10M;
# All devices in one group are resynchronized parallel.
# Resynchronisation of groups is serialized in ascending order.
# Put DRBD resources which are on different physical disks in one group.
# Put DRBD resources on one physical disk in different groups.
#
#-- group 1;
# Configures the size of the active set. Each extent is 4M,
# 257 Extents ~> 1GB active set size. In case your syncer
# runs @ 10MB/sec, all resync after a primary's crash will last
# 1GB / ( 10MB/sec ) ~ 102 seconds ~ One Minute and 42 Seconds.
# BTW, the hash algorithm works best if the number of al-extents
# is prime. (To test the worst case performance use a power of 2)
#-- al-extents 257;
#-- }
#-- on amd {
#-- device /dev/drbd0;
#-- disk /dev/hde5;
#-- address 192.168.22.11:7788;
#-- meta-disk internal;
# meta-disk is either 'internal' or '/dev/ice/name [idx]'
#
# You can use a single block device to store meta-data
# of multiple DRBD's.
# E.g. use meta-disk /dev/hde6[0]; and meta-disk /dev/hde6[1];
# for two different resources. In this case the meta-disk
# would need to be at least 256 MB in size.
#
# 'internal' means, that the last 128 MB of the lower device
# are used to store the meta-data.
# You must not give an index with 'internal'.
#-- }
#-- on alf {
#-- device /dev/drbd0;
#-- disk /dev/hdc5;
#-- address 192.168.22.12:7788;
#-- meta-disk internal;
#-- }
#--}
#
# yes, you may also quote the resource name.
# but don't include whitespace, unless you mean it :)
#
#--resource "r1" {
#-- protocol C;
#-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
#-- startup {
#-- wfc-timeout 0; ## Infinite!
#-- degr-wfc-timeout 120; ## 2 minutes.
#-- }
#-- disk {
#-- on-io-error detach;
#-- }
#-- net {
# timeout 60;
# connect-int 10;
# ping-int 10;
# max-buffers 2048;
# max-epoch-size 2048;
#-- }
#-- syncer {
#-- rate 4M;
#-- group 1; # sync concurrently with r0
#-- }
#-- on amd {
#-- device /dev/drbd1;
#-- disk /dev/hde6;
#-- address 192.168.22.11:7789;
#-- meta-disk /dev/somewhere [7];
#-- }
#-- on alf {
#-- device /dev/drbd1;
#-- disk /dev/hdc6;
#-- address 192.168.22.12:7789;
#-- meta-disk /dev/somewhere [7];
#-- }
#--}
#--resource r2 {
#-- protocol C;
#-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
#-- startup { wfc-timeout 0; degr-wfc-timeout 120; }
#-- disk { on-io-error detach; }
#-- net { timeout 60; connect-int 10; ping-int 10;
#-- max-buffers 2048; max-epoch-size 2048; }
#-- syncer { rate 4M; group 2; } # sync when r0 and r1 are finished syncing.
#-- on amd {
#-- address 192.168.22.11:7790;
#-- disk /dev/hde7; device /dev/drbd2; meta-disk "internal";
#-- }
#-- on alf {
#-- device "/dev/drbd2"; disk "/dev/hdc7"; meta-disk "internal";
#-- address 192.168.22.12:7790;
#-- }
#--}
#--resource r3 {
#-- protocol C;
#-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f";
#-- startup { wfc-timeout 0; degr-wfc-timeout 120; }
#-- disk { on-io-error detach; }
#-- syncer {
#-- rate 4M;
#-- group 3; # sync when r2 is finished syncing.
#-- }
#-- on amd {
#-- device /dev/drbd3;
#-- disk /dev/hde8;
#-- address 192.168.22.11:7791;
#-- meta-disk internal;
#-- }
#-- on alf {
#-- device /dev/drbd3;
#-- disk /dev/hdc8;
#-- address 192.168.22.12:7791;
#-- meta-disk /some/where[8];
#-- }
#--}