Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.
Hello, I'm new to linux and drbd. We have set up primary and secondary machines. Our primary machine works fine. After I reboot my secondary machine and try to start drbd, I get the following error message. root@suukyi:~# /etc/init.d/drbd start Starting DRBD resources: can not open /dev/drbd0: No such file or directory can not open /dev/drbd1: No such file or directory can not open /dev/drbd2: No such file or directory can not open /dev/drbd3: No such file or directory [ d0 can not open /dev/drbd0: No such file or directory cmd /sbin/drbdsetup /dev/drbd0 disk /dev/sda10 /dev/sda14 0 --on-io-error=detach failed! I ran the following script manually: for i in $(seq 0 15) ; do mknod /dev/drbd$i b 147 $i ; done After running the above script, I can run /etc/init.d/drbd start successfully. /etc/init.d/drbd start Starting DRBD resources: [ d0 d1 d2 d3 s0 s1 s2 s3 n0 n1 n2 n3 ]. root@suukyi:~# cat /proc/drbd version: 0.7.15 (api:77/proto:74) SVN Revision: 2020 build by root@suukyi, 2006-08-24 16:50:05 0: cs:Connected st:Secondary/Secondary ld:Consistent ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 1: cs:SyncTarget st:Secondary/Primary ld:Inconsistent ns:0 nr:27648 dw:27648 dr:0 al:0 bm:2 lo:0 pe:0 ua:0 ap:0 [=>..................] sync'ed: 5.6% (488200/515848)K finish: 0:00:34 speed: 13,824 (13,824) K/sec 2: cs:PausedSyncT st:Secondary/Primary ld:Inconsistent ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 3: cs:PausedSyncT st:Secondary/Primary ld:Inconsistent ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 Attached are our drbd script and .conf. Please help me. Thanks Ganesan -------------- next part -------------- A non-text attachment was scrubbed... 
Name: drbd Type: application/octet-stream Size: 2995 bytes Desc: not available URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20060926/fc495fed/attachment.obj> -------------- next part -------------- # # drbd.conf example # # parameters you _need_ to change are the hostname, device, disk, # meta-disk, address and port in the "on <hostname> {}" sections. # # you ought to know about the protocol, and the various timeouts. # # you probably want to set the rate in the syncer sections # # NOTE common pitfall: # rate is given in units of _byte_ not bit # # # increase timeout and maybe ping-int in net{}, if you see # problems with "connection lost/connection established" # (or change your setup to reduce network latency; make sure full # duplex behaves as such; check average roundtrip times while # network is saturated; and so on ...) # # # Upgrading from DRBD-0.6.x # # Using the size parameter in the disk section (was disk-size) is # no longer valid. The agreed disk size is now stored # in DRBD's non volatile meta data files. # # NOTE that if you do not have some dedicated partition to use for # the meta-data, you may use 'internal' meta-data. # # THIS HOWEVER WILL DESTROY THE LAST 128M # OF THE LOWER LEVEL DEVICE. # # So you better make sure you shrink the filesystem by 128M FIRST! # or by 132M just to be sure... :) # skip { As you can see, you can also comment chunks of text with a 'skip[optional nonsense]{ skipped text }' section. This comes in handy, if you just want to comment out some 'resource <some name> {...}' section: just precede it with 'skip'. The basic format of option assignment is <option name><linear whitespace><value>; It should be obvious from the examples below, but if you really care to know the details: <option name> := valid options in the respective scope <value> := <num>|<string>|<choice>|... depending on the set of allowed values for the respective option. 
<num> := [0-9]+, sometimes with an optional suffix of K,M,G <string> := (<name>|\"([^\"\\\n]*|\\.)*\")+ <name> := [/_.A-Za-z0-9-]+ } # # At most ONE global section is allowed. # It must precede any resource section. # #global { # use this if you want to define more resources later # without reloading the module. # by default we load the module with exactly as many devices # as are configured in this file. # # minor-count 5; # The user dialog counts and displays the seconds it waited so # far. You might want to disable this if you have the console # of your server connected to a serial terminal server with # limited logging capacity. # The Dialog will print the count each 'dialog-refresh' seconds, # set it to 0 to disable redrawing completely. [ default = 1 ] # # dialog-refresh 5; # 5 seconds # You might disable one of drbdadm's sanity checks. # disable-ip-verification; #} # # this need not be r#, you may use phony resource names, # like "resource web" or "resource mail", too # #===========# # Resources # #===========# resource dbdata { # This is for /dbdata protocol C; incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; startup { wfc-timeout 0; degr-wfc-timeout 120; # 2 minutes. 
} disk { on-io-error detach; } net { } syncer { rate 10M; group 2; al-extents 257; } on suukyi { device /dev/drbd1; disk /dev/sda11; address 10.0.0.1:7789; meta-disk /dev/sda14[1]; } on xanana { device /dev/drbd1; disk /dev/sda11; address 10.0.0.2:7789; meta-disk /dev/sda14[1]; } } resource backup { # This is for /backup protocol C; incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; startup { wfc-timeout 0; degr-wfc-timeout 120; # 2 minutes. } disk { on-io-error detach; } net { } syncer { rate 10M; group 3; al-extents 257; } on suukyi { device /dev/drbd2; disk /dev/sda12; address 10.0.0.1:7790; meta-disk /dev/sda14[2]; } on xanana { device /dev/drbd2; disk /dev/sda12; address 10.0.0.2:7790; meta-disk /dev/sda14[2]; } } resource spare { # This is for /spare protocol C; incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; startup { wfc-timeout 0; degr-wfc-timeout 120; # 2 minutes. } disk { on-io-error detach; } net { } syncer { rate 10M; group 4; al-extents 257; } on suukyi { device /dev/drbd3; disk /dev/sda13; address 10.0.0.1:7791; meta-disk /dev/sda14[3]; } on xanana { device /dev/drbd3; disk /dev/sda13; address 10.0.0.2:7791; meta-disk /dev/sda14[3]; } } #==========# # Examples # #==========# #--resource r0 { # transfer protocol to use. # C: write IO is reported as completed, if we know it has # reached _both_ local and remote DISK. # * for critical transactional data. # * for most cases. # B: write IO is reported as completed, if it has reached # local DISK and remote buffer cache. # A: write IO is reported as completed, if it has reached # local DISK and local tcp send buffer. (see also sndbuf-size) # * for high latency networks # #********** # uhm, benchmarks have shown that C is actually better than B. # this note shall disappear, when we are convinced that B is # the right choice "for most cases". # Until then, always use C unless you have a reason not to. 
# --lge #********** # #-- protocol C; # what should be done in case the cluster starts up in # degraded mode, but knows it has inconsistent data. #-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; #-- startup { # Wait for connection timeout. # The init script blocks the boot process until the resources # are connected. This is so when the cluster manager starts later, # it does not see a resource with internal split-brain. # In case you want to limit the wait time, do it here. # Default is 0, which means unlimited. Unit is seconds. # # wfc-timeout 0; # Wait for connection timeout if this node was a degraded cluster. # In case a degraded cluster (= cluster with only one node left) # is rebooted, this timeout value is used. # #-- degr-wfc-timeout 120; # 2 minutes. #-- } #-- disk { # if the lower level device reports io-error you have the choice of # "pass_on" -> Report the io-error to the upper layers. # Primary -> report it to the mounted file system. # Secondary -> ignore it. # "panic" -> The node leaves the cluster by doing a kernel panic. # "detach" -> The node drops its backing storage device, and # continues in disk less mode. # #-- on-io-error detach; # In case you only want to use a fraction of the available space # you might use the "size" option here. # # size 10G; #-- } #-- net { # this is the size of the tcp socket send buffer # increase it _carefully_ if you want to use protocol A over a # high latency network with reasonable write throughput. # defaults to 2*65535; you might try even 1M, but if your kernel or # network driver chokes on that, you have been warned. # sndbuf-size 512k; # timeout 60; # 6 seconds (unit = 0.1 seconds) # connect-int 10; # 10 seconds (unit = 1 second) # ping-int 10; # 10 seconds (unit = 1 second) # Maximal number of requests (4K) to be allocated by DRBD. # The minimum is hardcoded to 32 (=128 kByte). # For high performance installations it might help if you # increase that number. 
These buffers are used to hold # datablocks while they are written to disk. # # max-buffers 2048; # When the number of outstanding requests on a standby (secondary) # node exceeds unplug-watermark, we start to kick the backing device # to start its request processing. This is an advanced tuning # parameter to get more performance out of capable storage controllers. # Some controllers like to be kicked often, other controllers # deliver better performance when they are kicked less frequently. # Set it to the value of max-buffers to get the least possible # number of run_task_queue_disk() / q->unplug_fn(q) calls. # # unplug-watermark 128; # The highest number of data blocks between two write barriers. # If you set this < 10 you might decrease your performance. # max-epoch-size 2048; # if some block send times out this many times, the peer is # considered dead, even if it still answers ping requests. # ko-count 4; # if the connection to the peer is lost you have the choice of # "reconnect" -> Try to reconnect (AKA WFConnection state) # "stand_alone" -> Do not reconnect (AKA StandAlone state) # "freeze_io" -> Try to reconnect but freeze all IO until # the connection is established again. # [ lge: oops. freeze_io is not implemented yet... ] # [ at least not in drbd 0.7.x; but nobody wanted to use ] # [ that anyways, otherwise we would have noticed earlier :-) ] # on-disconnect reconnect; #-- } #-- syncer { # Limit the bandwidth used by the resynchronisation process. # default unit is kByte/sec; optional suffixes K,M,G are allowed. # # Even though this is a network setting, the units are based # on _byte_ (octet for our french friends) not bit. # We are storage guys. # # Note that on 100Mbit ethernet, you cannot expect more than # 12.5 MByte total transfer rate. # Consider using GigaBit Ethernet. # #-- rate 10M; # All devices in one group are resynchronized in parallel. # Resynchronisation of groups is serialized in ascending order. 
# Put DRBD resources which are on different physical disks in one group. # Put DRBD resources on one physical disk in different groups. # #-- group 1; # Configures the size of the active set. Each extent is 4M, # 257 Extents ~> 1GB active set size. In case your syncer # runs @ 10MB/sec, all resync after a primary's crash will last # 1GB / ( 10MB/sec ) ~ 102 seconds ~ One Minute and 42 Seconds. # BTW, the hash algorithm works best if the number of al-extents # is prime. (To test the worst case performance use a power of 2) #-- al-extents 257; #-- } #-- on amd { #-- device /dev/drbd0; #-- disk /dev/hde5; #-- address 192.168.22.11:7788; #-- meta-disk internal; # meta-disk is either 'internal' or '/dev/ice/name [idx]' # # You can use a single block device to store meta-data # of multiple DRBD's. # E.g. use meta-disk /dev/hde6[0]; and meta-disk /dev/hde6[1]; # for two different resources. In this case the meta-disk # would need to be at least 256 MB in size. # # 'internal' means that the last 128 MB of the lower device # are used to store the meta-data. # You must not give an index with 'internal'. #-- } #-- on alf { #-- device /dev/drbd0; #-- disk /dev/hdc5; #-- address 192.168.22.12:7788; #-- meta-disk internal; #-- } #--} # # yes, you may also quote the resource name. # but don't include whitespace, unless you mean it :) # #--resource "r1" { #-- protocol C; #-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; #-- startup { #-- wfc-timeout 0; ## Infinite! #-- degr-wfc-timeout 120; ## 2 minutes. 
#-- } #-- disk { #-- on-io-error detach; #-- } #-- net { # timeout 60; # connect-int 10; # ping-int 10; # max-buffers 2048; # max-epoch-size 2048; #-- } #-- syncer { #-- rate 4M; #-- group 1; # sync concurrently with r0 #-- } #-- on amd { #-- device /dev/drbd1; #-- disk /dev/hde6; #-- address 192.168.22.11:7789; #-- meta-disk /dev/somewhere [7]; #-- } #-- on alf { #-- device /dev/drbd1; #-- disk /dev/hdc6; #-- address 192.168.22.12:7789; #-- meta-disk /dev/somewhere [7]; #-- } #--} #--resource r2 { #-- protocol C; #-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; #-- startup { wfc-timeout 0; degr-wfc-timeout 120; } #-- disk { on-io-error detach; } #-- net { timeout 60; connect-int 10; ping-int 10; #-- max-buffers 2048; max-epoch-size 2048; } #-- syncer { rate 4M; group 2; } # sync when r0 and r1 are finished syncing. #-- on amd { #-- address 192.168.22.11:7790; #-- disk /dev/hde7; device /dev/drbd2; meta-disk "internal"; #-- } #-- on alf { #-- device "/dev/drbd2"; disk "/dev/hdc7"; meta-disk "internal"; #-- address 192.168.22.12:7790; #-- } #--} #--resource r3 { #-- protocol C; #-- incon-degr-cmd "echo '!DRBD! pri on incon-degr' | wall ; sleep 60 ; halt -f"; #-- startup { wfc-timeout 0; degr-wfc-timeout 120; } #-- disk { on-io-error detach; } #-- syncer { #-- rate 4M; #-- group 3; # sync when r2 is finished syncing. #-- } #-- on amd { #-- device /dev/drbd3; #-- disk /dev/hde8; #-- address 192.168.22.11:7791; #-- meta-disk internal; #-- } #-- on alf { #-- device /dev/drbd3; #-- disk /dev/hdc8; #-- address 192.168.22.12:7791; #-- meta-disk /some/where[8]; #-- } #--}