Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.
Hello,

The thread "drbd8 and 80+ 1TB mirrors/cluster, can it be done?" suggests using RAID5 or RAID6 on top of DRBD to improve redundancy... a distributed RAID 15/16 :-) I like this idea, so I tried to build it in VMware, but I can't get the RAID5 array to assemble on the mirrored node :-/ I hope I'm missing something basic, but I could not find any documentation on the web about this. My best guess is that DRBD is not synchronizing the RAID superblocks, but I don't know how to change this... I know I can flip this setup and use DRBD to mirror a RAID5 array, but I would rather mirror first for improved reliability and better re-sync characteristics...

In my lab I've got two VMware images (vmpod-000-0 and vmpod-000-1), each with five 1GB SCSI drives (/dev/sdb - /dev/sdf). I'm using drbd v8.0.12 and mdadm v2.5.6 on 64-bit Debian "Etch". Here's what I tried to do, in copy/paste script format...

--------------------------------------------------------------------------------
fdisk /dev/sdb
  # Create a new primary partition (1) using 99% of the cylinders
  # (120375 on WD 1TB drives) of type Linux LVM (8e)
  # Write changes to disk

sfdisk -d /dev/sdb | sfdisk /dev/sdc
sfdisk -d /dev/sdb | sfdisk /dev/sdd
sfdisk -d /dev/sdb | sfdisk /dev/sde
sfdisk -d /dev/sdb | sfdisk /dev/sdf

# Give the volumes stable device names using LVM
pvcreate /dev/sdb
pvcreate /dev/sdc
pvcreate /dev/sdd
pvcreate /dev/sde
pvcreate /dev/sdf

vgcreate vg00 /dev/sdb
vgcreate vg01 /dev/sdc
vgcreate vg02 /dev/sdd
vgcreate vg03 /dev/sde
vgcreate vg04 /dev/sdf

lvcreate -l`vgdisplay vg00 | grep "Total PE" | awk '{print $3;}'` vg00 -n lv00
lvcreate -l`vgdisplay vg01 | grep "Total PE" | awk '{print $3;}'` vg01 -n lv01
lvcreate -l`vgdisplay vg02 | grep "Total PE" | awk '{print $3;}'` vg02 -n lv02
lvcreate -l`vgdisplay vg03 | grep "Total PE" | awk '{print $3;}'` vg03 -n lv03
lvcreate -l`vgdisplay vg04 | grep "Total PE" | awk '{print $3;}'` vg04 -n lv04
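# Optional sanity check, run on both nodes: the LVs should come out the same
# size on each side, since a DRBD device can only be as large as the smaller
# of its two backing devices.
lvs --units m -o lv_name,lv_size vg00 vg01 vg02 vg03 vg04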
# Configure drbd8....
cat >/etc/drbd.conf << EOF
#
# drbd.conf
#
global {
  usage-count no;
  minor-count 85;
}

common {
  protocol C;

  handlers {
    pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
    pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
    local-io-error    "echo o > /proc/sysrq-trigger ; halt -f";
    # outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
    pri-lost "echo pri-lost. Have a look at the log files. | mail -s 'DRBD Alert' root";
    split-brain "echo split-brain. drbdadm -- --discard-my-data connect \$DRBD_RESOURCE ? | mail -s 'DRBD Alert' root";
  }

  startup {
    wfc-timeout 0;         ## Infinite!
    degr-wfc-timeout 120;  ## 2 minutes.
  }

  disk {
    on-io-error detach;
  }

  net {
    # sndbuf-size 512k;
    # timeout 60;           # 6 seconds (unit = 0.1 seconds)
    # connect-int 10;       # 10 seconds (unit = 1 second)
    # ping-int 10;          # 10 seconds (unit = 1 second)
    # max-buffers 2048;
    # unplug-watermark 128;
    # max-epoch-size 2048;
    # ko-count 4;
  }

  syncer {
    rate 33M;
    # group 1;
    al-extents 257;
  }
}

resource lv00 {
  on vmpod-000-0 {
    device /dev/drbd0;
    disk /dev/vg00/lv00;
    flexible-meta-disk internal;
    address 172.16.0.101:7788;
  }
  on vmpod-000-1 {
    device /dev/drbd0;
    disk /dev/vg00/lv00;
    flexible-meta-disk internal;
    address 172.16.0.102:7788;
  }
}

resource lv01 {
  on vmpod-000-0 {
    device /dev/drbd1;
    disk /dev/vg01/lv01;
    flexible-meta-disk internal;
    address 172.16.0.101:7789;
  }
  on vmpod-000-1 {
    device /dev/drbd1;
    disk /dev/vg01/lv01;
    flexible-meta-disk internal;
    address 172.16.0.102:7789;
  }
}

resource lv02 {
  on vmpod-000-0 {
    device /dev/drbd2;
    disk /dev/vg02/lv02;
    flexible-meta-disk internal;
    address 172.16.0.101:7790;
  }
  on vmpod-000-1 {
    device /dev/drbd2;
    disk /dev/vg02/lv02;
    flexible-meta-disk internal;
    address 172.16.0.102:7790;
  }
}

resource lv03 {
  on vmpod-000-0 {
    device /dev/drbd3;
    disk /dev/vg03/lv03;
    flexible-meta-disk internal;
    address 172.16.0.101:7791;
  }
  on vmpod-000-1 {
    device /dev/drbd3;
    disk /dev/vg03/lv03;
    flexible-meta-disk internal;
    address 172.16.0.102:7791;
  }
}

resource lv04 {
  on vmpod-000-0 {
    device /dev/drbd4;
    disk /dev/vg04/lv04;
    flexible-meta-disk internal;
    address 172.16.0.101:7792;
  }
  on vmpod-000-1 {
    device /dev/drbd4;
    disk /dev/vg04/lv04;
    flexible-meta-disk internal;
    address 172.16.0.102:7792;
  }
}
EOF

drbdadm create-md lv00
drbdadm create-md lv01
drbdadm create-md lv02
drbdadm create-md lv03
drbdadm create-md lv04

# Only on the primary system...
drbdadm -- --overwrite-data-of-peer primary lv00
drbdadm -- --overwrite-data-of-peer primary lv01
drbdadm -- --overwrite-data-of-peer primary lv02
drbdadm -- --overwrite-data-of-peer primary lv03
drbdadm -- --overwrite-data-of-peer primary lv04

## At this point DRBD is working great and the mirrors are syncing nicely :-) ##

# Configure a RAID5 volume on top of DRBD
mdadm --create /dev/md2 --auto=yes -l 5 -n 5 /dev/drbd0 /dev/drbd1 /dev/drbd2 /dev/drbd3 /dev/drbd4
mkfs.ext3 -b 4096 /dev/md2

## Test...
mount -t ext3 /dev/md2 /mnt
echo "Hello Tim" > /mnt/hi
cat /mnt/hi

## All of this worked great, so try to fail over to the other node...
umount /mnt
mdadm --stop --scan
drbdadm secondary all

# On the second node
drbdadm primary all
mdadm --assemble --scan
## This last call returns "mdadm: No arrays found in config file or automatically"
--------------------------------------------------------------------------------

I tried adding "DEVICE /dev/drbd*" to /etc/mdadm/mdadm.conf, but that did not help.
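To test my superblock theory, I assume something like this on the second node (after promoting it to primary) should show whether mdadm can see any RAID metadata on the replicated devices at all -- that's where I plan to look next:

    mdadm --examine /dev/drbd0
    mdadm --examine --scan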
If I look at the DRBD status, everything looks right...

vmpod-000-1:/home/tnufire# cat /proc/drbd
version: 8.0.12 (api:86/proto:86)
GIT-hash: 5c9f89594553e32adb87d9638dce591782f947e3 build by root at vmpod-000-1, 2008-05-29 21:17:12
 0: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r---
    ns:0 nr:1079032 dw:1079032 dr:216 al:0 bm:64 lo:0 pe:0 ua:0 ap:0
        resync: used:0/61 hits:65212 misses:64 starving:0 dirty:0 changed:64
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0
 1: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r---
    ns:0 nr:1079168 dw:1079168 dr:216 al:0 bm:64 lo:0 pe:0 ua:0 ap:0
        resync: used:0/61 hits:65212 misses:64 starving:0 dirty:0 changed:64
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0
 2: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r---
    ns:0 nr:1079340 dw:1079340 dr:216 al:0 bm:64 lo:0 pe:0 ua:0 ap:0
        resync: used:0/61 hits:65212 misses:64 starving:0 dirty:0 changed:64
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0
 3: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r---
    ns:0 nr:1079204 dw:1079204 dr:216 al:0 bm:64 lo:0 pe:0 ua:0 ap:0
        resync: used:0/61 hits:65212 misses:64 starving:0 dirty:0 changed:64
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0
 4: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r---
    ns:0 nr:1078772 dw:1078772 dr:216 al:0 bm:64 lo:0 pe:0 ua:0 ap:0
        resync: used:0/61 hits:65212 misses:64 starving:0 dirty:0 changed:64
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0

If I reverse the process and go back to the first node, "mdadm --assemble --scan" returns "mdadm: /dev/md/2 has been started with 5 drives", as it should.

What am I missing? Should this work?

Thanks,
Tim
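P.S. If --scan turns out to be a dead end, I suppose I could also try assembling the array by naming the members explicitly, e.g.:

    mdadm --assemble /dev/md2 /dev/drbd0 /dev/drbd1 /dev/drbd2 /dev/drbd3 /dev/drbd4

but I'd still like to understand why the scan finds nothing on the second node.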