Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.
Hi All,
I’m having serious problems setting up a new DRBD cluster. At the moment I have 2 clusters in production that have been running without any problems for around 2 years now.
Debian Squeeze 6 + drbd 8.3.11 running at a nice speed of 800MB/s with 16x SATA 1TB disks HW raid 6 :-) and 10G NIC’s
There are no partitions on the DRBD device; it’s a raw block device.
Bought some new hardware:
2x Xeon 6core CPU’s with HT - 16GB ram - Areca 1882i - 24x 600GB SAS 10k HDD’s - 10G NIC's
Started off with Debian Wheezy and DRBD 8.3.13, but performance was really bad: +- 400MB/s with DRBD primary/secondary, while the local raw speed of /dev/sdb = 1.5GB/s
Then went on with Ubuntu 13.10 and DRBD 8.4.3: same deal. Then installed the latest stable DRBD 8.4.4, but no difference.
I can crank up the initial sync to 750MB/s when playing with the c-rate settings (still not a good performance), while after the initial sync it drops back to 400MB/s
I tested all the different performance settings on everything! I mean local block tuning, NIC tuning, DRBD tuning; still I can’t get it to perform well. It’s worse than my production DRBD with SATA drives at 7200rpm!
Current config:
root at vstore7:~# drbdsetup /dev/drbd0 show
disk {
size 0s _is_default; # bytes
on-io-error detach;
fencing dont-care _is_default;
no-disk-flushes ;
max-bio-bvecs 0 _is_default;
}
net {
timeout 60 _is_default; # 1/10 seconds
max-epoch-size 8000;
max-buffers 8000;
unplug-watermark 128 _is_default;
connect-int 10 _is_default; # seconds
ping-int 10 _is_default; # seconds
sndbuf-size 2097152; # bytes
rcvbuf-size 0 _is_default; # bytes
ko-count 0 _is_default;
after-sb-0pri disconnect _is_default;
after-sb-1pri disconnect _is_default;
after-sb-2pri disconnect _is_default;
rr-conflict disconnect _is_default;
ping-timeout 5 _is_default; # 1/10 seconds
on-congestion block _is_default;
congestion-fill 0s _is_default; # byte
congestion-extents 127 _is_default;
}
syncer {
rate 1228800k; # bytes/second
after -1 _is_default;
al-extents 3389;
on-no-data-accessible io-error _is_default;
c-plan-ahead 0 _is_default; # 1/10 seconds
c-delay-target 10 _is_default; # 1/10 seconds
c-fill-target 0s _is_default; # bytes
c-max-rate 102400k _is_default; # bytes/second
c-min-rate 4096k _is_default; # bytes/second
}
protocol C;
_this_host {
device minor 0;
disk "/dev/sdb";
meta-disk internal;
address ipv4 172.16.252.7:7789;
}
_remote_host {
address ipv4 172.16.252.8:7789;
}
Here are some test results of iperf and dd, so you can see that my local speeds and network speeds aren’t the problem. The final setup will also be a bond of 10G cards, but I took that out for troubleshooting.
root at vstore7:~# iperf -c 172.16.252.8 -t 200 -i 2
------------------------------------------------------------
Client connecting to 172.16.252.8, TCP port 5001
TCP window size: 96.7 KByte (default)
------------------------------------------------------------
[ 3] local 172.16.252.7 port 35149 connected with 172.16.252.8 port 5001
[ ID] Interval Transfer Bandwidth
[ 3] 0.0- 2.0 sec 2.32 GBytes 9.96 Gbits/sec
[ 3] 2.0- 4.0 sec 2.31 GBytes 9.93 Gbits/sec
[ 3] 4.0- 6.0 sec 2.31 GBytes 9.93 Gbits/sec
----total-cpu-usage---- -dsk/total- -net/total- ---paging-- ---system--
usr sys idl wai hiq siq| read writ| recv send| in out | int csw
0 0 99 0 0 0| 96k 246k| 0 0 | 0 0 |9227 186
0 3 96 0 0 1| 0 0 |3692k 1190M| 0 0 | 93k 252
0 3 96 0 0 1| 0 0 |3695k 1189M| 0 0 | 94k 252
0 3 96 0 0 1| 0 0 |3697k 1189M| 0 0 | 94k 268
root at vstore7:~# dd if=/dev/zero of=/dev/sdb bs=1M count=100000
100000+0 records in
100000+0 records out
104857600000 bytes (105 GB) copied, 98.921 s, 1.1 GB/s
root at vstore8:~# dd if=/dev/zero of=/dev/sdb bs=1M count=100000
100000+0 records in
100000+0 records out
104857600000 bytes (105 GB) copied, 98.3511 s, 1.1 GB/s
root at vstore7:~# dd if=/dev/zero of=/dev/sdb bs=1M count=100000 oflag=direct
100000+0 records in
100000+0 records out
104857600000 bytes (105 GB) copied, 59.5348 s, 1.8 GB/s
root at vstore8:~# dd if=/dev/zero of=/dev/sdb bs=1M count=100000 oflag=direct
100000+0 records in
100000+0 records out
104857600000 bytes (105 GB) copied, 59.3226 s, 1.8 GB/s
-------------- next part --------------
A non-text attachment was scrubbed...
Name: drbd_fsio_test.rtf
Type: text/rtf
Size: 7249 bytes
Desc: not available
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20140211/c8c017c1/attachment.bin>
-------------- next part --------------
PLEASE SOMEONE TELL ME WHAT I’M MISSING!
Tested DRBD settings:
On DISKS:
block/sdb/queue/scheduler = deadline
block/sdb/queue/iosched/front_merges = 0
block/sdb/queue/iosched/read_expire = 150
block/sdb/queue/iosched/write_expire = 1500
On NIC:
net.core.rmem_max = 33554432
net.core.wmem_max = 33554432
net.ipv4.tcp_rmem = 4096 87380 33554432
net.ipv4.tcp_wmem = 4096 65536 33554432
net.core.netdev_max_backlog = 30000
net.ipv4.tcp_window_scaling = 1
on DRBD:
global {
usage-count no;
# minor-count dialog-refresh disable-ip-verification
}
common {
handlers {
pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
}
startup {
wfc-timeout 240;
degr-wfc-timeout 120;
outdated-wfc-timeout 120;
}
disk {
on-io-error detach;
disk-barrier no;
disk-flushes no;
md-flushes no;
c-plan-ahead 20; tested different ranges 100 and 200 as well
c-fill-target 2M; tested different settings
c-min-rate 1M; tested different settings
c-max-rate 1500M; tested different settings
resync-rate 1500M; doesn’t do anything when c-rate disabled
al-extents 3389;
}
net {
protocol C;
sndbuf-size 2M; tested different settings
# rcvbuf-size 2M; tested different settings
# ping-int 4;
# timeout 30;
max-buffers 8000; tested between and 131072
max-epoch-size 8000;
# unplug-watermark 8000; tested from 16 and higher
use-rle;
# verify-alg md5;
}
}
Mvg,
Joeri
--
Joeri Casteels
Department of Information Technology
Internet Based Communication Networks and Services (IBCN)
Ghent University - iMinds
Gaston Crommenlaan 8 (Bus 201), B-9050 Gent, Belgium
T: +32 9 33 14964
T Secretariaat: +32 9 33 14900
F: +32 9 33 14899
E: joeri.casteels at intec.UGent.be
W : www.ibcn.intec.UGent.be