[DRBD-user] Diagnosing DRBD Slowness

Robinson, Eric eric.robinson at psmnv.com
Tue Oct 21 21:33:21 CEST 2008

Note: "permalinks" may not be as permanent as we would like,
direct links of old sources may well be a few messages off.


Can someone please help me diagnose a DRBD slow sync problem?
 
The servers have been syncing for 3-4 days and are now only 12.5%
sync'd. 
 
I'm getting conflicting results from benchmark tests.
 
Summary:
 
netperf is fast
iperf is fast
bonnie++ is fast
dd|nc is extremely slow
drbd sync is extremely slow
 
Details:
 
 
Here's iperf... very fast...
 
 
[root at ha03 download]# iperf -t TCP_STREAM -c 2.1.1.2
------------------------------------------------------------
Client connecting to 2.1.1.2, TCP port 5001
TCP window size: 16.0 KByte (default)
------------------------------------------------------------
[  3] local 2.1.1.1 port 59472 connected with 2.1.1.2 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0-127.4 sec  13.9 GBytes    939 Mbits/sec

 
 
Now netperf... very fast...
 
 
 
[root at ha03 download]# netperf -H 2.1.1.2
TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 2.1.1.2 (2.1.1.2) port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec
 87380  16384  16384    10.03     938.80

 
 
 
bonnie++ on one server... fast...
 
Version  1.03       ------Sequential Output------ --Sequential Input- --Random-
                    -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine        Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
ha03.mycharts.md 6G 68212  90 187783  31 91553  17 73379  90 271470  28 1099   1
                    ------Sequential Create------ --------Random Create--------
                    -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
              files  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP /sec %CP
                 16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
ha03.mycharts.md,6G,68212,90,187783,31,91553,17,73379,90,271470,28,1098.6,1,16,+++++,+++,+++++,+++,+++++,+++,+++++,+++,+++++,+++,+++++,+++

 
bonnie++ on the other server... fast...
 
Version  1.03       ------Sequential Output------ --Sequential Input- --Random-
                    -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine        Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
ha04.mycharts.md 6G 69619  91 191459  31 83074  16 72740  89 265057  27 964.9   1
                    ------Sequential Create------ --------Random Create--------
                    -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete--
              files  /sec %CP  /sec %CP  /sec %CP  /sec %CP  /sec %CP /sec %CP
                 16 +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++ +++++ +++
ha04.mycharts.md,6G,69619,91,191459,31,83074,16,72740,89,265057,27,964.9,1,16,+++++,+++,+++++,+++,+++++,+++,+++++,+++,+++++,+++,++++
 
 
 
Now the dd command... very slow...
 
 
[root at ha03 download]# dd if=/dev/zero bs=1M count=100 | nc 2.1.1.2 10000
2+0 records in
1+0 records out
1048576 bytes (1.0 MB) copied, 12.7668 seconds, 82.1 kB/s

 
 
drbd stats... very slow...
 
[root at ha04 download]# cat /proc/drbd
version: 8.0.13 (api:86/proto:86)
GIT-hash: ee3ad77563d2e87171a3da17cc002ddfd1677dbe build by buildsvn at c5-i386-build, 2008-08-07 13:42:56
 0: cs:Connected st:Secondary/Primary ds:UpToDate/UpToDate C r---
    ns:0 nr:196 dw:196 dr:0 al:0 bm:1 lo:0 pe:0 ua:0 ap:0
        resync: used:0/61 hits:9 misses:1 starving:0 dirty:0 changed:1
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0
 1: cs:SyncTarget st:Secondary/Primary ds:Inconsistent/UpToDate C r---
    ns:0 nr:89232960 dw:89232960 dr:0 al:0 bm:5446 lo:0 pe:0 ua:0 ap:0
        [=>..................] sync'ed: 13.8% (547719/634860)M
        finish: 389:29:20 speed: 320 (316) K/sec
        resync: used:0/61 hits:5571613 misses:5447 starving:0 dirty:0 changed:5447
        act_log: used:0/257 hits:0 misses:0 starving:0 dirty:0 changed:0

 
drbd.conf...
 

[root at ha04 download]# cat /etc/drbd.conf
# drbd.conf
 
global {
    usage-count no;
}
 

common {
  syncer {
    rate 33M;
    al-extents 257;
  }
}
 
resource mysql_ha {
  protocol C;
  handlers {
    pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
    pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
    local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
    outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
    #pri-lost "echo pri-lost. Have a look at the log files. | mail -s 'DRBD Alert' root";
    # split-brain "echo split-brain. drbdadm -- --discard-my-data connect $DRBD_RESOURCE ? | mail -s 'DRBD Alert' admin at pmcipa.com";
    #out-of-sync "echo out-of-sync. drbdadm down $DRBD_RESOURCE. drbdadm ::::0 set-gi $DRBD_RESOURCE. drbdadm up $DRBD_RESOURCE. | mail -s 'DRBD Alert' root";
  }
  startup {
    wfc-timeout  0;          # infinite
    degr-wfc-timeout 120;    # 2 minutes.
  }
  disk {
    on-io-error   detach;
  }
  net {
    cram-hmac-alg "sha1";
    shared-secret "NerfBurger23!";
    after-sb-0pri disconnect;
    after-sb-1pri disconnect;
    after-sb-2pri disconnect;
    rr-conflict disconnect;
    # data-integrity-alg "md5";
  }
  on ha03.mycharts.md {
    device     /dev/drbd0;
    disk       /dev/vg01/lv01;
    address    2.1.1.1:7788;
    meta-disk  internal;
  }
  on ha04.mycharts.md {
    device     /dev/drbd0;
    disk       /dev/vg01/lv01;
    address   2.1.1.2:7788;
    meta-disk  internal;
  }
}
 
resource ftp_ha {
  protocol C;
  handlers {
    pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
    pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
    local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
    outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
    #pri-lost "echo pri-lost. Have a look at the log files. | mail -s 'DRBD Alert' root";
    # split-brain "echo split-brain. drbdadm -- --discard-my-data connect $DRBD_RESOURCE ? | mail -s 'DRBD Alert' admin at pmcipa.com";
    #out-of-sync "echo out-of-sync. drbdadm down $DRBD_RESOURCE. drbdadm ::::0 set-gi $DRBD_RESOURCE. drbdadm up $DRBD_RESOURCE. | mail -s 'DRBD Alert' root";
  }
  startup {
    wfc-timeout  0;          # infinite
    degr-wfc-timeout 120;    # 2 minutes.
  }
  disk {
    on-io-error   detach;
  }
  net {
    cram-hmac-alg "sha1";
    shared-secret "NerfBurger23!";
    after-sb-0pri disconnect;
    after-sb-1pri disconnect;
    after-sb-2pri disconnect;
    rr-conflict disconnect;
    # data-integrity-alg "md5";
  }
  on ha03.mycharts.md {
    device     /dev/drbd1;
    disk       /dev/vg01/lv02;
    address    2.1.1.1:7789;
    meta-disk  internal;
  }
  on ha04.mycharts.md {
    device     /dev/drbd1;
    disk       /dev/vg01/lv02;
    address   2.1.1.2:7789;
    meta-disk  internal;
  }
}

 
 
 

--
Eric Robinson




Disclaimer - October 21, 2008 
This email and any files transmitted with it are confidential and intended solely for drbd-user at lists.linbit.com. If you are not the named addressee you should not disseminate, distribute, copy or alter this email. Any views or opinions presented in this email are solely those of the author and might not represent those of . Warning: Although  has taken reasonable precautions to ensure no viruses are present in this email, the company cannot accept responsibility for any loss or damage arising from the use of this email or attachments. 
This disclaimer was added by Policy Patrol: http://www.policypatrol.com/
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.linbit.com/pipermail/drbd-user/attachments/20081021/e2fbbf24/attachment.htm>


More information about the drbd-user mailing list