Note: "permalinks" may not be as permanent as we would like;
direct links to old sources may well be a few messages off.
Hello all,
I'm wondering about this message, which occurred with drbd 0.6.13 running
with the original kernel 2.4.27 on an XSeries 235 machine with serveraid 5,
broadcom gigabit ethernet (bcm5700) while copying data to /dev/nb16.
The machine has 512 MB RAM and 1024 MB cache.
What does this mean:
Oct 29 05:30:20 FAGINTSC kernel: drbd16: Epoch set size wrong!!found=1061
reported=1060
Interestingly, the whole machine crashed directly after that message.
In /var/log/messages, I can find the following:
~> ksymoops -m /usr/src/linux-2.4.27/System.map oops
ksymoops 2.4.5 on i686 2.4.27. Options used
-V (default)
-k /proc/ksyms (default)
-l /proc/modules (default)
-o /lib/modules/2.4.27/ (default)
-m /usr/src/linux-2.4.27/System.map (specified)
Oct 29 05:30:29 FAGINTSC kernel: CPU: 0
Oct 29 05:30:29 FAGINTSC kernel: EIP: 0010:[<c0135400>] Not tainted
Using defaults from ksymoops -t elf32-i386 -a i386
Oct 29 05:30:29 FAGINTSC kernel: EFLAGS: 00010002
Oct 29 05:30:29 FAGINTSC kernel: eax: ffffffff ebx: dbcbd160 ecx:
00000001 edx: dffed600
Oct 29 05:30:29 FAGINTSC kernel: esi: c158a37c edi: 00001db7 ebp:
dffed6a4 esp: c15b9f30
Oct 29 05:30:29 FAGINTSC kernel: ds: 0018 es: 0018 ss: 0018
Oct 29 05:30:29 FAGINTSC kernel: Process kswapd (pid: 5, stackpage=c15b9000)
Oct 29 05:30:29 FAGINTSC kernel: Stack: 00000000 c1243108 c158a38c
c158a384 c15b8000 dffed600 00000000 00000008
Oct 29 05:30:29 FAGINTSC kernel: 00000000 00000000 00000000
00000020 000001d0 c028f69c c028f69c c01368ac
Oct 29 05:30:29 FAGINTSC kernel: c15b9f90 000001d0 0000003c
00000020 c0136952 c15b9f90 00000246 00000000
Oct 29 05:30:29 FAGINTSC kernel: Call Trace: [<c01368ac>] [<c0136952>]
[<c0136b0c>] [<c0136b78>] [<c0136cbd>]
Oct 29 05:30:29 FAGINTSC kernel: [<c0105000>] [<c010745e>] [<c0136c20>]
Oct 29 05:30:29 FAGINTSC kernel: Code: 8b 00 47 3b 44 24 08 75 f7 8b 5e 2c
89 fa 8b 46 4c 88 d9 d3
>>EIP; c0135400 <kmem_cache_reap+230/340> <=====
>>eax; ffffffff <END_OF_CODE+1f701d74/????>
>>ebx; dbcbd160 <_end+1b96ee68/204b3d68>
>>edx; dffed600 <_end+1fc9f308/204b3d68>
>>esi; c158a37c <_end+123c084/204b3d68>
>>edi; 00001db7 Before first symbol
>>ebp; dffed6a4 <_end+1fc9f3ac/204b3d68>
>>esp; c15b9f30 <_end+126bc38/204b3d68>
Trace; c01368ac <shrink_caches+1c/60>
Trace; c0136952 <try_to_free_pages_zone+62/f0>
Trace; c0136b0c <kswapd_balance_pgdat+6c/b0>
Trace; c0136b78 <kswapd_balance+28/40>
Trace; c0136cbd <kswapd+9d/b7>
Trace; c0105000 <_stext+0/0>
Trace; c010745e <arch_kernel_thread+2e/40>
Trace; c0136c20 <kswapd+0/b7>
Code; c0135400 <kmem_cache_reap+230/340>
00000000 <_EIP>:
Code; c0135400 <kmem_cache_reap+230/340> <=====
0: 8b 00 mov (%eax),%eax <=====
Code; c0135402 <kmem_cache_reap+232/340>
2: 47 inc %edi
Code; c0135403 <kmem_cache_reap+233/340>
3: 3b 44 24 08 cmp 0x8(%esp,1),%eax
Code; c0135407 <kmem_cache_reap+237/340>
7: 75 f7 jne 0 <_EIP>
Code; c0135409 <kmem_cache_reap+239/340>
9: 8b 5e 2c mov 0x2c(%esi),%ebx
Code; c013540c <kmem_cache_reap+23c/340>
c: 89 fa mov %edi,%edx
Code; c013540e <kmem_cache_reap+23e/340>
e: 8b 46 4c mov 0x4c(%esi),%eax
Code; c0135411 <kmem_cache_reap+241/340>
11: 88 d9 mov %bl,%cl
Code; c0135413 <kmem_cache_reap+243/340>
13: d3 00 roll %cl,(%eax)
drbd.conf:
global {
# use this if you want to define more resources later
# without reloading the module.
# by default we load the module with exactly as many devices
# as are configured in this file.
minor_count=40
# this is for people who set up a drbd device via the
# loopback network interface or between two VMs on the same
# box, for testing/simulating/presentation
# otherwise it could trigger a run_tasq_queue deadlock.
# I'm not sure whether this deadlock can happen with two
# nodes, but it seems at least extremely unlikely; and since
# the io_hints boost performance, keep them enabled.
# disable_io_hints
}
[...]
resource drbd16 {
protocol=C
fsckcmd=/bin/true
disk {
# do-panic
disk-size = 10485760
}
net {
sync-nice = 18
sync-min = 4M
sync-max = 8M # maximal average syncer bandwidth
tl-size = 5000 # transfer log size, ensures strict write ordering
timeout = 60 # 0.1 seconds
connect-int = 10 # seconds
ping-int = 10 # seconds
sync-group = 16
ko-count = 5
}
on FAGINTSB {
device = /dev/nb16
disk = /dev/system/test_LogVol
address = 10.2.18.151
port = 7804
}
on FAGINTSC {
device = /dev/nb16
disk = /dev/system/test_LogVol
address = 10.2.18.150
port = 7804
}
}
Could you please help me? drbd seems to crash my machine quite
often (about once a day).
Kind regards,
Andreas Hartmann