[DRBD-user] Kernel panic when using drbd together with dlm

Mon Aug 30 16:25:45 CEST 2010

Hi all,

Further analysis show the following:

crash> dis -lr vfs_ioctl+0x32
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/fs/ioctl.c: 39
0xffffffff8111a719 <vfs_ioctl>: push   %rbp
0xffffffff8111a71a <vfs_ioctl+1>:       mov    %rsp,%rbp
0xffffffff8111a71d <vfs_ioctl+4>:       push   %r13
0xffffffff8111a71f <vfs_ioctl+6>:       push   %r12
0xffffffff8111a721 <vfs_ioctl+8>:       push   %rbx
0xffffffff8111a722 <vfs_ioctl+9>:       sub    $0x8,%rsp
0xffffffff8111a726 <vfs_ioctl+13>:      nopl   0x0(%rax,%rax,1)
0xffffffff8111a72b <vfs_ioctl+18>:      mov    %rdx,%r13
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/fs/ioctl.c: 42
0xffffffff8111a72e <vfs_ioctl+21>:      mov    0x20(%rdi),%rdx
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/fs/ioctl.c: 39
0xffffffff8111a732 <vfs_ioctl+25>:      mov    %rdi,%rbx
0xffffffff8111a735 <vfs_ioctl+28>:      mov    %esi,%r12d
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/fs/ioctl.c: 42
0xffffffff8111a738 <vfs_ioctl+31>:      test   %rdx,%rdx
0xffffffff8111a73b <vfs_ioctl+34>:      je     0xffffffff8111a7af
<vfs_ioctl+150>
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/fs/ioctl.c: 45
0xffffffff8111a73d <vfs_ioctl+36>:      mov    0x48(%rdx),%rax
0xffffffff8111a741 <vfs_ioctl+40>:      test   %rax,%rax
0xffffffff8111a744 <vfs_ioctl+43>:      je     0xffffffff8111a75c
<vfs_ioctl+67>
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/fs/ioctl.c: 46
0xffffffff8111a746 <vfs_ioctl+45>:      mov    %r13,%rdx
0xffffffff8111a749 <vfs_ioctl+48>:      callq  *%rax
0xffffffff8111a74b <vfs_ioctl+50>:      mov    %eax,%ebx

crash> dis -lr ffffffff8139460e
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/net/socket.c: 927
0xffffffff813945ea <sock_ioctl>:        push   %rbp
0xffffffff813945eb <sock_ioctl+1>:      mov    %rsp,%rbp
0xffffffff813945ee <sock_ioctl+4>:      push   %r14
0xffffffff813945f0 <sock_ioctl+6>:      push   %r13
0xffffffff813945f2 <sock_ioctl+8>:      push   %r12
0xffffffff813945f4 <sock_ioctl+10>:     push   %rbx
0xffffffff813945f5 <sock_ioctl+11>:     sub    $0x10,%rsp
0xffffffff813945f9 <sock_ioctl+15>:     nopl   0x0(%rax,%rax,1)
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/net/socket.c: 934
0xffffffff813945fe <sock_ioctl+20>:     mov    0xa0(%rdi),%r14
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/net/socket.c: 927
0xffffffff81394605 <sock_ioctl+27>:     mov    %esi,%ebx
0xffffffff81394607 <sock_ioctl+29>:     mov    %rdx,%r13
/usr/src/debug/kernel-2.6.34.fc13/linux-2.6.34.x86_64/include/net/sock.h: 1601
0xffffffff8139460a <sock_ioctl+32>:     mov    0x38(%r14),%rax
0xffffffff8139460e <sock_ioctl+36>:     mov    0x40(%rax),%r12

This corresponds to
fs/ioctl.c
static long vfs_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
...
        if (filp->f_op->unlocked_ioctl) {
                error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
...

socket.c:
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
...
net = sock_net(sk);
...

include/net/sock.h:
static inline
struct net *sock_net(const struct sock *sk)
{
...
        return sk->sk_net;
...
}

>From what I understand, that means that drbdadm issues some ioctl at the
vfs level, which somehow leads to sock_ioctl() call which in turn fails
because it isn't supposed to be called for vfs. This could mean that
filp->f_op->unlocked_ioctl() is set incorrectly to sock_ioctl. And this
means that actual problem is somewhere in kernel, but is rises only
after some magic ioctl call from drbdadm.

Can anybody from DRBD devs look at this issue?

Best,
Vladislav