[Drbd-dev] Please test with CONFIG_PROVE_LOCKING=y
Tetsuo Handa
penguin-kernel at i-love.sakura.ne.jp
Thu Apr 25 11:30:05 CEST 2019
I found that simply doing
# mount /dev/drbd0 /mnt/
on the primary side causes a lockdep splat on the peer side.
[ 16.930719] drbd: loading out-of-tree module taints kernel.
[ 17.003845] drbd: initialized. Version: 9.0.17-1 (api:2/proto:86-114)
[ 17.003849] drbd: GIT-hash: 46e8e3fc31b651fb56ea38134dcf14382fc43000 build by root at localhost.localdomain, 2019-04-24 15:26:32
[ 17.003852] drbd: registered as block device major 147
[ 17.031036] drbd r0: Starting worker thread (from drbdsetup [8212])
[ 17.072004] drbd r0 alice: Starting sender thread (from drbdsetup [8218])
[ 17.094178] drbd r0/0 drbd0: disk( Diskless -> Attaching )
[ 17.094348] drbd r0/0 drbd0: Maximum number of peer devices = 1
[ 17.098582] drbd r0: Method to ensure write ordering: flush
[ 17.098635] drbd r0/0 drbd0: drbd_bm_resize called with capacity == 104852280
[ 17.101095] drbd r0/0 drbd0: resync bitmap: bits=13106535 words=204790 pages=400
[ 17.101101] drbd r0/0 drbd0: size = 50 GB (52426140 KB)
[ 17.101103] drbd r0/0 drbd0: size = 50 GB (52426140 KB)
[ 17.146454] drbd r0/0 drbd0: recounting of set bits took additional 1ms
[ 17.146474] drbd r0/0 drbd0: disk( Attaching -> UpToDate )
[ 17.146477] drbd r0/0 drbd0: attached to current UUID: 85033A04F8E8C728
[ 17.151754] drbd r0 alice: conn( StandAlone -> Unconnected )
[ 17.152160] drbd r0 alice: Starting receiver thread (from drbd_w_r0 [8213])
[ 17.153240] drbd r0 alice: conn( Unconnected -> Connecting )
[ 17.666117] drbd r0 alice: Handshake to peer 1 successful: Agreed network protocol version 114
[ 17.666130] drbd r0 alice: Feature flags enabled on protocol level: 0xf TRIM THIN_RESYNC WRITE_SAME WRITE_ZEROES.
[ 17.666297] drbd r0 alice: Starting ack_recv thread (from drbd_r_r0 [8237])
[ 17.683576] drbd r0: Preparing cluster-wide state change 2247594057 (0->1 499/146)
[ 17.684431] drbd r0: State change 2247594057: primary_nodes=0, weak_nodes=0
[ 17.684436] drbd r0: Committing cluster-wide state change 2247594057 (0ms)
[ 17.684450] drbd r0 alice: conn( Connecting -> Connected ) peer( Unknown -> Secondary )
[ 17.701460] drbd r0/0 drbd0 alice: drbd_sync_handshake:
[ 17.701471] drbd r0/0 drbd0 alice: self 85033A04F8E8C728:0000000000000000:B30C5F9909DD18EE:33CA8E1052CCCE58 bits:0 flags:20
[ 17.701473] drbd r0/0 drbd0 alice: peer 85033A04F8E8C728:0000000000000000:B30C5F9909DD18EE:33CA8E1052CCCE58 bits:0 flags:120
[ 17.701475] drbd r0/0 drbd0 alice: uuid_compare()=0 by rule 38
[ 17.701485] drbd r0/0 drbd0 alice: pdsk( DUnknown -> UpToDate ) repl( Off -> Established )
[ 22.910250] drbd r0 alice: Preparing remote state change 992752788
[ 22.914015] drbd r0 alice: Committing remote state change 992752788 (primary_nodes=2)
[ 22.914025] drbd r0 alice: peer( Secondary -> Primary )
[ 23.039882] ========================================================
[ 23.039906] WARNING: possible irq lock inversion dependency detected
[ 23.039931] 5.0.0 #891 Tainted: G O
[ 23.039950] --------------------------------------------------------
[ 23.039975] drbd_r_r0/8237 just changed the state of lock:
[ 23.039997] 000000007cc227b6 (&(&connection->epoch_lock)->rlock){+.+.}, at: receive_Data+0x36b/0x1ca0 [drbd]
[ 23.040049] but this lock was taken by another, SOFTIRQ-safe lock in the past:
[ 23.040115] (&(&resource->req_lock)->rlock){..-.}
[ 23.040117]
and interrupts could create inverse lock ordering between them.
[ 23.040176]
other info that might help us debug this:
[ 23.040200] Possible interrupt unsafe locking scenario:
[ 23.040225] CPU0 CPU1
[ 23.040243] ---- ----
[ 23.040260] lock(&(&connection->epoch_lock)->rlock);
[ 23.040281] local_irq_disable();
[ 23.040303] lock(&(&resource->req_lock)->rlock);
[ 23.040330] lock(&(&connection->epoch_lock)->rlock);
[ 23.040359] <Interrupt>
[ 23.040370] lock(&(&resource->req_lock)->rlock);
[ 23.040389]
*** DEADLOCK ***
[ 23.040412] no locks held by drbd_r_r0/8237.
[ 23.040429]
the shortest dependencies between 2nd lock and 1st lock:
[ 23.040459] -> (&(&resource->req_lock)->rlock){..-.} ops: 270 {
[ 23.041041] IN-SOFTIRQ-W at:
[ 23.041596] lock_acquire+0xd0/0x1f0
[ 23.042142] _raw_spin_lock_irqsave+0x43/0x80
[ 23.042700] drbd_request_endio+0x75/0x260 [drbd]
[ 23.043269] bio_endio+0x15f/0x310
[ 23.043810] blk_update_request+0xdf/0x3e0
[ 23.044331] scsi_end_request+0x2f/0x360
[ 23.044844] scsi_io_completion+0x89/0x5f0
[ 23.045372] scsi_finish_command+0xd4/0x120
[ 23.045885] scsi_softirq_done+0x140/0x160
[ 23.046386] blk_done_softirq+0x85/0xb0
[ 23.046849] __do_softirq+0xbe/0x455
[ 23.047301] irq_exit+0xb7/0xc0
[ 23.047734] smp_call_function_single_interrupt+0x16b/0x240
[ 23.048190] call_function_single_interrupt+0xf/0x20
[ 23.048654] _raw_spin_unlock_irq+0x29/0x40
[ 23.049107] finish_task_switch+0x91/0x250
[ 23.049551] __schedule+0x2a3/0x9b0
[ 23.049978] schedule_idle+0x27/0x40
[ 23.050353] do_idle+0x16e/0x2a0
[ 23.050739] cpu_startup_entry+0x18/0x20
[ 23.051106] rest_init+0x18c/0x260
[ 23.051457] arch_call_rest_init+0x9/0xb
[ 23.051832] start_kernel+0x497/0x4b8
[ 23.052257] x86_64_start_reservations+0x24/0x26
[ 23.052647] x86_64_start_kernel+0x6f/0x72
[ 23.053044] secondary_startup_64+0xa4/0xb0
[ 23.053395] INITIAL USE at:
[ 23.053759] lock_acquire+0xd0/0x1f0
[ 23.054125] _raw_spin_lock_irq+0x3e/0x80
[ 23.054502] drbd_create_device+0x682/0xb20 [drbd]
[ 23.054872] drbd_adm_new_minor+0x118/0x330 [drbd]
[ 23.055200] genl_family_rcv_msg+0x214/0x470
[ 23.055528] genl_rcv_msg+0x47/0x90
[ 23.055849] netlink_rcv_skb+0xcb/0x100
[ 23.056185] genl_rcv+0x23/0x40
[ 23.056492] netlink_unicast+0x17f/0x230
[ 23.056782] netlink_sendmsg+0x2d4/0x3d0
[ 23.057081] sock_sendmsg+0x33/0x40
[ 23.057362] sock_write_iter+0x83/0xf0
[ 23.057663] __vfs_write+0x117/0x1a0
[ 23.057955] vfs_write+0xc2/0x1d0
[ 23.058233] ksys_write+0x53/0xc0
[ 23.058520] __x64_sys_write+0x15/0x20
[ 23.058797] do_syscall_64+0x4a/0x180
[ 23.059199] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 23.059504] }
[ 23.059784] ... key at: [<ffffffffc07c9198>] __key.70128+0x0/0xfffffffffffe6e68 [drbd]
[ 23.060080] ... acquired at:
[ 23.060342] lock_acquire+0xd0/0x1f0
[ 23.060615] _raw_spin_lock+0x38/0x70
[ 23.060872] finish_state_change+0x987/0x17f0 [drbd]
[ 23.061146] ___end_state_change+0x166/0x4c0 [drbd]
[ 23.061400] __end_state_change+0x48/0xc0 [drbd]
[ 23.061679] change_cluster_wide_state+0x9ff/0x1290 [drbd]
[ 23.061954] change_cstate_es+0x88/0xc0 [drbd]
[ 23.062217] connect_work+0xf1/0x290 [drbd]
[ 23.062508] drbd_sender+0x1c0/0x500 [drbd]
[ 23.062771] drbd_thread_setup+0x12b/0x350 [drbd]
[ 23.063044] kthread+0x10b/0x140
[ 23.063301] ret_from_fork+0x24/0x30
[ 23.063833] -> (&(&connection->epoch_lock)->rlock){+.+.} ops: 6 {
[ 23.064118] HARDIRQ-ON-W at:
[ 23.064395] lock_acquire+0xd0/0x1f0
[ 23.064666] _raw_spin_lock+0x38/0x70
[ 23.064947] receive_Data+0x36b/0x1ca0 [drbd]
[ 23.065197] drbd_receiver+0x534/0xe20 [drbd]
[ 23.065456] drbd_thread_setup+0x12b/0x350 [drbd]
[ 23.065699] kthread+0x10b/0x140
[ 23.066018] ret_from_fork+0x24/0x30
[ 23.066269] SOFTIRQ-ON-W at:
[ 23.066523] lock_acquire+0xd0/0x1f0
[ 23.066768] _raw_spin_lock+0x38/0x70
[ 23.067042] receive_Data+0x36b/0x1ca0 [drbd]
[ 23.067295] drbd_receiver+0x534/0xe20 [drbd]
[ 23.067563] drbd_thread_setup+0x12b/0x350 [drbd]
[ 23.067815] kthread+0x10b/0x140
[ 23.068080] ret_from_fork+0x24/0x30
[ 23.068331] INITIAL USE at:
[ 23.068605] lock_acquire+0xd0/0x1f0
[ 23.068960] _raw_spin_lock+0x38/0x70
[ 23.069323] finish_state_change+0x987/0x17f0 [drbd]
[ 23.069795] ___end_state_change+0x166/0x4c0 [drbd]
[ 23.070077] __end_state_change+0x48/0xc0 [drbd]
[ 23.070323] change_cluster_wide_state+0x9ff/0x1290 [drbd]
[ 23.070591] change_cstate_es+0x88/0xc0 [drbd]
[ 23.070857] connect_work+0xf1/0x290 [drbd]
[ 23.071106] drbd_sender+0x1c0/0x500 [drbd]
[ 23.071369] drbd_thread_setup+0x12b/0x350 [drbd]
[ 23.071616] kthread+0x10b/0x140
[ 23.071874] ret_from_fork+0x24/0x30
[ 23.072117] }
[ 23.072367] ... key at: [<ffffffffc07c9130>] __key.70143+0x0/0xfffffffffffe6ed0 [drbd]
[ 23.072630] ... acquired at:
[ 23.072955] mark_lock+0x190/0x2a0
[ 23.073205] __lock_acquire+0x270/0x1220
[ 23.073464] lock_acquire+0xd0/0x1f0
[ 23.073702] _raw_spin_lock+0x38/0x70
[ 23.073950] receive_Data+0x36b/0x1ca0 [drbd]
[ 23.074185] drbd_receiver+0x534/0xe20 [drbd]
[ 23.074430] drbd_thread_setup+0x12b/0x350 [drbd]
[ 23.074665] kthread+0x10b/0x140
[ 23.074904] ret_from_fork+0x24/0x30
[ 23.075367]
stack backtrace:
[ 23.075812] CPU: 0 PID: 8237 Comm: drbd_r_r0 Tainted: G O 5.0.0 #891
[ 23.076042] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/13/2018
[ 23.076763] Call Trace:
[ 23.077015] dump_stack+0x67/0x95
[ 23.077259] print_irq_inversion_bug.part.37+0x1ac/0x1b8
[ 23.077516] check_usage_backwards+0x12b/0x140
[ 23.077758] ? print_shortest_lock_dependencies+0x210/0x210
[ 23.078023] mark_lock+0x190/0x2a0
[ 23.078267] __lock_acquire+0x270/0x1220
[ 23.078528] lock_acquire+0xd0/0x1f0
[ 23.078813] ? receive_Data+0x36b/0x1ca0 [drbd]
[ 23.079075] _raw_spin_lock+0x38/0x70
[ 23.079340] ? receive_Data+0x36b/0x1ca0 [drbd]
[ 23.079590] receive_Data+0x36b/0x1ca0 [drbd]
[ 23.079970] ? dtt_recv+0xef/0x280 [drbd_transport_tcp]
[ 23.080221] ? drbd_recv+0x3e/0x420 [drbd]
[ 23.080497] drbd_receiver+0x534/0xe20 [drbd]
[ 23.080746] ? drbd_receiver+0x30c/0xe20 [drbd]
[ 23.081008] ? sched_clock+0x9/0x10
[ 23.081269] drbd_thread_setup+0x12b/0x350 [drbd]
[ 23.081520] kthread+0x10b/0x140
[ 23.081786] ? _drbd_send_bio+0x240/0x240 [drbd]
[ 23.082032] ? kthread_cancel_delayed_work_sync+0x10/0x10
[ 23.082286] ret_from_fork+0x24/0x30
Although making the change below seems to resolve the lockdep splat,
I can't verify its correctness because I don't know how drbd works internally.
Please test with CONFIG_PROVE_LOCKING=y and fix.
---
drbd/drbd_receiver.c | 24 ++++++++++++++----------
drbd/drbd_state.c | 5 +++--
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index ccbaad9f..24d1733b 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -86,11 +86,12 @@ static void destroy_peer_ack_req(struct kref *kref);
static struct drbd_epoch *previous_epoch(struct drbd_connection *connection, struct drbd_epoch *epoch)
{
struct drbd_epoch *prev;
- spin_lock(&connection->epoch_lock);
+ unsigned long flags;
+ spin_lock_irqsave(&connection->epoch_lock, flags);
prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
if (prev == epoch || prev == connection->current_epoch)
prev = NULL;
- spin_unlock(&connection->epoch_lock);
+ spin_unlock_irqrestore(&connection->epoch_lock, flags);
return prev;
}
@@ -1189,8 +1190,9 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
int schedule_flush = 0;
enum finish_epoch rv = FE_STILL_LIVE;
struct drbd_resource *resource = connection->resource;
+ unsigned long flags;
- spin_lock(&connection->epoch_lock);
+ spin_lock_irqsave(&connection->epoch_lock, flags);
do {
next_epoch = NULL;
finish = 0;
@@ -1241,9 +1243,9 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
if (!(ev & EV_CLEANUP)) {
/* adjust for nr requests already confirmed via P_CONFIRM_STABLE, if any. */
epoch_size -= atomic_read(&epoch->confirmed);
- spin_unlock(&connection->epoch_lock);
+ spin_unlock_irqrestore(&connection->epoch_lock, flags);
drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
- spin_lock(&connection->epoch_lock);
+ spin_lock_irqsave(&connection->epoch_lock, flags);
}
#if 0
/* FIXME: dec unacked on connection, once we have
@@ -1278,7 +1280,7 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
epoch = next_epoch;
} while (1);
- spin_unlock(&connection->epoch_lock);
+ spin_unlock_irqrestore(&connection->epoch_lock, flags);
if (schedule_flush) {
struct flush_work *fw;
@@ -1808,6 +1810,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
int rv, issue_flush;
struct p_barrier *p = pi->data;
struct drbd_epoch *epoch;
+ unsigned long flags;
tr_ops->hint(&connection->transport, DATA_STREAM, QUICKACK);
drbd_unplug_all_devices(connection);
@@ -1865,7 +1868,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
return 0;
}
- spin_lock(&connection->epoch_lock);
+ spin_lock_irqsave(&connection->epoch_lock, flags);
if (atomic_read(&connection->current_epoch->epoch_size)) {
list_add(&epoch->list, &connection->current_epoch->list);
connection->current_epoch = epoch;
@@ -1874,7 +1877,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
/* The current_epoch got recycled while we allocated this one... */
kfree(epoch);
}
- spin_unlock(&connection->epoch_lock);
+ spin_unlock_irqrestore(&connection->epoch_lock, flags);
return 0;
}
@@ -2784,6 +2787,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
struct drbd_peer_request_details d;
int op, op_flags;
int err, tp;
+ unsigned long flags;
peer_device = conn_peer_device(connection, pi->vnr);
if (!peer_device)
@@ -2883,7 +2887,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
*/
op_flags &= ~DRBD_REQ_HARDBARRIER;
- spin_lock(&connection->epoch_lock);
+ spin_lock_irqsave(&connection->epoch_lock, flags);
peer_req->epoch = connection->current_epoch;
atomic_inc(&peer_req->epoch->epoch_size);
atomic_inc(&peer_req->epoch->active);
@@ -2911,7 +2915,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
}
}
}
- spin_unlock(&connection->epoch_lock);
+ spin_unlock_irqrestore(&connection->epoch_lock, flags);
rcu_read_lock();
nc = rcu_dereference(connection->transport.net_conf);
diff --git a/drbd/drbd_state.c b/drbd/drbd_state.c
index 7b1deda2..52a29461 100644
--- a/drbd/drbd_state.c
+++ b/drbd/drbd_state.c
@@ -2430,11 +2430,12 @@ static void finish_state_change(struct drbd_resource *resource, struct completio
if (connection->epochs != 1)
some_peer_request_in_flight = true;
else if (!some_peer_request_in_flight) {
- spin_lock(&connection->epoch_lock);
+ unsigned long flags;
+ spin_lock_irqsave(&connection->epoch_lock, flags);
if (connection->epochs != 1 ||
atomic_read(&connection->current_epoch->epoch_size) != 0)
some_peer_request_in_flight = true;
- spin_unlock(&connection->epoch_lock);
+ spin_unlock_irqrestore(&connection->epoch_lock, flags);
}
break;
case C_STANDALONE:
--
2.16.5
More information about the drbd-dev
mailing list