[PATCH 1/3] rdma: Fix kernel crash in dtr_create_rx_desc()

zhengbing.huang zhengbing.huang at easystack.cn
Wed Jul 9 04:55:50 CEST 2025


The crash information is as follows:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
PGD 0
Oops: 0000 [#1] SMP NOPTI
CPU: 51 PID: 748 Comm: kworker/51:1 Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-372.19.1.es8_10.x86_64 #1
Hardware name: SuperCloud R5215 G13/R5215 G13, BIOS EG6.17.12 12/20/2024
Workqueue: events dtr_refill_rx_descs_work_fn [drbd_transport_rdma]
RIP: 0010:dtr_create_rx_desc+0xe1/0x310 [drbd_transport_rdma]
Code: 48 85 db 0f 84 85 01 00 00 48 89 5d 20 48 8b 0d e5 dd 4f c7 4c 89 7d 00 4c 8b 05 ea dd 4f c7 c7 45 18 00 00 00 00 48 8b 43 28 <8b> 00 89 45 34 48 8b 53 08 4c 89 f8 48 29 c8 48 8b 12 48 c1 f8 06
RSP: 0018:ff8baaf70e6a3e28 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ff4b9894e7f1f800 RCX: ffdcf32780000000
RDX: 0000000000000002 RSI: ff8baaf70e6a3dc0 RDI: ff4b98b3edac2fa8
RBP: ff4b989488700280 R08: ff4b989340000000 R09: ff4b989488700280
R10: 0000000000001000 R11: 0000000000000009 R12: ff4b989e12a2b648
R13: ff4b989c5fbaca60 R14: 0000000000001000 R15: ffdcf328136d5100
FS:  0000000000000000(0000) GS:ff4b98b33fac0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 00000015d3410004 CR4: 0000000000773ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 __dtr_refill_rx_desc+0x5d/0xb0 [drbd_transport_rdma]
 process_one_work+0x1a7/0x360
 ? create_worker+0x1a0/0x1a0
 worker_thread+0x30/0x390
 ? create_worker+0x1a0/0x1a0
 kthread+0x10a/0x120
 ? set_kthread_struct+0x40/0x40
 ret_from_fork+0x1f/0x40

(gdb) l *dtr_create_rx_desc+0xe1
0x1e01 is in dtr_create_rx_desc (/.../drbd_transport_rdma.c:2093).
2088                    goto out;
2089            }
2090            rx_desc->cm = cm;
2091            rx_desc->page = page;
2092            rx_desc->size = 0;
2093            rx_desc->sge.lkey = dtr_cm_to_lkey(cm);
2094            rx_desc->sge.addr = ib_dma_map_single(cm->id->device, page_address(page), alloc_size,
2095                                                  DMA_FROM_DEVICE);

/*
 * Return the local_dma_lkey of the pd attached to @cm.
 * Note: cm->pd is dereferenced without a NULL check, so the caller must
 * only pass a cm whose pd has already been set up.
 */
static u32 dtr_cm_to_lkey(struct dtr_cm *cm)
{
	return cm->pd->local_dma_lkey;
}

The cm is obtained safely through dtr_path_get_cm(), so cm itself is not a
NULL pointer; the NULL pointer being dereferenced is cm->pd.

dtr_path_prepare() first replaces the cm of the path.
Only after that replacement has succeeded
is the pd allocated, in the subsequent dtr_cm_alloc_rdma_res() call.

So if __dtr_refill_rx_desc() runs after
the cm of the path has been replaced with a cm that has no pd yet,
this crash occurs.

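Fix this by checking the cm state in dtr_path_get_cm():
if the cm is not in state DSM_CONNECTED, drop the reference and return NULL.
__dtr_disconnect_path() switches to the new dtr_path_get_cm_raw(),
which returns the cm without the state check.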

Signed-off-by: zhengbing.huang <zhengbing.huang at easystack.cn>
---
 drbd/drbd_transport_rdma.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drbd/drbd_transport_rdma.c b/drbd/drbd_transport_rdma.c
index 14392a33b..442dd8e89 100644
--- a/drbd/drbd_transport_rdma.c
+++ b/drbd/drbd_transport_rdma.c
@@ -842,6 +842,20 @@ static struct dtr_cm *dtr_path_get_cm(struct dtr_path *path)
 {
 	struct dtr_cm *cm;
 
+	rcu_read_lock();
+	cm = __dtr_path_get_cm(path);
+	if (cm && cm->state != DSM_CONNECTED) {
+		kref_put(&cm->kref, dtr_destroy_cm);
+		cm = NULL;
+	}
+	rcu_read_unlock();
+	return cm;
+}
+
+static struct dtr_cm *dtr_path_get_cm_raw(struct dtr_path *path)
+{
+	struct dtr_cm *cm;
+
 	rcu_read_lock();
 	cm = __dtr_path_get_cm(path);
 	rcu_read_unlock();
@@ -2567,9 +2581,6 @@ static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
 		goto createqp_failed;
 	}
 
-	for (i = DATA_STREAM; i <= CONTROL_STREAM ; i++)
-		dtr_create_rx_desc(&path->flow[i], GFP_NOIO);
-
 	return 0;
 
 createqp_failed:
@@ -2756,7 +2767,7 @@ static void __dtr_disconnect_path(struct dtr_path *path)
 		break;
 	}
 
-	cm = dtr_path_get_cm(path);
+	cm = dtr_path_get_cm_raw(path);
 	if (!cm)
 		return;
 
-- 
2.43.0



More information about the drbd-dev mailing list