Hi Philipp,

This is indeed a better idea. I will make it happen; a rough sketch of
what I have in mind is inline below.

best regards,
zhengbing

From: Philipp Reisner <philipp.reisner@linbit.com>
Date: 2024-10-17 00:48:26
To: "zhengbing.huang" <zhengbing.huang@easystack.cn>
Cc: drbd-dev@lists.linbit.com
Subject: Re: [PATCH] rdma: Fix multiple disk performance degradation

>Hi Zhengbing,
>
>My impression is that using the port number to distribute the IRQs is
>a hack. What about allocating an array and tracking how many CMs got
>assigned to each IRQ, and always using the IRQ number that is used by
>the least number of other CMs?
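
Something like this, perhaps (just a sketch; cm_count_per_vector and
dtr_least_loaded_comp_vector are placeholder names, the counters would
have to live per ib_device and be protected by a lock, and the matching
decrement belongs in the CM teardown path):

/* one counter per completion vector; could be allocated once per device with
 * kcalloc(device->num_comp_vectors, sizeof(*cm_count_per_vector), GFP_KERNEL) */
static int *cm_count_per_vector;

/* return the vector currently used by the fewest CMs and record the new user */
static int dtr_least_loaded_comp_vector(struct ib_device *device)
{
	int i, vector = 0;

	for (i = 1; i < device->num_comp_vectors; i++) {
		if (cm_count_per_vector[i] < cm_count_per_vector[vector])
			vector = i;
	}
	cm_count_per_vector[vector]++;

	return vector;
}

_dtr_cm_alloc_rdma_res() would then simply set
cq_attr.comp_vector = dtr_least_loaded_comp_vector(device);
for both the recv and the send CQ.
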
>
>best regards,
> Philipp
>
>On Sat, Sep 14, 2024 at 8:12 AM zhengbing.huang
><zhengbing.huang@easystack.cn> wrote:
>>
>> In performance testing of the rdma transport we found that when
>> two drbd disks were put under heavy write I/O at the same time,
>> the IOPS of each drbd disk dropped by half.
>>
>> The reason is that when the cq_attr.comp_vector parameter is not
>> specified while the rdma transport creates send_cq and recv_cq,
>> all CQs end up being served by the same IRQ.
>> If several disks are stressed simultaneously, that single IRQ
>> cannot process the completions in time and performance degrades.
>>
>> The solution is to use the network port number as the index of the
>> comp_vector, so that the CQs of each disk are distributed across
>> different IRQs.
>> Signed-off-by: zhengbing.huang <zhengbing.huang@easystack.cn>
>> ---
>> drbd/drbd_transport_rdma.c | 44 ++++++++++++++++++++++++++++++++++----
>> 1 file changed, 40 insertions(+), 4 deletions(-)
>>
>> diff --git a/drbd/drbd_transport_rdma.c b/drbd/drbd_transport_rdma.c
>> index 813787c28..8915e60d1 100644
>> --- a/drbd/drbd_transport_rdma.c
>> +++ b/drbd/drbd_transport_rdma.c
>> @@ -2486,10 +2486,43 @@ static int dtr_init_flow(struct dtr_path *path, enum drbd_stream stream)
>> return err;
>> }
>>
>> +static int dtr_get_my_port(struct dtr_path *path)
>> +{
>> + int port = 0;
>> + struct sockaddr_storage *addr = (struct sockaddr_storage *)&path->path.my_addr;
>> +
>> + if (addr->ss_family == AF_INET6) {
>> + const struct sockaddr_in6 *v6a = (const struct sockaddr_in6 *)addr;
>> +
>> + port = be16_to_cpu(v6a->sin6_port);
>> + } else /* AF_INET, AF_SSOCKS, AF_SDP */ {
>> + const struct sockaddr_in *v4a = (const struct sockaddr_in *)addr;
>> +
>> + port = be16_to_cpu(v4a->sin_port);
>> + }
>> +
>> + return port;
>> +}
>> +
>> +static void dtr_get_comp_vectors(struct dtr_path *path, int cq_num, int *comp_vectors)
>> +{
>> + int i;
>> + int tmp_comp_vector = dtr_get_my_port(path) * cq_num;
>> +
>> + for (i = 0; i < cq_num; i++) {
>> + comp_vectors[i] = tmp_comp_vector + i;
>> + }
>> +
>> + return;
>> +}
>> +
>> static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
>> enum dtr_alloc_rdma_res_causes *cause)
>> {
>> - int err, i, rx_descs_max = 0, tx_descs_max = 0;
>> + int err, i, cq_index = 0, rx_descs_max = 0, tx_descs_max = 0;
>> + int cq_num = 2; /* recv_cq and send_cq */
>> + int comp_vectors[2] = {0}; /* recv_cq and send_cq */
>> + struct ib_device *device = cm->id->device;
>> struct ib_cq_init_attr cq_attr = {};
>> struct dtr_path *path = cm->path;
>>
>> @@ -2504,16 +2537,18 @@ static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
>> /* in 4.9 ib_alloc_pd got the ability to specify flags as second param */
>> /* so far we don't use flags, but if we start using them, we have to be
>> * aware that the compat layer removes this parameter for old kernels */
>> - cm->pd = ib_alloc_pd(cm->id->device, 0);
>> + cm->pd = ib_alloc_pd(device, 0);
>> if (IS_ERR(cm->pd)) {
>> *cause = IB_ALLOC_PD;
>> err = PTR_ERR(cm->pd);
>> goto pd_failed;
>> }
>>
>> + dtr_get_comp_vectors(path, cq_num, comp_vectors);
>> /* create recv completion queue (CQ) */
>> cq_attr.cqe = rx_descs_max;
>> - cm->recv_cq = ib_create_cq(cm->id->device,
>> + cq_attr.comp_vector = comp_vectors[cq_index++] % device->num_comp_vectors;
>> + cm->recv_cq = ib_create_cq(device,
>> dtr_rx_cq_event_handler, NULL, cm,
>> &cq_attr);
>> if (IS_ERR(cm->recv_cq)) {
>> @@ -2524,7 +2559,8 @@ static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
>>
>> /* create send completion queue (CQ) */
>> cq_attr.cqe = tx_descs_max;
>> - cm->send_cq = ib_create_cq(cm->id->device,
>> + cq_attr.comp_vector = comp_vectors[cq_index++] % device->num_comp_vectors;
>> + cm->send_cq = ib_create_cq(device,
>> dtr_tx_cq_event_handler, NULL, cm,
>> &cq_attr);
>> if (IS_ERR(cm->send_cq)) {
>> --
>> 2.17.1
>>