[PATCH] rdma: Fix multiple disk performance degradation

Philipp Reisner philipp.reisner at linbit.com
Wed Oct 16 18:48:26 CEST 2024


Hi Zhengbing,

My impression is that using the port number to distribute the IRQs is
a hack. What about allocating an array that tracks how many CMs are
assigned to each IRQ, and always picking the IRQ that is currently used
by the fewest CMs?

best regards,
 Philipp

On Sat, Sep 14, 2024 at 8:12 AM zhengbing.huang
<zhengbing.huang at easystack.cn> wrote:
>
> In performance tests of RDMA mode, we found that when two DRBD disks
> were subjected to heavy write I/O at the same time, the IOPS of each
> DRBD disk dropped by half.
>
> The reason is that if the cq_attr.comp_vector parameter
> is not specified when the RDMA transport creates send_cq and recv_cq,
> all CQs are assigned to the same IRQ for processing.
> If multiple disks are then stress-tested at the same time,
> that IRQ is not serviced in a timely manner
> and performance decreases.
>
> The solution is to use the network port number as the index of the comp_vector,
> so that the CQs of each disk are distributed across different IRQs.
>
> Signed-off-by: zhengbing.huang <zhengbing.huang at easystack.cn>
> ---
>  drbd/drbd_transport_rdma.c | 44 ++++++++++++++++++++++++++++++++++----
>  1 file changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/drbd/drbd_transport_rdma.c b/drbd/drbd_transport_rdma.c
> index 813787c28..8915e60d1 100644
> --- a/drbd/drbd_transport_rdma.c
> +++ b/drbd/drbd_transport_rdma.c
> @@ -2486,10 +2486,43 @@ static int dtr_init_flow(struct dtr_path *path, enum drbd_stream stream)
>         return err;
>  }
>
> +static int dtr_get_my_port(struct dtr_path *path)
> +{
> +       int port = 0;
> +       struct sockaddr_storage *addr = (struct sockaddr_storage *)&path->path.my_addr;
> +
> +       if (addr->ss_family == AF_INET6) {
> +               const struct sockaddr_in6 *v6a = (const struct sockaddr_in6 *)addr;
> +
> +               port = be16_to_cpu(v6a->sin6_port);
> +       } else /* AF_INET, AF_SSOCKS, AF_SDP */ {
> +               const struct sockaddr_in *v4a = (const struct sockaddr_in *)addr;
> +
> +               port = be16_to_cpu(v4a->sin_port);
> +       }
> +
> +       return port;
> +}
> +
> +static void dtr_get_comp_vectors(struct dtr_path *path, int cq_num, int *comp_vectors)
> +{
> +       int i;
> +       int tmp_comp_vector = dtr_get_my_port(path) * cq_num;
> +
> +       for (i = 0; i < cq_num; i++) {
> +               comp_vectors[i] = tmp_comp_vector + i;
> +       }
> +
> +       return;
> +}
> +
>  static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
>                                     enum dtr_alloc_rdma_res_causes *cause)
>  {
> -       int err, i, rx_descs_max = 0, tx_descs_max = 0;
> +       int err, i, cq_index = 0, rx_descs_max = 0, tx_descs_max = 0;
> +       int cq_num = 2; /* recv_cq and send_cq */
> +       int comp_vectors[2] = {0}; /* recv_cq and send_cq */
> +       struct ib_device *device = cm->id->device;
>         struct ib_cq_init_attr cq_attr = {};
>         struct dtr_path *path = cm->path;
>
> @@ -2504,16 +2537,18 @@ static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
>         /* in 4.9 ib_alloc_pd got the ability to specify flags as second param */
>         /* so far we don't use flags, but if we start using them, we have to be
>          * aware that the compat layer removes this parameter for old kernels */
> -       cm->pd = ib_alloc_pd(cm->id->device, 0);
> +       cm->pd = ib_alloc_pd(device, 0);
>         if (IS_ERR(cm->pd)) {
>                 *cause = IB_ALLOC_PD;
>                 err = PTR_ERR(cm->pd);
>                 goto pd_failed;
>         }
>
> +       dtr_get_comp_vectors(path, cq_num, comp_vectors);
>         /* create recv completion queue (CQ) */
>         cq_attr.cqe = rx_descs_max;
> -       cm->recv_cq = ib_create_cq(cm->id->device,
> +       cq_attr.comp_vector = comp_vectors[cq_index] % device->num_comp_vectors;
> +       cm->recv_cq = ib_create_cq(device,
>                         dtr_rx_cq_event_handler, NULL, cm,
>                         &cq_attr);
>         if (IS_ERR(cm->recv_cq)) {
> @@ -2524,7 +2559,8 @@ static int _dtr_cm_alloc_rdma_res(struct dtr_cm *cm,
>
>         /* create send completion queue (CQ) */
>         cq_attr.cqe = tx_descs_max;
> -       cm->send_cq = ib_create_cq(cm->id->device,
> +       cq_attr.comp_vector = comp_vectors[cq_index++] % device->num_comp_vectors;
> +       cm->send_cq = ib_create_cq(device,
>                         dtr_tx_cq_event_handler, NULL, cm,
>                         &cq_attr);
>         if (IS_ERR(cm->send_cq)) {
> --
> 2.17.1
>


More information about the drbd-dev mailing list