IB/iser: Add more RX CQs to scale out processing of SCSI responses
Author:     Alex Tabachnik <alext@mellanox.com>
AuthorDate: Sun, 23 Sep 2012 15:17:44 +0000 (15:17 +0000)
Commit:     Roland Dreier <roland@purestorage.com>
CommitDate: Thu, 4 Oct 2012 04:26:49 +0000 (21:26 -0700)
RX/TX CQs will now be selected from a per-HCA pool.  For the RX flow
this has the effect of using different interrupt vectors with
low-level drivers (such as mlx4) that map the "vector" param provided
by the ULP on CQ creation to a dedicated IRQ/MSI-X vector.  This
allows the RX-flow processing of I/O responses to be distributed
across multiple CPUs.
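
For readers less familiar with the verbs interface, here is a minimal,
hypothetical user-space sketch (libibverbs, not the in-kernel API
changed by this patch) of the same idea: creating one CQ per
completion vector so that each CQ's events land on a distinct
IRQ/MSI-X vector.  The pool size and CQ depth are illustrative only.

/* sketch: one CQ per completion vector, capped at MAX_CQ (assumed) */
#include <infiniband/verbs.h>

#define MAX_CQ 4

static int create_cq_pool(struct ibv_context *ctx,
			  struct ibv_cq **cqs)
{
	int i, n = ctx->num_comp_vectors < MAX_CQ ?
		   ctx->num_comp_vectors : MAX_CQ;

	for (i = 0; i < n; i++) {
		/* the last argument selects the completion vector */
		cqs[i] = ibv_create_cq(ctx, 256, NULL, NULL, i);
		if (!cqs[i])
			return -1;	/* caller destroys cqs[0..i-1] */
	}
	return n;
}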

QPs (and hence iSER sessions) are spread across the pool: each new QP
is attached to the CQ that currently has the fewest sessions using it.
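
A sketch of that selection policy in isolation (names assumed,
mirroring the fields added below; in the patch the scan runs under
ig.connlist_mutex and the winning counter is then incremented):

/* pick the CQ index with the fewest active QPs */
static int iser_pick_min_cq(const int *cq_active_qps, int cqs_used)
{
	int index, min_index = 0;

	for (index = 0; index < cqs_used; index++)
		if (cq_active_qps[index] < cq_active_qps[min_index])
			min_index = index;
	return min_index;
}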

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Alex Tabachnik <alext@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_verbs.c

index 296be431a0e93b773b5618efd0b07d018d187b3b..ef7d3be46c316b20361fa1e406c1cae38177e366 100644 (file)
@@ -177,6 +177,7 @@ struct iser_data_buf {
 
 /* fwd declarations */
 struct iser_device;
+struct iser_cq_desc;
 struct iscsi_iser_conn;
 struct iscsi_iser_task;
 struct iscsi_endpoint;
@@ -226,16 +227,21 @@ struct iser_rx_desc {
        char                         pad[ISER_RX_PAD_SIZE];
 } __attribute__((packed));
 
+#define ISER_MAX_CQ 4
+
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
-       struct ib_cq                 *rx_cq;
-       struct ib_cq                 *tx_cq;
+       struct ib_cq                 *rx_cq[ISER_MAX_CQ];
+       struct ib_cq                 *tx_cq[ISER_MAX_CQ];
        struct ib_mr                 *mr;
-       struct tasklet_struct        cq_tasklet;
+       struct tasklet_struct        cq_tasklet[ISER_MAX_CQ];
        struct ib_event_handler      event_handler;
        struct list_head             ig_list; /* entry in ig devices list */
        int                          refcount;
+       int                          cq_active_qps[ISER_MAX_CQ];
+       int                          cqs_used;
+       struct iser_cq_desc          *cq_desc;
 };
 
 struct iser_conn {
@@ -287,6 +293,11 @@ struct iser_page_vec {
        int data_size;
 };
 
+struct iser_cq_desc {
+       struct iser_device           *device;
+       int                          cq_index;
+};
+
 struct iser_global {
        struct mutex      device_list_mutex;/*                   */
        struct list_head  device_list;       /* all iSER devices */
index 2dddabd8fcf9882fe93f59f9dcbf5faaf862f016..95a49affee44cbaa8223da2943722eac43d09300 100644 (file)
@@ -70,32 +70,50 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
+       int i, j;
+       struct iser_cq_desc *cq_desc;
+
+       device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
+       iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
+                device->ib_device->name, device->ib_device->num_comp_vectors);
+
+       device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
+                                 GFP_KERNEL);
+       if (device->cq_desc == NULL)
+               goto cq_desc_err;
+       cq_desc = device->cq_desc;
+
        device->pd = ib_alloc_pd(device->ib_device);
        if (IS_ERR(device->pd))
                goto pd_err;
 
-       device->rx_cq = ib_create_cq(device->ib_device,
-                                 iser_cq_callback,
-                                 iser_cq_event_callback,
-                                 (void *)device,
-                                 ISER_MAX_RX_CQ_LEN, 0);
-       if (IS_ERR(device->rx_cq))
-               goto rx_cq_err;
+       for (i = 0; i < device->cqs_used; i++) {
+               cq_desc[i].device   = device;
+               cq_desc[i].cq_index = i;
+
+               device->rx_cq[i] = ib_create_cq(device->ib_device,
+                                         iser_cq_callback,
+                                         iser_cq_event_callback,
+                                         (void *)&cq_desc[i],
+                                         ISER_MAX_RX_CQ_LEN, i);
+               if (IS_ERR(device->rx_cq[i]))
+                       goto cq_err;
 
-       device->tx_cq = ib_create_cq(device->ib_device,
-                                 NULL, iser_cq_event_callback,
-                                 (void *)device,
-                                 ISER_MAX_TX_CQ_LEN, 0);
+               device->tx_cq[i] = ib_create_cq(device->ib_device,
+                                         NULL, iser_cq_event_callback,
+                                         (void *)&cq_desc[i],
+                                         ISER_MAX_TX_CQ_LEN, i);
 
-       if (IS_ERR(device->tx_cq))
-               goto tx_cq_err;
+               if (IS_ERR(device->tx_cq[i]))
+                       goto cq_err;
 
-       if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
-               goto cq_arm_err;
+               if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
+                       goto cq_err;
 
-       tasklet_init(&device->cq_tasklet,
-                    iser_cq_tasklet_fn,
-                    (unsigned long)device);
+               tasklet_init(&device->cq_tasklet[i],
+                            iser_cq_tasklet_fn,
+                            (unsigned long)&cq_desc[i]);
+       }
 
        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
                                   IB_ACCESS_REMOTE_WRITE |
@@ -113,14 +131,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
 handler_err:
        ib_dereg_mr(device->mr);
 dma_mr_err:
-       tasklet_kill(&device->cq_tasklet);
-cq_arm_err:
-       ib_destroy_cq(device->tx_cq);
-tx_cq_err:
-       ib_destroy_cq(device->rx_cq);
-rx_cq_err:
+       for (j = 0; j < device->cqs_used; j++)
+               tasklet_kill(&device->cq_tasklet[j]);
+cq_err:
+       for (j = 0; j < i; j++) {
+               if (device->tx_cq[j])
+                       ib_destroy_cq(device->tx_cq[j]);
+               if (device->rx_cq[j])
+                       ib_destroy_cq(device->rx_cq[j]);
+       }
        ib_dealloc_pd(device->pd);
 pd_err:
+       kfree(device->cq_desc);
+cq_desc_err:
        iser_err("failed to allocate an IB resource\n");
        return -1;
 }
@@ -131,18 +154,24 @@ pd_err:
  */
 static void iser_free_device_ib_res(struct iser_device *device)
 {
+       int i;
        BUG_ON(device->mr == NULL);
 
-       tasklet_kill(&device->cq_tasklet);
+       for (i = 0; i < device->cqs_used; i++) {
+               tasklet_kill(&device->cq_tasklet[i]);
+               (void)ib_destroy_cq(device->tx_cq[i]);
+               (void)ib_destroy_cq(device->rx_cq[i]);
+               device->tx_cq[i] = NULL;
+               device->rx_cq[i] = NULL;
+       }
+
        (void)ib_unregister_event_handler(&device->event_handler);
        (void)ib_dereg_mr(device->mr);
-       (void)ib_destroy_cq(device->tx_cq);
-       (void)ib_destroy_cq(device->rx_cq);
        (void)ib_dealloc_pd(device->pd);
 
+       kfree(device->cq_desc);
+
        device->mr = NULL;
-       device->tx_cq = NULL;
-       device->rx_cq = NULL;
        device->pd = NULL;
 }
 
@@ -157,6 +186,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
        struct ib_qp_init_attr  init_attr;
        int                     req_err, resp_err, ret = -ENOMEM;
        struct ib_fmr_pool_param params;
+       int index, min_index = 0;
 
        BUG_ON(ib_conn->device == NULL);
 
@@ -220,10 +250,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 
        memset(&init_attr, 0, sizeof init_attr);
 
+       mutex_lock(&ig.connlist_mutex);
+       /* select the CQ with the minimal number of usages */
+       for (index = 0; index < device->cqs_used; index++)
+               if (device->cq_active_qps[index] <
+                   device->cq_active_qps[min_index])
+                       min_index = index;
+       device->cq_active_qps[min_index]++;
+       mutex_unlock(&ig.connlist_mutex);
+       iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
+
        init_attr.event_handler = iser_qp_event_callback;
        init_attr.qp_context    = (void *)ib_conn;
-       init_attr.send_cq       = device->tx_cq;
-       init_attr.recv_cq       = device->rx_cq;
+       init_attr.send_cq       = device->tx_cq[min_index];
+       init_attr.recv_cq       = device->rx_cq[min_index];
        init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
        init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
        init_attr.cap.max_send_sge = 2;
@@ -252,6 +292,7 @@ out_err:
  */
 static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
 {
+       int cq_index;
        BUG_ON(ib_conn == NULL);
 
        iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
@@ -262,9 +303,12 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
        if (ib_conn->fmr_pool != NULL)
                ib_destroy_fmr_pool(ib_conn->fmr_pool);
 
-       if (ib_conn->qp != NULL)
-               rdma_destroy_qp(ib_conn->cma_id);
+       if (ib_conn->qp != NULL) {
+               cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
+               ib_conn->device->cq_active_qps[cq_index]--;
 
+               rdma_destroy_qp(ib_conn->cma_id);
+       }
        /* if cma handler context, the caller acts s.t the cma destroy the id */
        if (ib_conn->cma_id != NULL && can_destroy_id)
                rdma_destroy_id(ib_conn->cma_id);
@@ -791,9 +835,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
        }
 }
 
-static int iser_drain_tx_cq(struct iser_device  *device)
+static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)
 {
-       struct ib_cq  *cq = device->tx_cq;
+       struct ib_cq  *cq = device->tx_cq[cq_index];
        struct ib_wc  wc;
        struct iser_tx_desc *tx_desc;
        struct iser_conn *ib_conn;
@@ -822,8 +866,10 @@ static int iser_drain_tx_cq(struct iser_device  *device)
 
 static void iser_cq_tasklet_fn(unsigned long data)
 {
-        struct iser_device  *device = (struct iser_device *)data;
-        struct ib_cq        *cq = device->rx_cq;
+       struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
+       struct iser_device  *device = cq_desc->device;
+       int cq_index = cq_desc->cq_index;
+       struct ib_cq         *cq = device->rx_cq[cq_index];
         struct ib_wc        wc;
         struct iser_rx_desc *desc;
         unsigned long       xfer_len;
@@ -851,19 +897,21 @@ static void iser_cq_tasklet_fn(unsigned long data)
                }
                completed_rx++;
                if (!(completed_rx & 63))
-                       completed_tx += iser_drain_tx_cq(device);
+                       completed_tx += iser_drain_tx_cq(device, cq_index);
        }
        /* #warning "it is assumed here that arming CQ only once its empty" *
         * " would not cause interrupts to be missed"                       */
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 
-       completed_tx += iser_drain_tx_cq(device);
+       completed_tx += iser_drain_tx_cq(device, cq_index);
        iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
 }
 
 static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
 {
-       struct iser_device  *device = (struct iser_device *)cq_context;
+       struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
+       struct iser_device  *device = cq_desc->device;
+       int cq_index = cq_desc->cq_index;
 
-       tasklet_schedule(&device->cq_tasklet);
+       tasklet_schedule(&device->cq_tasklet[cq_index]);
 }