author		Alex Tabachnik <alext@mellanox.com>	2012-09-23 11:17:44 -0400
committer	Roland Dreier <roland@purestorage.com>	2012-10-04 00:26:49 -0400
commit		5a33a6694942bc86e487d00cd8feebeae5f14546 (patch)
tree		c12494add1644126fb8435e85bae72dd49269952 /drivers/infiniband
parent		7a9a2970b5c1c2ce73d4bb84edaa7ebf13e0c841 (diff)
IB/iser: Add more RX CQs to scale out processing of SCSI responses
RX/TX CQs will now be selected from a per-HCA pool. For the RX flow this
has the effect of using different interrupt vectors when using low-level
drivers (such as mlx4) that map the "vector" param provided by the ULP on
CQ creation to a dedicated IRQ/MSI-X vector. This allows the RX flow
processing of IO responses to be distributed across multiple CPUs.

QPs (--> iSER sessions) are assigned to CQs in round robin order using the
CQ with the minimum number of sessions attached to it.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Alex Tabachnik <alext@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
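The assignment policy described above amounts to a linear scan for the pool
entry with the fewest attached QPs, performed under ig.connlist_mutex when a
connection's QP is created. A minimal standalone sketch of that selection
step (hypothetical, simplified names; not the driver code itself):

/*
 * Illustrative sketch only: pick the least-loaded CQ from a per-HCA pool.
 * active_qps[] and pool_size stand in for the patch's
 * device->cq_active_qps[] and device->cqs_used; the caller is assumed to
 * hold the serializing lock (ig.connlist_mutex in the patch) and to
 * increment the chosen counter once the QP is bound to that CQ.
 */
static int pick_min_used_cq(const int *active_qps, int pool_size)
{
	int index, min_index = 0;

	for (index = 1; index < pool_size; index++)
		if (active_qps[index] < active_qps[min_index])
			min_index = index;

	return min_index;
}

The teardown path decrements the same counter, recovering the CQ index from
the QP's recv_cq->cq_context descriptor, so per-CQ load stays balanced as
sessions come and go.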
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h |  17
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 130
2 files changed, 103 insertions(+), 44 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 296be431a0e9..ef7d3be46c31 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -177,6 +177,7 @@ struct iser_data_buf {
 
 /* fwd declarations */
 struct iser_device;
+struct iser_cq_desc;
 struct iscsi_iser_conn;
 struct iscsi_iser_task;
 struct iscsi_endpoint;
@@ -226,16 +227,21 @@ struct iser_rx_desc {
 	char		             pad[ISER_RX_PAD_SIZE];
 } __attribute__((packed));
 
+#define ISER_MAX_CQ 4
+
 struct iser_device {
 	struct ib_device             *ib_device;
 	struct ib_pd	             *pd;
-	struct ib_cq	             *rx_cq;
-	struct ib_cq	             *tx_cq;
+	struct ib_cq	             *rx_cq[ISER_MAX_CQ];
+	struct ib_cq	             *tx_cq[ISER_MAX_CQ];
 	struct ib_mr	             *mr;
-	struct tasklet_struct        cq_tasklet;
+	struct tasklet_struct        cq_tasklet[ISER_MAX_CQ];
 	struct ib_event_handler      event_handler;
 	struct list_head             ig_list; /* entry in ig devices list */
 	int                          refcount;
+	int                          cq_active_qps[ISER_MAX_CQ];
+	int                          cqs_used;
+	struct iser_cq_desc          *cq_desc;
 };
 
 struct iser_conn {
@@ -287,6 +293,11 @@ struct iser_page_vec {
 	int data_size;
 };
 
+struct iser_cq_desc {
+	struct iser_device           *device;
+	int                          cq_index;
+};
+
 struct iser_global {
 	struct mutex      device_list_mutex;/*                   */
 	struct list_head  device_list;	     /* all iSER devices */
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2dddabd8fcf9..95a49affee44 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -70,32 +70,50 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
+	int i, j;
+	struct iser_cq_desc *cq_desc;
+
+	device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
+	iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
+		 device->ib_device->name, device->ib_device->num_comp_vectors);
+
+	device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
+				  GFP_KERNEL);
+	if (device->cq_desc == NULL)
+		goto cq_desc_err;
+	cq_desc = device->cq_desc;
+
 	device->pd = ib_alloc_pd(device->ib_device);
 	if (IS_ERR(device->pd))
 		goto pd_err;
 
-	device->rx_cq = ib_create_cq(device->ib_device,
-				  iser_cq_callback,
-				  iser_cq_event_callback,
-				  (void *)device,
-				  ISER_MAX_RX_CQ_LEN, 0);
-	if (IS_ERR(device->rx_cq))
-		goto rx_cq_err;
+	for (i = 0; i < device->cqs_used; i++) {
+		cq_desc[i].device   = device;
+		cq_desc[i].cq_index = i;
+
+		device->rx_cq[i] = ib_create_cq(device->ib_device,
+					  iser_cq_callback,
+					  iser_cq_event_callback,
+					  (void *)&cq_desc[i],
+					  ISER_MAX_RX_CQ_LEN, i);
+		if (IS_ERR(device->rx_cq[i]))
+			goto cq_err;
 
-	device->tx_cq = ib_create_cq(device->ib_device,
-				  NULL, iser_cq_event_callback,
-				  (void *)device,
-				  ISER_MAX_TX_CQ_LEN, 0);
+		device->tx_cq[i] = ib_create_cq(device->ib_device,
+					  NULL, iser_cq_event_callback,
+					  (void *)&cq_desc[i],
+					  ISER_MAX_TX_CQ_LEN, i);
 
-	if (IS_ERR(device->tx_cq))
-		goto tx_cq_err;
+		if (IS_ERR(device->tx_cq[i]))
+			goto cq_err;
 
-	if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
-		goto cq_arm_err;
+		if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
+			goto cq_err;
 
-	tasklet_init(&device->cq_tasklet,
-		     iser_cq_tasklet_fn,
-		     (unsigned long)device);
+		tasklet_init(&device->cq_tasklet[i],
+			     iser_cq_tasklet_fn,
+			     (unsigned long)&cq_desc[i]);
+	}
 
 	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
 				   IB_ACCESS_REMOTE_WRITE |
@@ -113,14 +131,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
 handler_err:
 	ib_dereg_mr(device->mr);
 dma_mr_err:
-	tasklet_kill(&device->cq_tasklet);
-cq_arm_err:
-	ib_destroy_cq(device->tx_cq);
-tx_cq_err:
-	ib_destroy_cq(device->rx_cq);
-rx_cq_err:
+	for (j = 0; j < device->cqs_used; j++)
+		tasklet_kill(&device->cq_tasklet[j]);
+cq_err:
+	for (j = 0; j < i; j++) {
+		if (device->tx_cq[j])
+			ib_destroy_cq(device->tx_cq[j]);
+		if (device->rx_cq[j])
+			ib_destroy_cq(device->rx_cq[j]);
+	}
 	ib_dealloc_pd(device->pd);
 pd_err:
+	kfree(device->cq_desc);
+cq_desc_err:
 	iser_err("failed to allocate an IB resource\n");
 	return -1;
 }
@@ -131,18 +154,24 @@ pd_err:
  */
 static void iser_free_device_ib_res(struct iser_device *device)
 {
+	int i;
 	BUG_ON(device->mr == NULL);
 
-	tasklet_kill(&device->cq_tasklet);
+	for (i = 0; i < device->cqs_used; i++) {
+		tasklet_kill(&device->cq_tasklet[i]);
+		(void)ib_destroy_cq(device->tx_cq[i]);
+		(void)ib_destroy_cq(device->rx_cq[i]);
+		device->tx_cq[i] = NULL;
+		device->rx_cq[i] = NULL;
+	}
+
 	(void)ib_unregister_event_handler(&device->event_handler);
 	(void)ib_dereg_mr(device->mr);
-	(void)ib_destroy_cq(device->tx_cq);
-	(void)ib_destroy_cq(device->rx_cq);
 	(void)ib_dealloc_pd(device->pd);
 
+	kfree(device->cq_desc);
+
 	device->mr = NULL;
-	device->tx_cq = NULL;
-	device->rx_cq = NULL;
 	device->pd = NULL;
 }
 
@@ -157,6 +186,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 	struct ib_qp_init_attr	init_attr;
 	int			req_err, resp_err, ret = -ENOMEM;
 	struct ib_fmr_pool_param params;
+	int			index, min_index = 0;
 
 	BUG_ON(ib_conn->device == NULL);
 
@@ -220,10 +250,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 
 	memset(&init_attr, 0, sizeof init_attr);
 
+	mutex_lock(&ig.connlist_mutex);
+	/* select the CQ with the minimal number of usages */
+	for (index = 0; index < device->cqs_used; index++)
+		if (device->cq_active_qps[index] <
+		    device->cq_active_qps[min_index])
+			min_index = index;
+	device->cq_active_qps[min_index]++;
+	mutex_unlock(&ig.connlist_mutex);
+	iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
+
 	init_attr.event_handler = iser_qp_event_callback;
 	init_attr.qp_context	= (void *)ib_conn;
-	init_attr.send_cq	= device->tx_cq;
-	init_attr.recv_cq	= device->rx_cq;
+	init_attr.send_cq	= device->tx_cq[min_index];
+	init_attr.recv_cq	= device->rx_cq[min_index];
 	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
 	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
 	init_attr.cap.max_send_sge = 2;
@@ -252,6 +292,7 @@ out_err:
  */
 static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
 {
+	int cq_index;
 	BUG_ON(ib_conn == NULL);
 
 	iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
@@ -262,9 +303,12 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
 	if (ib_conn->fmr_pool != NULL)
 		ib_destroy_fmr_pool(ib_conn->fmr_pool);
 
-	if (ib_conn->qp != NULL)
-		rdma_destroy_qp(ib_conn->cma_id);
+	if (ib_conn->qp != NULL) {
+		cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
+		ib_conn->device->cq_active_qps[cq_index]--;
 
+		rdma_destroy_qp(ib_conn->cma_id);
+	}
 	/* if cma handler context, the caller acts s.t the cma destroy the id */
 	if (ib_conn->cma_id != NULL && can_destroy_id)
 		rdma_destroy_id(ib_conn->cma_id);
@@ -791,9 +835,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
 	}
 }
 
-static int iser_drain_tx_cq(struct iser_device *device)
+static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
 {
-	struct ib_cq  *cq = device->tx_cq;
+	struct ib_cq  *cq = device->tx_cq[cq_index];
 	struct ib_wc  wc;
 	struct iser_tx_desc *tx_desc;
 	struct iser_conn *ib_conn;
@@ -822,8 +866,10 @@ static int iser_drain_tx_cq(struct iser_device *device)
 
 static void iser_cq_tasklet_fn(unsigned long data)
 {
-	struct iser_device  *device = (struct iser_device *)data;
-	struct ib_cq	    *cq = device->rx_cq;
+	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
+	struct iser_device  *device = cq_desc->device;
+	int cq_index = cq_desc->cq_index;
+	struct ib_cq	    *cq = device->rx_cq[cq_index];
 	struct ib_wc	     wc;
 	struct iser_rx_desc *desc;
 	unsigned long	     xfer_len;
@@ -851,19 +897,21 @@ static void iser_cq_tasklet_fn(unsigned long data)
 		}
 		completed_rx++;
 		if (!(completed_rx & 63))
-			completed_tx += iser_drain_tx_cq(device);
+			completed_tx += iser_drain_tx_cq(device, cq_index);
 	}
 	/* #warning "it is assumed here that arming CQ only once its empty" *
 	 * " would not cause interrupts to be missed"                       */
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 
-	completed_tx += iser_drain_tx_cq(device);
+	completed_tx += iser_drain_tx_cq(device, cq_index);
 	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
 }
 
 static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
 {
-	struct iser_device  *device = (struct iser_device *)cq_context;
+	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
+	struct iser_device  *device = cq_desc->device;
+	int cq_index = cq_desc->cq_index;
 
-	tasklet_schedule(&device->cq_tasklet);
+	tasklet_schedule(&device->cq_tasklet[cq_index]);
 }