-rw-r--r--  Documentation/filesystems/nfs/nfs-rdma.txt | 9
-rw-r--r--  MAINTAINERS | 9
-rw-r--r--  drivers/infiniband/core/umem.c | 7
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 22
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c | 457
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 9
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 26
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 14
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 7
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c | 44
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 31
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 18
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 195
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 73
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 520
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 44
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 66
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 66
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 523
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 220
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 9
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 188
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/cmd.c | 42
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/eq.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/main.c | 31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 10
-rw-r--r--  include/linux/mlx4/device.h | 4
28 files changed, 1513 insertions(+), 1134 deletions(-)
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 724043858b08..95c13aa575ff 100644
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
187 To further test the InfiniBand software stack, use IPoIB (this 187 To further test the InfiniBand software stack, use IPoIB (this
188 assumes you have two IB hosts named host1 and host2): 188 assumes you have two IB hosts named host1 and host2):
189 189
190 host1$ ifconfig ib0 a.b.c.x 190 host1$ ip link set dev ib0 up
191 host2$ ifconfig ib0 a.b.c.y 191 host1$ ip address add dev ib0 a.b.c.x
192 host2$ ip link set dev ib0 up
193 host2$ ip address add dev ib0 a.b.c.y
192 host1$ ping a.b.c.y 194 host1$ ping a.b.c.y
193 host2$ ping a.b.c.x 195 host2$ ping a.b.c.x
194 196
@@ -229,7 +231,8 @@ NFS/RDMA Setup
229 231
230 $ modprobe ib_mthca 232 $ modprobe ib_mthca
231 $ modprobe ib_ipoib 233 $ modprobe ib_ipoib
232 $ ifconfig ib0 a.b.c.d 234 $ ip li set dev ib0 up
235 $ ip addr add dev ib0 a.b.c.d
233 236
234 NOTE: use unique addresses for the client and server 237 NOTE: use unique addresses for the client and server
235 238
diff --git a/MAINTAINERS b/MAINTAINERS
index ad5b42913527..ea0001760035 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8803,6 +8803,15 @@ W: http://www.emulex.com
8803S: Supported 8803S: Supported
8804F: drivers/net/ethernet/emulex/benet/ 8804F: drivers/net/ethernet/emulex/benet/
8805 8805
8806EMULEX ONECONNECT ROCE DRIVER
8807M: Selvin Xavier <selvin.xavier@emulex.com>
8808M: Devesh Sharma <devesh.sharma@emulex.com>
8809M: Mitesh Ahuja <mitesh.ahuja@emulex.com>
8810L: linux-rdma@vger.kernel.org
8811W: http://www.emulex.com
8812S: Supported
8813F: drivers/infiniband/hw/ocrdma/
8814
8806SFC NETWORK DRIVER 8815SFC NETWORK DRIVER
8807M: Solarflare linux maintainers <linux-net-drivers@solarflare.com> 8816M: Solarflare linux maintainers <linux-net-drivers@solarflare.com>
8808M: Shradha Shah <sshah@solarflare.com> 8817M: Shradha Shah <sshah@solarflare.com>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 8c014b5dab4c..38acb3cfc545 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
99 if (dmasync) 99 if (dmasync)
100 dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); 100 dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
101 101
102 if (!size)
103 return ERR_PTR(-EINVAL);
104
102 /* 105 /*
103 * If the combination of the addr and size requested for this memory 106 * If the combination of the addr and size requested for this memory
104 * region causes an integer overflow, return error. 107 * region causes an integer overflow, return error.
105 */ 108 */
106 if ((PAGE_ALIGN(addr + size) <= size) || 109 if (((addr + size) < addr) ||
107 (PAGE_ALIGN(addr + size) <= addr)) 110 PAGE_ALIGN(addr + size) < (addr + size))
108 return ERR_PTR(-EINVAL); 111 return ERR_PTR(-EINVAL);
109 112
110 if (!can_do_mlock()) 113 if (!can_do_mlock())
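The umem.c hunk above tightens the argument checks in ib_umem_get(): a zero-length request is now rejected outright, and the overflow test is rewritten to check the raw addr + size sum and its page-aligned value directly. A minimal userspace sketch of that check, with PAGE_SIZE and the function name chosen only for illustration:

    #include <stdio.h>

    #define PAGE_SIZE     4096UL
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Model of the validation above: reject empty ranges and any addr/size
     * combination whose end (raw or page-aligned) wraps around zero. */
    static int check_umem_range(unsigned long addr, unsigned long size)
    {
        if (!size)
            return -1;
        if ((addr + size) < addr ||                   /* raw end wraps     */
            PAGE_ALIGN(addr + size) < (addr + size))  /* aligned end wraps */
            return -1;
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_umem_range(0x1000, 0));         /* -1: empty       */
        printf("%d\n", check_umem_range(~0UL - 100, 200));   /* -1: raw wrap    */
        printf("%d\n", check_umem_range(~0UL - 100, 50));    /* -1: align wrap  */
        printf("%d\n", check_umem_range(0x1000, 8192));      /*  0: fine        */
        return 0;
    }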
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 259dcc7779f5..88cce9bb72fe 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
246 kfree(uqp); 246 kfree(uqp);
247 } 247 }
248 248
249 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
250 struct ib_srq *srq = uobj->object;
251 struct ib_uevent_object *uevent =
252 container_of(uobj, struct ib_uevent_object, uobject);
253
254 idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
255 ib_destroy_srq(srq);
256 ib_uverbs_release_uevent(file, uevent);
257 kfree(uevent);
258 }
259
249 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { 260 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
250 struct ib_cq *cq = uobj->object; 261 struct ib_cq *cq = uobj->object;
251 struct ib_uverbs_event_file *ev_file = cq->cq_context; 262 struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
258 kfree(ucq); 269 kfree(ucq);
259 } 270 }
260 271
261 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
262 struct ib_srq *srq = uobj->object;
263 struct ib_uevent_object *uevent =
264 container_of(uobj, struct ib_uevent_object, uobject);
265
266 idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
267 ib_destroy_srq(srq);
268 ib_uverbs_release_uevent(file, uevent);
269 kfree(uevent);
270 }
271
272 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { 272 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
273 struct ib_mr *mr = uobj->object; 273 struct ib_mr *mr = uobj->object;
274 274
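The uverbs cleanup hunk only moves the SRQ teardown loop ahead of the CQ loop, presumably because an SRQ (an XRC SRQ, for instance) can hold a reference to a completion queue, so the dependent object has to be released first. A toy model of that ordering rule; the struct and function names below are illustrative, not the uverbs objects:

    #include <stdio.h>
    #include <stdlib.h>

    /* The SRQ-like object points into the CQ-like object, so it must be
     * destroyed first -- mirroring the new srq_list-before-cq_list order. */
    struct cq  { int id; };
    struct srq { struct cq *cq; };

    static void destroy_srq(struct srq *s)
    {
        printf("destroy srq (uses cq %d)\n", s->cq->id);
        free(s);
    }

    static void destroy_cq(struct cq *c)
    {
        printf("destroy cq %d\n", c->id);
        free(c);
    }

    int main(void)
    {
        struct cq  *c = malloc(sizeof(*c));
        struct srq *s = malloc(sizeof(*s));

        c->id = 1;
        s->cq = c;

        destroy_srq(s);   /* dependent object first        */
        destroy_cq(c);    /* then the object it referenced */
        return 0;
    }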
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index a31e031afd87..0f00204d2ece 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -58,14 +58,19 @@ struct mlx4_alias_guid_work_context {
58 int query_id; 58 int query_id;
59 struct list_head list; 59 struct list_head list;
60 int block_num; 60 int block_num;
61 ib_sa_comp_mask guid_indexes;
62 u8 method;
61}; 63};
62 64
63struct mlx4_next_alias_guid_work { 65struct mlx4_next_alias_guid_work {
64 u8 port; 66 u8 port;
65 u8 block_num; 67 u8 block_num;
68 u8 method;
66 struct mlx4_sriov_alias_guid_info_rec_det rec_det; 69 struct mlx4_sriov_alias_guid_info_rec_det rec_det;
67}; 70};
68 71
72static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
73 int *resched_delay_sec);
69 74
70void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, 75void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
71 u8 port_num, u8 *p_data) 76 u8 port_num, u8 *p_data)
@@ -118,6 +123,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
118 return IB_SA_COMP_MASK(4 + index); 123 return IB_SA_COMP_MASK(4 + index);
119} 124}
120 125
126void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
127 int port, int slave_init)
128{
129 __be64 curr_guid, required_guid;
130 int record_num = slave / 8;
131 int index = slave % 8;
132 int port_index = port - 1;
133 unsigned long flags;
134 int do_work = 0;
135
136 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
137 if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
138 GUID_STATE_NEED_PORT_INIT)
139 goto unlock;
140 if (!slave_init) {
141 curr_guid = *(__be64 *)&dev->sriov.
142 alias_guid.ports_guid[port_index].
143 all_rec_per_port[record_num].
144 all_recs[GUID_REC_SIZE * index];
145 if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
146 !curr_guid)
147 goto unlock;
148 required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
149 } else {
150 required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
151 if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
152 goto unlock;
153 }
154 *(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
155 all_rec_per_port[record_num].
156 all_recs[GUID_REC_SIZE * index] = required_guid;
157 dev->sriov.alias_guid.ports_guid[port_index].
158 all_rec_per_port[record_num].guid_indexes
159 |= mlx4_ib_get_aguid_comp_mask_from_ix(index);
160 dev->sriov.alias_guid.ports_guid[port_index].
161 all_rec_per_port[record_num].status
162 = MLX4_GUID_INFO_STATUS_IDLE;
163 /* set to run immediately */
164 dev->sriov.alias_guid.ports_guid[port_index].
165 all_rec_per_port[record_num].time_to_run = 0;
166 dev->sriov.alias_guid.ports_guid[port_index].
167 all_rec_per_port[record_num].
168 guids_retry_schedule[index] = 0;
169 do_work = 1;
170unlock:
171 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
172
173 if (do_work)
174 mlx4_ib_init_alias_guid_work(dev, port_index);
175}
176
121/* 177/*
122 * Whenever new GUID is set/unset (guid table change) create event and 178 * Whenever new GUID is set/unset (guid table change) create event and
123 * notify the relevant slave (master also should be notified). 179 * notify the relevant slave (master also should be notified).
@@ -138,10 +194,15 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
138 enum slave_port_state prev_state; 194 enum slave_port_state prev_state;
139 __be64 tmp_cur_ag, form_cache_ag; 195 __be64 tmp_cur_ag, form_cache_ag;
140 enum slave_port_gen_event gen_event; 196 enum slave_port_gen_event gen_event;
197 struct mlx4_sriov_alias_guid_info_rec_det *rec;
198 unsigned long flags;
199 __be64 required_value;
141 200
142 if (!mlx4_is_master(dev->dev)) 201 if (!mlx4_is_master(dev->dev))
143 return; 202 return;
144 203
204 rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
205 all_rec_per_port[block_num];
145 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. 206 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
146 ports_guid[port_num - 1]. 207 ports_guid[port_num - 1].
147 all_rec_per_port[block_num].guid_indexes); 208 all_rec_per_port[block_num].guid_indexes);
@@ -166,8 +227,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
166 */ 227 */
167 if (tmp_cur_ag != form_cache_ag) 228 if (tmp_cur_ag != form_cache_ag)
168 continue; 229 continue;
169 mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
170 230
231 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
232 required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
233
234 if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
235 required_value = 0;
236
237 if (tmp_cur_ag == required_value) {
238 rec->guid_indexes = rec->guid_indexes &
239 ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
240 } else {
241 /* may notify port down if value is 0 */
242 if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
243 spin_unlock_irqrestore(&dev->sriov.
244 alias_guid.ag_work_lock, flags);
245 continue;
246 }
247 }
248 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
249 flags);
250 mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
171 /*2 cases: Valid GUID, and Invalid Guid*/ 251 /*2 cases: Valid GUID, and Invalid Guid*/
172 252
173 if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/ 253 if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -188,10 +268,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
188 set_and_calc_slave_port_state(dev->dev, slave_id, port_num, 268 set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
189 MLX4_PORT_STATE_IB_EVENT_GID_INVALID, 269 MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
190 &gen_event); 270 &gen_event);
191 pr_debug("sending PORT DOWN event to slave: %d, port: %d\n", 271 if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
192 slave_id, port_num); 272 pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
193 mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num, 273 slave_id, port_num);
194 MLX4_PORT_CHANGE_SUBTYPE_DOWN); 274 mlx4_gen_port_state_change_eqe(dev->dev,
275 slave_id,
276 port_num,
277 MLX4_PORT_CHANGE_SUBTYPE_DOWN);
278 }
195 } 279 }
196 } 280 }
197} 281}
@@ -206,6 +290,9 @@ static void aliasguid_query_handler(int status,
206 int i; 290 int i;
207 struct mlx4_sriov_alias_guid_info_rec_det *rec; 291 struct mlx4_sriov_alias_guid_info_rec_det *rec;
208 unsigned long flags, flags1; 292 unsigned long flags, flags1;
293 ib_sa_comp_mask declined_guid_indexes = 0;
294 ib_sa_comp_mask applied_guid_indexes = 0;
295 unsigned int resched_delay_sec = 0;
209 296
210 if (!context) 297 if (!context)
211 return; 298 return;
@@ -216,9 +303,9 @@ static void aliasguid_query_handler(int status,
216 all_rec_per_port[cb_ctx->block_num]; 303 all_rec_per_port[cb_ctx->block_num];
217 304
218 if (status) { 305 if (status) {
219 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
220 pr_debug("(port: %d) failed: status = %d\n", 306 pr_debug("(port: %d) failed: status = %d\n",
221 cb_ctx->port, status); 307 cb_ctx->port, status);
308 rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
222 goto out; 309 goto out;
223 } 310 }
224 311
@@ -235,57 +322,101 @@ static void aliasguid_query_handler(int status,
235 rec = &dev->sriov.alias_guid.ports_guid[port_index]. 322 rec = &dev->sriov.alias_guid.ports_guid[port_index].
236 all_rec_per_port[guid_rec->block_num]; 323 all_rec_per_port[guid_rec->block_num];
237 324
238 rec->status = MLX4_GUID_INFO_STATUS_SET; 325 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
239 rec->method = MLX4_GUID_INFO_RECORD_SET;
240
241 for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { 326 for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
242 __be64 tmp_cur_ag; 327 __be64 sm_response, required_val;
243 tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE]; 328
329 if (!(cb_ctx->guid_indexes &
330 mlx4_ib_get_aguid_comp_mask_from_ix(i)))
331 continue;
332 sm_response = *(__be64 *)&guid_rec->guid_info_list
333 [i * GUID_REC_SIZE];
334 required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
335 if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
336 if (required_val ==
337 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
338 goto next_entry;
339
340 /* A new value was set till we got the response */
341 pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
342 be64_to_cpu(required_val),
343 i, guid_rec->block_num);
344 goto entry_declined;
345 }
346
244 /* check if the SM didn't assign one of the records. 347 /* check if the SM didn't assign one of the records.
245 * if it didn't, if it was not sysadmin request: 348 * if it didn't, re-ask for.
246 * ask the SM to give a new GUID, (instead of the driver request).
247 */ 349 */
248 if (tmp_cur_ag == MLX4_NOT_SET_GUID) { 350 if (sm_response == MLX4_NOT_SET_GUID) {
249 mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in " 351 if (rec->guids_retry_schedule[i] == 0)
250 "block_num: %d was declined by SM, " 352 mlx4_ib_warn(&dev->ib_dev,
251 "ownership by %d (0 = driver, 1=sysAdmin," 353 "%s:Record num %d in block_num: %d was declined by SM\n",
252 " 2=None)\n", __func__, i, 354 __func__, i,
253 guid_rec->block_num, rec->ownership); 355 guid_rec->block_num);
254 if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) { 356 goto entry_declined;
255 /* if it is driver assign, asks for new GUID from SM*/
256 *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
257 MLX4_NOT_SET_GUID;
258
259 /* Mark the record as not assigned, and let it
260 * be sent again in the next work sched.*/
261 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
262 rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
263 }
264 } else { 357 } else {
265 /* properly assigned record. */ 358 /* properly assigned record. */
266 /* We save the GUID we just got from the SM in the 359 /* We save the GUID we just got from the SM in the
267 * admin_guid in order to be persistent, and in the 360 * admin_guid in order to be persistent, and in the
268 * request from the sm the process will ask for the same GUID */ 361 * request from the sm the process will ask for the same GUID */
269 if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN && 362 if (required_val &&
270 tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) { 363 sm_response != required_val) {
271 /* the sysadmin assignment failed.*/ 364 /* Warn only on first retry */
272 mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" 365 if (rec->guids_retry_schedule[i] == 0)
273 " admin guid after SysAdmin " 366 mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
274 "configuration. " 367 " admin guid after SysAdmin "
275 "Record num %d in block_num:%d " 368 "configuration. "
276 "was declined by SM, " 369 "Record num %d in block_num:%d "
277 "new val(0x%llx) was kept\n", 370 "was declined by SM, "
278 __func__, i, 371 "new val(0x%llx) was kept, SM returned (0x%llx)\n",
279 guid_rec->block_num, 372 __func__, i,
280 be64_to_cpu(*(__be64 *) & 373 guid_rec->block_num,
281 rec->all_recs[i * GUID_REC_SIZE])); 374 be64_to_cpu(required_val),
375 be64_to_cpu(sm_response));
376 goto entry_declined;
282 } else { 377 } else {
283 memcpy(&rec->all_recs[i * GUID_REC_SIZE], 378 *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
284 &guid_rec->guid_info_list[i * GUID_REC_SIZE], 379 sm_response;
285 GUID_REC_SIZE); 380 if (required_val == 0)
381 mlx4_set_admin_guid(dev->dev,
382 sm_response,
383 (guid_rec->block_num
384 * NUM_ALIAS_GUID_IN_REC) + i,
385 cb_ctx->port);
386 goto next_entry;
286 } 387 }
287 } 388 }
389entry_declined:
390 declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
391 rec->guids_retry_schedule[i] =
392 (rec->guids_retry_schedule[i] == 0) ? 1 :
393 min((unsigned int)60,
394 rec->guids_retry_schedule[i] * 2);
395 /* using the minimum value among all entries in that record */
396 resched_delay_sec = (resched_delay_sec == 0) ?
397 rec->guids_retry_schedule[i] :
398 min(resched_delay_sec,
399 rec->guids_retry_schedule[i]);
400 continue;
401
402next_entry:
403 rec->guids_retry_schedule[i] = 0;
288 } 404 }
405
406 applied_guid_indexes = cb_ctx->guid_indexes & ~declined_guid_indexes;
407 if (declined_guid_indexes ||
408 rec->guid_indexes & ~(applied_guid_indexes)) {
409 pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
410 guid_rec->block_num,
411 be64_to_cpu((__force __be64)rec->guid_indexes),
412 be64_to_cpu((__force __be64)applied_guid_indexes),
413 be64_to_cpu((__force __be64)declined_guid_indexes));
414 rec->time_to_run = ktime_get_real_ns() +
415 resched_delay_sec * NSEC_PER_SEC;
416 } else {
417 rec->status = MLX4_GUID_INFO_STATUS_SET;
418 }
419 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
289 /* 420 /*
290 The func is call here to close the cases when the 421 The func is call here to close the cases when the
291 sm doesn't send smp, so in the sa response the driver 422 sm doesn't send smp, so in the sa response the driver
@@ -297,10 +428,13 @@ static void aliasguid_query_handler(int status,
297out: 428out:
298 spin_lock_irqsave(&dev->sriov.going_down_lock, flags); 429 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
299 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); 430 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
300 if (!dev->sriov.is_going_down) 431 if (!dev->sriov.is_going_down) {
432 get_low_record_time_index(dev, port_index, &resched_delay_sec);
301 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq, 433 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
302 &dev->sriov.alias_guid.ports_guid[port_index]. 434 &dev->sriov.alias_guid.ports_guid[port_index].
303 alias_guid_work, 0); 435 alias_guid_work,
436 msecs_to_jiffies(resched_delay_sec * 1000));
437 }
304 if (cb_ctx->sa_query) { 438 if (cb_ctx->sa_query) {
305 list_del(&cb_ctx->list); 439 list_del(&cb_ctx->list);
306 kfree(cb_ctx); 440 kfree(cb_ctx);
@@ -317,9 +451,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
317 ib_sa_comp_mask comp_mask = 0; 451 ib_sa_comp_mask comp_mask = 0;
318 452
319 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status 453 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
320 = MLX4_GUID_INFO_STATUS_IDLE; 454 = MLX4_GUID_INFO_STATUS_SET;
321 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
322 = MLX4_GUID_INFO_RECORD_SET;
323 455
324 /* calculate the comp_mask for that record.*/ 456 /* calculate the comp_mask for that record.*/
325 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { 457 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -333,19 +465,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
333 need to assign GUIDs, then don't put it up for assignment. 465 need to assign GUIDs, then don't put it up for assignment.
334 */ 466 */
335 if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val || 467 if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
336 (!index && !i) || 468 (!index && !i))
337 MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
338 ports_guid[port - 1].all_rec_per_port[index].ownership)
339 continue; 469 continue;
340 comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i); 470 comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
341 } 471 }
342 dev->sriov.alias_guid.ports_guid[port - 1]. 472 dev->sriov.alias_guid.ports_guid[port - 1].
343 all_rec_per_port[index].guid_indexes = comp_mask; 473 all_rec_per_port[index].guid_indexes |= comp_mask;
474 if (dev->sriov.alias_guid.ports_guid[port - 1].
475 all_rec_per_port[index].guid_indexes)
476 dev->sriov.alias_guid.ports_guid[port - 1].
477 all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
478
344} 479}
345 480
346static int set_guid_rec(struct ib_device *ibdev, 481static int set_guid_rec(struct ib_device *ibdev,
347 u8 port, int index, 482 struct mlx4_next_alias_guid_work *rec)
348 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
349{ 483{
350 int err; 484 int err;
351 struct mlx4_ib_dev *dev = to_mdev(ibdev); 485 struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -354,6 +488,9 @@ static int set_guid_rec(struct ib_device *ibdev,
354 struct ib_port_attr attr; 488 struct ib_port_attr attr;
355 struct mlx4_alias_guid_work_context *callback_context; 489 struct mlx4_alias_guid_work_context *callback_context;
356 unsigned long resched_delay, flags, flags1; 490 unsigned long resched_delay, flags, flags1;
491 u8 port = rec->port + 1;
492 int index = rec->block_num;
493 struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
357 struct list_head *head = 494 struct list_head *head =
358 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; 495 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
359 496
@@ -380,6 +517,8 @@ static int set_guid_rec(struct ib_device *ibdev,
380 callback_context->port = port; 517 callback_context->port = port;
381 callback_context->dev = dev; 518 callback_context->dev = dev;
382 callback_context->block_num = index; 519 callback_context->block_num = index;
520 callback_context->guid_indexes = rec_det->guid_indexes;
521 callback_context->method = rec->method;
383 522
384 memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); 523 memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
385 524
@@ -399,7 +538,7 @@ static int set_guid_rec(struct ib_device *ibdev,
399 callback_context->query_id = 538 callback_context->query_id =
400 ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client, 539 ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
401 ibdev, port, &guid_info_rec, 540 ibdev, port, &guid_info_rec,
402 comp_mask, rec_det->method, 1000, 541 comp_mask, rec->method, 1000,
403 GFP_KERNEL, aliasguid_query_handler, 542 GFP_KERNEL, aliasguid_query_handler,
404 callback_context, 543 callback_context,
405 &callback_context->sa_query); 544 &callback_context->sa_query);
@@ -434,6 +573,30 @@ out:
434 return err; 573 return err;
435} 574}
436 575
576static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
577{
578 int j, k, entry;
579 __be64 guid;
580
581 /*Check if the SM doesn't need to assign the GUIDs*/
582 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
583 for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
584 entry = j * NUM_ALIAS_GUID_IN_REC + k;
585 /* no request for the 0 entry (hw guid) */
586 if (!entry || entry > dev->dev->persist->num_vfs ||
587 !mlx4_is_slave_active(dev->dev, entry))
588 continue;
589 guid = mlx4_get_admin_guid(dev->dev, entry, port);
590 *(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
591 all_rec_per_port[j].all_recs
592 [GUID_REC_SIZE * k] = guid;
593 pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
594 entry,
595 be64_to_cpu(guid),
596 port);
597 }
598 }
599}
437void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) 600void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
438{ 601{
439 int i; 602 int i;
@@ -443,6 +606,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
443 606
444 spin_lock_irqsave(&dev->sriov.going_down_lock, flags); 607 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
445 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); 608 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
609
610 if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
611 GUID_STATE_NEED_PORT_INIT) {
612 mlx4_ib_guid_port_init(dev, port);
613 dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
614 (~GUID_STATE_NEED_PORT_INIT);
615 }
446 for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++) 616 for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
447 invalidate_guid_record(dev, port, i); 617 invalidate_guid_record(dev, port, i);
448 618
@@ -462,60 +632,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
462 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); 632 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
463} 633}
464 634
465/* The function returns the next record that was 635static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
466 * not configured (or failed to be configured) */ 636 struct mlx4_next_alias_guid_work *next_rec,
467static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, 637 int record_index)
468 struct mlx4_next_alias_guid_work *rec)
469{ 638{
470 int j; 639 int i;
471 unsigned long flags; 640 int lowset_time_entry = -1;
641 int lowest_time = 0;
642 ib_sa_comp_mask delete_guid_indexes = 0;
643 ib_sa_comp_mask set_guid_indexes = 0;
644 struct mlx4_sriov_alias_guid_info_rec_det *rec =
645 &dev->sriov.alias_guid.ports_guid[port].
646 all_rec_per_port[record_index];
472 647
473 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { 648 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
474 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); 649 if (!(rec->guid_indexes &
475 if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status == 650 mlx4_ib_get_aguid_comp_mask_from_ix(i)))
476 MLX4_GUID_INFO_STATUS_IDLE) { 651 continue;
477 memcpy(&rec->rec_det, 652
478 &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j], 653 if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
479 sizeof (struct mlx4_sriov_alias_guid_info_rec_det)); 654 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
480 rec->port = port; 655 delete_guid_indexes |=
481 rec->block_num = j; 656 mlx4_ib_get_aguid_comp_mask_from_ix(i);
482 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status = 657 else
483 MLX4_GUID_INFO_STATUS_PENDING; 658 set_guid_indexes |=
484 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); 659 mlx4_ib_get_aguid_comp_mask_from_ix(i);
485 return 0; 660
661 if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
662 lowest_time) {
663 lowset_time_entry = i;
664 lowest_time = rec->guids_retry_schedule[i];
486 } 665 }
487 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
488 } 666 }
489 return -ENOENT; 667
668 memcpy(&next_rec->rec_det, rec, sizeof(*rec));
669 next_rec->port = port;
670 next_rec->block_num = record_index;
671
672 if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
673 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
674 next_rec->rec_det.guid_indexes = delete_guid_indexes;
675 next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
676 } else {
677 next_rec->rec_det.guid_indexes = set_guid_indexes;
678 next_rec->method = MLX4_GUID_INFO_RECORD_SET;
679 }
490} 680}
491 681
492static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port, 682/* return index of record that should be updated based on lowest
493 int rec_index, 683 * rescheduled time
494 struct mlx4_sriov_alias_guid_info_rec_det *rec_det) 684 */
685static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
686 int *resched_delay_sec)
495{ 687{
496 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes = 688 int record_index = -1;
497 rec_det->guid_indexes; 689 u64 low_record_time = 0;
498 memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs, 690 struct mlx4_sriov_alias_guid_info_rec_det rec;
499 rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); 691 int j;
500 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status = 692
501 rec_det->status; 693 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
694 rec = dev->sriov.alias_guid.ports_guid[port].
695 all_rec_per_port[j];
696 if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
697 rec.guid_indexes) {
698 if (record_index == -1 ||
699 rec.time_to_run < low_record_time) {
700 record_index = j;
701 low_record_time = rec.time_to_run;
702 }
703 }
704 }
705 if (resched_delay_sec) {
706 u64 curr_time = ktime_get_real_ns();
707
708 *resched_delay_sec = (low_record_time < curr_time) ? 0 :
709 div_u64((low_record_time - curr_time), NSEC_PER_SEC);
710 }
711
712 return record_index;
502} 713}
503 714
504static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port) 715/* The function returns the next record that was
716 * not configured (or failed to be configured) */
717static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
718 struct mlx4_next_alias_guid_work *rec)
505{ 719{
506 int j; 720 unsigned long flags;
507 struct mlx4_sriov_alias_guid_info_rec_det rec_det ; 721 int record_index;
508 722 int ret = 0;
509 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) { 723
510 memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); 724 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
511 rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) | 725 record_index = get_low_record_time_index(dev, port, NULL);
512 IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 | 726
513 IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 | 727 if (record_index < 0) {
514 IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 | 728 ret = -ENOENT;
515 IB_SA_GUIDINFO_REC_GID7; 729 goto out;
516 rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
517 set_administratively_guid_record(dev, port, j, &rec_det);
518 } 730 }
731
732 set_required_record(dev, port, rec, record_index);
733out:
734 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
735 return ret;
519} 736}
520 737
521static void alias_guid_work(struct work_struct *work) 738static void alias_guid_work(struct work_struct *work)
@@ -545,9 +762,7 @@ static void alias_guid_work(struct work_struct *work)
545 goto out; 762 goto out;
546 } 763 }
547 764
548 set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num, 765 set_guid_rec(&dev->ib_dev, rec);
549 &rec->rec_det);
550
551out: 766out:
552 kfree(rec); 767 kfree(rec);
553} 768}
@@ -562,6 +777,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
562 spin_lock_irqsave(&dev->sriov.going_down_lock, flags); 777 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
563 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); 778 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
564 if (!dev->sriov.is_going_down) { 779 if (!dev->sriov.is_going_down) {
780 /* If there is pending one should cancell then run, otherwise
781 * won't run till previous one is ended as same work
782 * struct is used.
783 */
784 cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
785 alias_guid_work);
565 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq, 786 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
566 &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0); 787 &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
567 } 788 }
@@ -609,7 +830,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
609{ 830{
610 char alias_wq_name[15]; 831 char alias_wq_name[15];
611 int ret = 0; 832 int ret = 0;
612 int i, j, k; 833 int i, j;
613 union ib_gid gid; 834 union ib_gid gid;
614 835
615 if (!mlx4_is_master(dev->dev)) 836 if (!mlx4_is_master(dev->dev))
@@ -633,33 +854,25 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
633 for (i = 0 ; i < dev->num_ports; i++) { 854 for (i = 0 ; i < dev->num_ports; i++) {
634 memset(&dev->sriov.alias_guid.ports_guid[i], 0, 855 memset(&dev->sriov.alias_guid.ports_guid[i], 0,
635 sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); 856 sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
636 /*Check if the SM doesn't need to assign the GUIDs*/ 857 dev->sriov.alias_guid.ports_guid[i].state_flags |=
858 GUID_STATE_NEED_PORT_INIT;
637 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { 859 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
638 if (mlx4_ib_sm_guid_assign) { 860 /* mark each val as it was deleted */
639 dev->sriov.alias_guid.ports_guid[i]. 861 memset(dev->sriov.alias_guid.ports_guid[i].
640 all_rec_per_port[j]. 862 all_rec_per_port[j].all_recs, 0xFF,
641 ownership = MLX4_GUID_DRIVER_ASSIGN; 863 sizeof(dev->sriov.alias_guid.ports_guid[i].
642 continue; 864 all_rec_per_port[j].all_recs));
643 }
644 dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
645 ownership = MLX4_GUID_NONE_ASSIGN;
646 /*mark each val as it was deleted,
647 till the sysAdmin will give it valid val*/
648 for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
649 *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
650 all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
651 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
652 }
653 } 865 }
654 INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); 866 INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
655 /*prepare the records, set them to be allocated by sm*/ 867 /*prepare the records, set them to be allocated by sm*/
868 if (mlx4_ib_sm_guid_assign)
869 for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
870 mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
656 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) 871 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
657 invalidate_guid_record(dev, i + 1, j); 872 invalidate_guid_record(dev, i + 1, j);
658 873
659 dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; 874 dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
660 dev->sriov.alias_guid.ports_guid[i].port = i; 875 dev->sriov.alias_guid.ports_guid[i].port = i;
661 if (mlx4_ib_sm_guid_assign)
662 set_all_slaves_guids(dev, i);
663 876
664 snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); 877 snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
665 dev->sriov.alias_guid.ports_guid[i].wq = 878 dev->sriov.alias_guid.ports_guid[i].wq =
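The alias_GUID rework above replaces the one-shot "mark the record IDLE and retry immediately" behaviour with per-entry retry bookkeeping: every GUID entry the SM declines doubles its retry delay up to a 60-second cap (guids_retry_schedule), and the whole record is rescheduled with the smallest delay among its entries, which then feeds msecs_to_jiffies() for the delayed work. A standalone sketch of just that backoff arithmetic; the array size mirrors NUM_ALIAS_GUID_IN_REC, everything else is illustrative:

    #include <stdio.h>

    #define NUM_ALIAS_GUID_IN_REC 8

    /* Per-entry retry delay in seconds, standing in for guids_retry_schedule[]. */
    static unsigned int retry_schedule[NUM_ALIAS_GUID_IN_REC];

    /* Mirrors the entry_declined arithmetic: first retry after 1s, then
     * double on every further decline, capped at 60s. */
    static unsigned int bump_entry(int i)
    {
        retry_schedule[i] = (retry_schedule[i] == 0) ? 1 :
            (retry_schedule[i] * 2 > 60 ? 60 : retry_schedule[i] * 2);
        return retry_schedule[i];
    }

    int main(void)
    {
        unsigned int resched_delay_sec = 0;
        int i;

        /* Pretend the SM declined entry 2 four times and entry 5 once. */
        for (i = 0; i < 4; i++)
            bump_entry(2);
        bump_entry(5);

        /* The record is rescheduled with the minimum delay of its entries. */
        for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
            if (!retry_schedule[i])
                continue;
            if (!resched_delay_sec || retry_schedule[i] < resched_delay_sec)
                resched_delay_sec = retry_schedule[i];
        }
        printf("entry2=%us entry5=%us -> reschedule record in %us\n",
               retry_schedule[2], retry_schedule[5], resched_delay_sec);
        return 0;
    }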
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 59040265e361..9cd2b002d7ae 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1430 tun_qp->ring[i].addr, 1430 tun_qp->ring[i].addr,
1431 rx_buf_size, 1431 rx_buf_size,
1432 DMA_FROM_DEVICE); 1432 DMA_FROM_DEVICE);
1433 if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
1434 kfree(tun_qp->ring[i].addr);
1435 goto err;
1436 }
1433 } 1437 }
1434 1438
1435 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { 1439 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1442 tun_qp->tx_ring[i].buf.addr, 1446 tun_qp->tx_ring[i].buf.addr,
1443 tx_buf_size, 1447 tx_buf_size,
1444 DMA_TO_DEVICE); 1448 DMA_TO_DEVICE);
1449 if (ib_dma_mapping_error(ctx->ib_dev,
1450 tun_qp->tx_ring[i].buf.map)) {
1451 kfree(tun_qp->tx_ring[i].buf.addr);
1452 goto tx_err;
1453 }
1445 tun_qp->tx_ring[i].ah = NULL; 1454 tun_qp->tx_ring[i].ah = NULL;
1446 } 1455 }
1447 spin_lock_init(&tun_qp->tx_lock); 1456 spin_lock_init(&tun_qp->tx_lock);
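Both mad.c hunks add the same defensive pattern: the result of ib_dma_map_single() is checked with ib_dma_mapping_error() before use, and the freshly allocated ring buffer is freed on failure so the existing err/tx_err unwind paths stay balanced. A userspace model of that allocate/map/check loop and its unwinding; fake_map()/fake_unmap() are stand-ins, not the ib_dma_* API:

    #include <stdio.h>
    #include <stdlib.h>

    #define NBUFS   4
    #define BAD_MAP 0UL   /* stand-in for "ib_dma_mapping_error() says yes" */

    static unsigned long fake_map(void *buf, int i)
    {
        (void)buf;
        return (i == 2) ? BAD_MAP : 0x1000UL + i;   /* pretend index 2 fails */
    }

    static void fake_unmap(unsigned long map)
    {
        printf("unmap %#lx\n", map);
    }

    int main(void)
    {
        void *addr[NBUFS];
        unsigned long map[NBUFS];
        int i;

        for (i = 0; i < NBUFS; i++) {
            addr[i] = malloc(64);
            map[i] = fake_map(addr[i], i);
            if (map[i] == BAD_MAP) {
                free(addr[i]);   /* the buffer that never got mapped */
                goto err;
            }
        }
        return 0;
    err:
        while (--i >= 0) {       /* unwind everything mapped before it */
            fake_unmap(map[i]);
            free(addr[i]);
        }
        return 1;
    }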
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 976bea794b5f..57070c529dfb 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
66MODULE_LICENSE("Dual BSD/GPL"); 66MODULE_LICENSE("Dual BSD/GPL");
67MODULE_VERSION(DRV_VERSION); 67MODULE_VERSION(DRV_VERSION);
68 68
69int mlx4_ib_sm_guid_assign = 1; 69int mlx4_ib_sm_guid_assign = 0;
70module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); 70module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
71MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); 71MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
72 72
73static const char mlx4_ib_version[] = 73static const char mlx4_ib_version[] =
74 DRV_NAME ": Mellanox ConnectX InfiniBand driver v" 74 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
2791 case MLX4_DEV_EVENT_SLAVE_INIT: 2791 case MLX4_DEV_EVENT_SLAVE_INIT:
2792 /* here, p is the slave id */ 2792 /* here, p is the slave id */
2793 do_slave_init(ibdev, p, 1); 2793 do_slave_init(ibdev, p, 1);
2794 if (mlx4_is_master(dev)) {
2795 int i;
2796
2797 for (i = 1; i <= ibdev->num_ports; i++) {
2798 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
2799 == IB_LINK_LAYER_INFINIBAND)
2800 mlx4_ib_slave_alias_guid_event(ibdev,
2801 p, i,
2802 1);
2803 }
2804 }
2794 return; 2805 return;
2795 2806
2796 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: 2807 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
2808 if (mlx4_is_master(dev)) {
2809 int i;
2810
2811 for (i = 1; i <= ibdev->num_ports; i++) {
2812 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
2813 == IB_LINK_LAYER_INFINIBAND)
2814 mlx4_ib_slave_alias_guid_event(ibdev,
2815 p, i,
2816 0);
2817 }
2818 }
2797 /* here, p is the slave id */ 2819 /* here, p is the slave id */
2798 do_slave_init(ibdev, p, 0); 2820 do_slave_init(ibdev, p, 0);
2799 return; 2821 return;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f829fd935b79..fce3934372a1 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
342enum mlx4_guid_alias_rec_status { 342enum mlx4_guid_alias_rec_status {
343 MLX4_GUID_INFO_STATUS_IDLE, 343 MLX4_GUID_INFO_STATUS_IDLE,
344 MLX4_GUID_INFO_STATUS_SET, 344 MLX4_GUID_INFO_STATUS_SET,
345 MLX4_GUID_INFO_STATUS_PENDING,
346}; 345};
347 346
348enum mlx4_guid_alias_rec_ownership { 347#define GUID_STATE_NEED_PORT_INIT 0x01
349 MLX4_GUID_DRIVER_ASSIGN,
350 MLX4_GUID_SYSADMIN_ASSIGN,
351 MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
352};
353 348
354enum mlx4_guid_alias_rec_method { 349enum mlx4_guid_alias_rec_method {
355 MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET, 350 MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
360 u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC]; 355 u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
361 ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/ 356 ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
362 enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/ 357 enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
363 u8 method; /*set or delete*/ 358 unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
364 enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/ 359 u64 time_to_run;
365}; 360};
366 361
367struct mlx4_sriov_alias_guid_port_rec_det { 362struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
369 struct workqueue_struct *wq; 364 struct workqueue_struct *wq;
370 struct delayed_work alias_guid_work; 365 struct delayed_work alias_guid_work;
371 u8 port; 366 u8 port;
367 u32 state_flags;
372 struct mlx4_sriov_alias_guid *parent; 368 struct mlx4_sriov_alias_guid *parent;
373 struct list_head cb_list; 369 struct list_head cb_list;
374}; 370};
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
802void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, 798void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
803 struct attribute *attr); 799 struct attribute *attr);
804ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index); 800ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
801void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
802 int port, int slave_init);
805 803
806int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; 804int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
807 805
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ed2bd6701f9b..02fc91c68027 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
566 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr, 566 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
567 sizeof (struct mlx4_ib_proxy_sqp_hdr), 567 sizeof (struct mlx4_ib_proxy_sqp_hdr),
568 DMA_FROM_DEVICE); 568 DMA_FROM_DEVICE);
569 if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
570 kfree(qp->sqp_proxy_rcv[i].addr);
571 goto err;
572 }
569 } 573 }
570 return 0; 574 return 0;
571 575
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
2605 2609
2606 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); 2610 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
2607 2611
2608 *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | 2612 *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
2609 wr->wr.ud.hlen);
2610 *lso_seg_len = halign; 2613 *lso_seg_len = halign;
2611 return 0; 2614 return 0;
2612} 2615}
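In qp.c, alloc_proxy_bufs() gains the same mapping-error check as above, and build_lso_seg() now programs the LSO control field with the full MSS in the upper 16 bits and the header length in the lower 16, instead of mss - hlen. A tiny sketch of that 32-bit big-endian packing; the example values are arbitrary:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>   /* htonl() as the userspace cpu_to_be32() */

    int main(void)
    {
        uint32_t mss  = 2048;   /* UD MSS         -- arbitrary example value */
        uint32_t hlen = 72;     /* LSO header len -- arbitrary example value */

        /* After the patch: mss << 16 | hlen (was (mss - hlen) << 16 | hlen). */
        uint32_t lso_hdr_sz = htonl(mss << 16 | hlen);

        printf("host value 0x%08x -> stored big-endian as 0x%08x\n",
               mss << 16 | hlen, lso_hdr_sz);
        return 0;
    }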
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index d10c2b8a5dad..6797108ce873 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -46,21 +46,17 @@
46static ssize_t show_admin_alias_guid(struct device *dev, 46static ssize_t show_admin_alias_guid(struct device *dev,
47 struct device_attribute *attr, char *buf) 47 struct device_attribute *attr, char *buf)
48{ 48{
49 int record_num;/*0-15*/
50 int guid_index_in_rec; /*0 - 7*/
51 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = 49 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
52 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); 50 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
53 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; 51 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
54 struct mlx4_ib_dev *mdev = port->dev; 52 struct mlx4_ib_dev *mdev = port->dev;
53 __be64 sysadmin_ag_val;
55 54
56 record_num = mlx4_ib_iov_dentry->entry_num / 8 ; 55 sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
57 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ; 56 mlx4_ib_iov_dentry->entry_num,
57 port->num);
58 58
59 return sprintf(buf, "%llx\n", 59 return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
60 be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
61 ports_guid[port->num - 1].
62 all_rec_per_port[record_num].
63 all_recs[8 * guid_index_in_rec]));
64} 60}
65 61
66/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. 62/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
80 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; 76 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
81 struct mlx4_ib_dev *mdev = port->dev; 77 struct mlx4_ib_dev *mdev = port->dev;
82 u64 sysadmin_ag_val; 78 u64 sysadmin_ag_val;
79 unsigned long flags;
83 80
84 record_num = mlx4_ib_iov_dentry->entry_num / 8; 81 record_num = mlx4_ib_iov_dentry->entry_num / 8;
85 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8; 82 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
87 pr_err("GUID 0 block 0 is RO\n"); 84 pr_err("GUID 0 block 0 is RO\n");
88 return count; 85 return count;
89 } 86 }
87 spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
90 sscanf(buf, "%llx", &sysadmin_ag_val); 88 sscanf(buf, "%llx", &sysadmin_ag_val);
91 *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1]. 89 *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
92 all_rec_per_port[record_num]. 90 all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
96 /* Change the state to be pending for update */ 94 /* Change the state to be pending for update */
97 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status 95 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
98 = MLX4_GUID_INFO_STATUS_IDLE ; 96 = MLX4_GUID_INFO_STATUS_IDLE ;
99 97 mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
100 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method 98 mlx4_ib_iov_dentry->entry_num,
101 = MLX4_GUID_INFO_RECORD_SET; 99 port->num);
102
103 switch (sysadmin_ag_val) {
104 case MLX4_GUID_FOR_DELETE_VAL:
105 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
106 = MLX4_GUID_INFO_RECORD_DELETE;
107 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
108 = MLX4_GUID_SYSADMIN_ASSIGN;
109 break;
110 /* The sysadmin requests the SM to re-assign */
111 case MLX4_NOT_SET_GUID:
112 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
113 = MLX4_GUID_DRIVER_ASSIGN;
114 break;
115 /* The sysadmin requests a specific value.*/
116 default:
117 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
118 = MLX4_GUID_SYSADMIN_ASSIGN;
119 break;
120 }
121 100
122 /* set the record index */ 101 /* set the record index */
123 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes 102 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
124 = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec); 103 |= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
125 104
105 spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
126 mlx4_ib_init_alias_guid_work(mdev, port->num - 1); 106 mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
127 107
128 return count; 108 return count;
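With the ownership tracking gone, store_admin_alias_guid() above simply parses the hex value, pushes it through mlx4_set_admin_guid(), and ORs the entry's bit into guid_indexes under ag_work_lock, while show_admin_alias_guid() reads it back via mlx4_get_admin_guid(). A small sketch of the entry-number bookkeeping behind that; the entry number and GUID below are made up, and the real helper wraps the bit in IB_SA_COMP_MASK(4 + index), which additionally applies cpu_to_be64():

    #include <stdio.h>

    #define NUM_ALIAS_GUID_IN_REC 8

    int main(void)
    {
        const char *buf = "0011223344556677";   /* hex GUID echoed by the admin */
        unsigned int entry_num = 13;            /* hypothetical sysfs entry     */
        unsigned long long guid;

        sscanf(buf, "%llx", &guid);

        unsigned int record_num   = entry_num / NUM_ALIAS_GUID_IN_REC;
        unsigned int index_in_rec = entry_num % NUM_ALIAS_GUID_IN_REC;
        /* Bit position used for guid_indexes (byte-order wrapping omitted). */
        unsigned long long comp_bit = 1ULL << (4 + index_in_rec);

        printf("guid=0x%016llx record=%u index=%u comp_mask_bit=0x%llx\n",
               guid, record_num, index_in_rec, comp_bit);
        return 0;
    }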
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562beb5423..bd94b0a6e9e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -87,7 +87,6 @@ enum {
87 IPOIB_FLAG_ADMIN_UP = 2, 87 IPOIB_FLAG_ADMIN_UP = 2,
88 IPOIB_PKEY_ASSIGNED = 3, 88 IPOIB_PKEY_ASSIGNED = 3,
89 IPOIB_FLAG_SUBINTERFACE = 5, 89 IPOIB_FLAG_SUBINTERFACE = 5,
90 IPOIB_MCAST_RUN = 6,
91 IPOIB_STOP_REAPER = 7, 90 IPOIB_STOP_REAPER = 7,
92 IPOIB_FLAG_ADMIN_CM = 9, 91 IPOIB_FLAG_ADMIN_CM = 9,
93 IPOIB_FLAG_UMCAST = 10, 92 IPOIB_FLAG_UMCAST = 10,
@@ -98,9 +97,15 @@ enum {
98 97
99 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */ 98 IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
100 IPOIB_MCAST_FLAG_SENDONLY = 1, 99 IPOIB_MCAST_FLAG_SENDONLY = 1,
101 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 100 /*
101 * For IPOIB_MCAST_FLAG_BUSY
102 * When set, in flight join and mcast->mc is unreliable
103 * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
104 * haven't started yet
105 * When clear and mcast->mc is valid pointer, join was successful
106 */
107 IPOIB_MCAST_FLAG_BUSY = 2,
102 IPOIB_MCAST_FLAG_ATTACHED = 3, 108 IPOIB_MCAST_FLAG_ATTACHED = 3,
103 IPOIB_MCAST_JOIN_STARTED = 4,
104 109
105 MAX_SEND_CQE = 16, 110 MAX_SEND_CQE = 16,
106 IPOIB_CM_COPYBREAK = 256, 111 IPOIB_CM_COPYBREAK = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
148 153
149 unsigned long created; 154 unsigned long created;
150 unsigned long backoff; 155 unsigned long backoff;
156 unsigned long delay_until;
151 157
152 unsigned long flags; 158 unsigned long flags;
153 unsigned char logcount; 159 unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
292 struct completion deleted; 298 struct completion deleted;
293}; 299};
294 300
301struct ipoib_qp_state_validate {
302 struct work_struct work;
303 struct ipoib_dev_priv *priv;
304};
305
295/* 306/*
296 * Device private locking: network stack tx_lock protects members used 307 * Device private locking: network stack tx_lock protects members used
297 * in TX fast path, lock protects everything else. lock nests inside 308 * in TX fast path, lock protects everything else. lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
317 struct list_head multicast_list; 328 struct list_head multicast_list;
318 struct rb_root multicast_tree; 329 struct rb_root multicast_tree;
319 330
331 struct workqueue_struct *wq;
320 struct delayed_work mcast_task; 332 struct delayed_work mcast_task;
321 struct work_struct carrier_on_task; 333 struct work_struct carrier_on_task;
322 struct work_struct flush_light; 334 struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
426#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) 438#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
427#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) 439#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES)
428 440
429static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
430{
431 return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
432}
433
434void ipoib_neigh_dtor(struct ipoib_neigh *neigh); 441void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
435static inline void ipoib_neigh_put(struct ipoib_neigh *neigh) 442static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
436{ 443{
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
477void ipoib_pkey_event(struct work_struct *work); 484void ipoib_pkey_event(struct work_struct *work);
478void ipoib_ib_dev_cleanup(struct net_device *dev); 485void ipoib_ib_dev_cleanup(struct net_device *dev);
479 486
480int ipoib_ib_dev_open(struct net_device *dev, int flush); 487int ipoib_ib_dev_open(struct net_device *dev);
481int ipoib_ib_dev_up(struct net_device *dev); 488int ipoib_ib_dev_up(struct net_device *dev);
482int ipoib_ib_dev_down(struct net_device *dev, int flush); 489int ipoib_ib_dev_down(struct net_device *dev);
483int ipoib_ib_dev_stop(struct net_device *dev, int flush); 490int ipoib_ib_dev_stop(struct net_device *dev);
484void ipoib_pkey_dev_check_presence(struct net_device *dev); 491void ipoib_pkey_dev_check_presence(struct net_device *dev);
485 492
486int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 493int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
492 499
493void ipoib_mcast_restart_task(struct work_struct *work); 500void ipoib_mcast_restart_task(struct work_struct *work);
494int ipoib_mcast_start_thread(struct net_device *dev); 501int ipoib_mcast_start_thread(struct net_device *dev);
495int ipoib_mcast_stop_thread(struct net_device *dev, int flush); 502int ipoib_mcast_stop_thread(struct net_device *dev);
496 503
497void ipoib_mcast_dev_down(struct net_device *dev); 504void ipoib_mcast_dev_down(struct net_device *dev);
498void ipoib_mcast_dev_flush(struct net_device *dev); 505void ipoib_mcast_dev_flush(struct net_device *dev);
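The ipoib.h hunk documents the reworked meaning of IPOIB_MCAST_FLAG_BUSY: set means a join is in flight and mcast->mc is unreliable; clear with mcast->mc IS_ERR_OR_NULL means the join still has to be (re)started; clear with a valid mcast->mc means the join completed. A toy decision function modelling those three states, using local names rather than ipoib code:

    #include <stdio.h>

    /* ipoib itself uses test_bit(IPOIB_MCAST_FLAG_BUSY) and
     * IS_ERR_OR_NULL(mcast->mc) to make the same decision. */
    enum join_state { JOIN_IN_FLIGHT, JOIN_NEEDS_START, JOIN_DONE };

    static enum join_state mcast_state(int busy, void *mc, int mc_is_err)
    {
        if (busy)
            return JOIN_IN_FLIGHT;            /* mc can't be trusted yet      */
        if (mc == NULL || mc_is_err)          /* IS_ERR_OR_NULL() analogue    */
            return JOIN_NEEDS_START;          /* never started, or must retry */
        return JOIN_DONE;                     /* join completed successfully  */
    }

    int main(void)
    {
        int dummy;

        printf("%d %d %d\n",
               mcast_state(1, NULL, 0),       /* 0: join in flight  */
               mcast_state(0, NULL, 0),       /* 1: needs (re)start */
               mcast_state(0, &dummy, 0));    /* 2: joined          */
        return 0;
    }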
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efcea0d03..56959adb6c7d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
474 } 474 }
475 475
476 spin_lock_irq(&priv->lock); 476 spin_lock_irq(&priv->lock);
477 queue_delayed_work(ipoib_workqueue, 477 queue_delayed_work(priv->wq,
478 &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 478 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
479 /* Add this entry to passive ids list head, but do not re-add it 479 /* Add this entry to passive ids list head, but do not re-add it
480 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */ 480 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
576 spin_lock_irqsave(&priv->lock, flags); 576 spin_lock_irqsave(&priv->lock, flags);
577 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 577 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
578 ipoib_cm_start_rx_drain(priv); 578 ipoib_cm_start_rx_drain(priv);
579 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); 579 queue_work(priv->wq, &priv->cm.rx_reap_task);
580 spin_unlock_irqrestore(&priv->lock, flags); 580 spin_unlock_irqrestore(&priv->lock, flags);
581 } else 581 } else
582 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 582 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
603 spin_lock_irqsave(&priv->lock, flags); 603 spin_lock_irqsave(&priv->lock, flags);
604 list_move(&p->list, &priv->cm.rx_reap_list); 604 list_move(&p->list, &priv->cm.rx_reap_list);
605 spin_unlock_irqrestore(&priv->lock, flags); 605 spin_unlock_irqrestore(&priv->lock, flags);
606 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); 606 queue_work(priv->wq, &priv->cm.rx_reap_task);
607 } 607 }
608 return; 608 return;
609 } 609 }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
827 827
828 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 828 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
829 list_move(&tx->list, &priv->cm.reap_list); 829 list_move(&tx->list, &priv->cm.reap_list);
830 queue_work(ipoib_workqueue, &priv->cm.reap_task); 830 queue_work(priv->wq, &priv->cm.reap_task);
831 } 831 }
832 832
833 clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); 833 clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
1255 1255
1256 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1256 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
1257 list_move(&tx->list, &priv->cm.reap_list); 1257 list_move(&tx->list, &priv->cm.reap_list);
1258 queue_work(ipoib_workqueue, &priv->cm.reap_task); 1258 queue_work(priv->wq, &priv->cm.reap_task);
1259 } 1259 }
1260 1260
1261 spin_unlock_irqrestore(&priv->lock, flags); 1261 spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
1284 tx->dev = dev; 1284 tx->dev = dev;
1285 list_add(&tx->list, &priv->cm.start_list); 1285 list_add(&tx->list, &priv->cm.start_list);
1286 set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); 1286 set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
1287 queue_work(ipoib_workqueue, &priv->cm.start_task); 1287 queue_work(priv->wq, &priv->cm.start_task);
1288 return tx; 1288 return tx;
1289} 1289}
1290 1290
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
1295 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1295 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
1296 spin_lock_irqsave(&priv->lock, flags); 1296 spin_lock_irqsave(&priv->lock, flags);
1297 list_move(&tx->list, &priv->cm.reap_list); 1297 list_move(&tx->list, &priv->cm.reap_list);
1298 queue_work(ipoib_workqueue, &priv->cm.reap_task); 1298 queue_work(priv->wq, &priv->cm.reap_task);
1299 ipoib_dbg(priv, "Reap connection for gid %pI6\n", 1299 ipoib_dbg(priv, "Reap connection for gid %pI6\n",
1300 tx->neigh->daddr + 4); 1300 tx->neigh->daddr + 4);
1301 tx->neigh = NULL; 1301 tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
1417 1417
1418 skb_queue_tail(&priv->cm.skb_queue, skb); 1418 skb_queue_tail(&priv->cm.skb_queue, skb);
1419 if (e) 1419 if (e)
1420 queue_work(ipoib_workqueue, &priv->cm.skb_task); 1420 queue_work(priv->wq, &priv->cm.skb_task);
1421} 1421}
1422 1422
1423static void ipoib_cm_rx_reap(struct work_struct *work) 1423static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
1450 } 1450 }
1451 1451
1452 if (!list_empty(&priv->cm.passive_ids)) 1452 if (!list_empty(&priv->cm.passive_ids))
1453 queue_delayed_work(ipoib_workqueue, 1453 queue_delayed_work(priv->wq,
1454 &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 1454 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
1455 spin_unlock_irq(&priv->lock); 1455 spin_unlock_irq(&priv->lock);
1456} 1456}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c348174..63b92cbb29ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
94static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, 94static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
95 u64 mapping[IPOIB_UD_RX_SG]) 95 u64 mapping[IPOIB_UD_RX_SG])
96{ 96{
97 if (ipoib_ud_need_sg(priv->max_ib_mtu)) { 97 ib_dma_unmap_single(priv->ca, mapping[0],
98 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, 98 IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
99 DMA_FROM_DEVICE); 99 DMA_FROM_DEVICE);
100 ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
101 DMA_FROM_DEVICE);
102 } else
103 ib_dma_unmap_single(priv->ca, mapping[0],
104 IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
105 DMA_FROM_DEVICE);
106}
107
108static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
109 struct sk_buff *skb,
110 unsigned int length)
111{
112 if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
113 skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
114 unsigned int size;
115 /*
116 * There is only two buffers needed for max_payload = 4K,
117 * first buf size is IPOIB_UD_HEAD_SIZE
118 */
119 skb->tail += IPOIB_UD_HEAD_SIZE;
120 skb->len += length;
121
122 size = length - IPOIB_UD_HEAD_SIZE;
123
124 skb_frag_size_set(frag, size);
125 skb->data_len += size;
126 skb->truesize += PAGE_SIZE;
127 } else
128 skb_put(skb, length);
129
130} 100}
131 101
132static int ipoib_ib_post_receive(struct net_device *dev, int id) 102static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
156 struct ipoib_dev_priv *priv = netdev_priv(dev); 126 struct ipoib_dev_priv *priv = netdev_priv(dev);
157 struct sk_buff *skb; 127 struct sk_buff *skb;
158 int buf_size; 128 int buf_size;
159 int tailroom;
160 u64 *mapping; 129 u64 *mapping;
161 130
162 if (ipoib_ud_need_sg(priv->max_ib_mtu)) { 131 buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
163 buf_size = IPOIB_UD_HEAD_SIZE;
164 tailroom = 128; /* reserve some tailroom for IP/TCP headers */
165 } else {
166 buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
167 tailroom = 0;
168 }
169 132
170 skb = dev_alloc_skb(buf_size + tailroom + 4); 133 skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
171 if (unlikely(!skb)) 134 if (unlikely(!skb))
172 return NULL; 135 return NULL;
173 136
@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
184 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) 147 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
185 goto error; 148 goto error;
186 149
187 if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
188 struct page *page = alloc_page(GFP_ATOMIC);
189 if (!page)
190 goto partial_error;
191 skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
192 mapping[1] =
193 ib_dma_map_page(priv->ca, page,
194 0, PAGE_SIZE, DMA_FROM_DEVICE);
195 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
196 goto partial_error;
197 }
198
199 priv->rx_ring[id].skb = skb; 150 priv->rx_ring[id].skb = skb;
200 return skb; 151 return skb;
201
202partial_error:
203 ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
204error: 152error:
205 dev_kfree_skb_any(skb); 153 dev_kfree_skb_any(skb);
206 return NULL; 154 return NULL;
@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
278 wc->byte_len, wc->slid); 226 wc->byte_len, wc->slid);
279 227
280 ipoib_ud_dma_unmap_rx(priv, mapping); 228 ipoib_ud_dma_unmap_rx(priv, mapping);
281 ipoib_ud_skb_put_frags(priv, skb, wc->byte_len); 229
230 skb_put(skb, wc->byte_len);
282 231
283 /* First byte of dgid signals multicast when 0xff */ 232 /* First byte of dgid signals multicast when 0xff */
284 dgid = &((struct ib_grh *)skb->data)->dgid; 233 dgid = &((struct ib_grh *)skb->data)->dgid;
@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
296 skb_reset_mac_header(skb); 245 skb_reset_mac_header(skb);
297 skb_pull(skb, IPOIB_ENCAP_LEN); 246 skb_pull(skb, IPOIB_ENCAP_LEN);
298 247
248 skb->truesize = SKB_TRUESIZE(skb->len);
249
299 ++dev->stats.rx_packets; 250 ++dev->stats.rx_packets;
300 dev->stats.rx_bytes += skb->len; 251 dev->stats.rx_bytes += skb->len;
301 252
@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
376 } 327 }
377} 328}
378 329
330/*
 331 * As a result of a completion error, the QP can be transitioned to the SQE state.
 332 * This function checks whether the (send) QP is in the SQE state and, if so,
 333 * moves it back to the RTS state in order to make it functional again.
334 */
335static void ipoib_qp_state_validate_work(struct work_struct *work)
336{
337 struct ipoib_qp_state_validate *qp_work =
338 container_of(work, struct ipoib_qp_state_validate, work);
339
340 struct ipoib_dev_priv *priv = qp_work->priv;
341 struct ib_qp_attr qp_attr;
342 struct ib_qp_init_attr query_init_attr;
343 int ret;
344
345 ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
346 if (ret) {
347 ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
348 __func__, ret);
349 goto free_res;
350 }
351 pr_info("%s: QP: 0x%x is in state: %d\n",
352 __func__, priv->qp->qp_num, qp_attr.qp_state);
353
 354 /* currently only the SQE->RTS transition is supported */
355 if (qp_attr.qp_state == IB_QPS_SQE) {
356 qp_attr.qp_state = IB_QPS_RTS;
357
358 ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
359 if (ret) {
360 pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
361 ret, priv->qp->qp_num);
362 goto free_res;
363 }
364 pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
365 __func__, priv->qp->qp_num);
366 } else {
367 pr_warn("QP (%d) will stay in state: %d\n",
368 priv->qp->qp_num, qp_attr.qp_state);
369 }
370
371free_res:
372 kfree(qp_work);
373}
374
379static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) 375static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
380{ 376{
381 struct ipoib_dev_priv *priv = netdev_priv(dev); 377 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
407 netif_wake_queue(dev); 403 netif_wake_queue(dev);
408 404
409 if (wc->status != IB_WC_SUCCESS && 405 if (wc->status != IB_WC_SUCCESS &&
410 wc->status != IB_WC_WR_FLUSH_ERR) 406 wc->status != IB_WC_WR_FLUSH_ERR) {
407 struct ipoib_qp_state_validate *qp_work;
411 ipoib_warn(priv, "failed send event " 408 ipoib_warn(priv, "failed send event "
412 "(status=%d, wrid=%d vend_err %x)\n", 409 "(status=%d, wrid=%d vend_err %x)\n",
413 wc->status, wr_id, wc->vendor_err); 410 wc->status, wr_id, wc->vendor_err);
411 qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
412 if (!qp_work) {
413 ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
414 __func__, priv->qp->qp_num);
415 return;
416 }
417
418 INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
419 qp_work->priv = priv;
420 queue_work(priv->wq, &qp_work->work);
421 }
414} 422}
415 423
416static int poll_tx(struct ipoib_dev_priv *priv) 424static int poll_tx(struct ipoib_dev_priv *priv)
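The hunks above defer the SQE->RTS recovery to a work item because the send completion handler runs in atomic context, while ib_query_qp() and ib_modify_qp() may sleep. A minimal sketch of that deferral pattern (not the patch itself; my_qp_recover_work and the my_* functions are illustrative names):

#include <linux/slab.h>
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>

struct my_qp_recover_work {
	struct work_struct work;
	struct ib_qp *qp;
};

/* Runs in process context, where the verbs calls are allowed to sleep */
static void my_qp_recover(struct work_struct *work)
{
	struct my_qp_recover_work *w =
		container_of(work, struct my_qp_recover_work, work);
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	if (!ib_query_qp(w->qp, &attr, IB_QP_STATE, &init_attr) &&
	    attr.qp_state == IB_QPS_SQE) {
		attr.qp_state = IB_QPS_RTS;
		ib_modify_qp(w->qp, &attr, IB_QP_STATE);
	}
	kfree(w);
}

/* Called from the send completion handler, hence GFP_ATOMIC */
static void my_schedule_qp_recover(struct workqueue_struct *wq,
				   struct ib_qp *qp)
{
	struct my_qp_recover_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return;
	INIT_WORK(&w->work, my_qp_recover);
	w->qp = qp;
	queue_work(wq, &w->work);
}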
@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
655 __ipoib_reap_ah(dev); 663 __ipoib_reap_ah(dev);
656 664
657 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) 665 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
658 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, 666 queue_delayed_work(priv->wq, &priv->ah_reap_task,
659 round_jiffies_relative(HZ)); 667 round_jiffies_relative(HZ));
660} 668}
661 669
670static void ipoib_flush_ah(struct net_device *dev)
671{
672 struct ipoib_dev_priv *priv = netdev_priv(dev);
673
674 cancel_delayed_work(&priv->ah_reap_task);
675 flush_workqueue(priv->wq);
676 ipoib_reap_ah(&priv->ah_reap_task.work);
677}
678
679static void ipoib_stop_ah(struct net_device *dev)
680{
681 struct ipoib_dev_priv *priv = netdev_priv(dev);
682
683 set_bit(IPOIB_STOP_REAPER, &priv->flags);
684 ipoib_flush_ah(dev);
685}
686
662static void ipoib_ib_tx_timer_func(unsigned long ctx) 687static void ipoib_ib_tx_timer_func(unsigned long ctx)
663{ 688{
664 drain_tx_cq((struct net_device *)ctx); 689 drain_tx_cq((struct net_device *)ctx);
665} 690}
666 691
667int ipoib_ib_dev_open(struct net_device *dev, int flush) 692int ipoib_ib_dev_open(struct net_device *dev)
668{ 693{
669 struct ipoib_dev_priv *priv = netdev_priv(dev); 694 struct ipoib_dev_priv *priv = netdev_priv(dev);
670 int ret; 695 int ret;
@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
696 } 721 }
697 722
698 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 723 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
699 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, 724 queue_delayed_work(priv->wq, &priv->ah_reap_task,
700 round_jiffies_relative(HZ)); 725 round_jiffies_relative(HZ));
701 726
702 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 727 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
706dev_stop: 731dev_stop:
707 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 732 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
708 napi_enable(&priv->napi); 733 napi_enable(&priv->napi);
709 ipoib_ib_dev_stop(dev, flush); 734 ipoib_ib_dev_stop(dev);
710 return -1; 735 return -1;
711} 736}
712 737
@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
738 return ipoib_mcast_start_thread(dev); 763 return ipoib_mcast_start_thread(dev);
739} 764}
740 765
741int ipoib_ib_dev_down(struct net_device *dev, int flush) 766int ipoib_ib_dev_down(struct net_device *dev)
742{ 767{
743 struct ipoib_dev_priv *priv = netdev_priv(dev); 768 struct ipoib_dev_priv *priv = netdev_priv(dev);
744 769
@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
747 clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); 772 clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
748 netif_carrier_off(dev); 773 netif_carrier_off(dev);
749 774
750 ipoib_mcast_stop_thread(dev, flush); 775 ipoib_mcast_stop_thread(dev);
751 ipoib_mcast_dev_flush(dev); 776 ipoib_mcast_dev_flush(dev);
752 777
753 ipoib_flush_paths(dev); 778 ipoib_flush_paths(dev);
@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
807 local_bh_enable(); 832 local_bh_enable();
808} 833}
809 834
810int ipoib_ib_dev_stop(struct net_device *dev, int flush) 835int ipoib_ib_dev_stop(struct net_device *dev)
811{ 836{
812 struct ipoib_dev_priv *priv = netdev_priv(dev); 837 struct ipoib_dev_priv *priv = netdev_priv(dev);
813 struct ib_qp_attr qp_attr; 838 struct ib_qp_attr qp_attr;
@@ -877,24 +902,7 @@ timeout:
877 if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) 902 if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
878 ipoib_warn(priv, "Failed to modify QP to RESET state\n"); 903 ipoib_warn(priv, "Failed to modify QP to RESET state\n");
879 904
880 /* Wait for all AHs to be reaped */ 905 ipoib_flush_ah(dev);
881 set_bit(IPOIB_STOP_REAPER, &priv->flags);
882 cancel_delayed_work(&priv->ah_reap_task);
883 if (flush)
884 flush_workqueue(ipoib_workqueue);
885
886 begin = jiffies;
887
888 while (!list_empty(&priv->dead_ahs)) {
889 __ipoib_reap_ah(dev);
890
891 if (time_after(jiffies, begin + HZ)) {
892 ipoib_warn(priv, "timing out; will leak address handles\n");
893 break;
894 }
895
896 msleep(1);
897 }
898 906
899 ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP); 907 ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
900 908
@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
918 (unsigned long) dev); 926 (unsigned long) dev);
919 927
920 if (dev->flags & IFF_UP) { 928 if (dev->flags & IFF_UP) {
921 if (ipoib_ib_dev_open(dev, 1)) { 929 if (ipoib_ib_dev_open(dev)) {
922 ipoib_transport_dev_cleanup(dev); 930 ipoib_transport_dev_cleanup(dev);
923 return -ENODEV; 931 return -ENODEV;
924 } 932 }
@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1037 if (level == IPOIB_FLUSH_LIGHT) { 1045 if (level == IPOIB_FLUSH_LIGHT) {
1038 ipoib_mark_paths_invalid(dev); 1046 ipoib_mark_paths_invalid(dev);
1039 ipoib_mcast_dev_flush(dev); 1047 ipoib_mcast_dev_flush(dev);
1048 ipoib_flush_ah(dev);
1040 } 1049 }
1041 1050
1042 if (level >= IPOIB_FLUSH_NORMAL) 1051 if (level >= IPOIB_FLUSH_NORMAL)
1043 ipoib_ib_dev_down(dev, 0); 1052 ipoib_ib_dev_down(dev);
1044 1053
1045 if (level == IPOIB_FLUSH_HEAVY) { 1054 if (level == IPOIB_FLUSH_HEAVY) {
1046 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 1055 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
1047 ipoib_ib_dev_stop(dev, 0); 1056 ipoib_ib_dev_stop(dev);
1048 if (ipoib_ib_dev_open(dev, 0) != 0) 1057 if (ipoib_ib_dev_open(dev) != 0)
1049 return; 1058 return;
1050 if (netif_queue_stopped(dev)) 1059 if (netif_queue_stopped(dev))
1051 netif_start_queue(dev); 1060 netif_start_queue(dev);
@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
1097 */ 1106 */
1098 ipoib_flush_paths(dev); 1107 ipoib_flush_paths(dev);
1099 1108
1100 ipoib_mcast_stop_thread(dev, 1); 1109 ipoib_mcast_stop_thread(dev);
1101 ipoib_mcast_dev_flush(dev); 1110 ipoib_mcast_dev_flush(dev);
1102 1111
1112 /*
 1113 * Our ah references are not all freed until ipoib_mcast_dev_flush(),
 1114 * ipoib_flush_paths() and the neighbor garbage collection have run and
 1115 * the GC has been stopped and reaped.
 1116 * That should all be done by now, so make a final ah flush.
1117 */
1118 ipoib_stop_ah(dev);
1119
1103 ipoib_transport_dev_cleanup(dev); 1120 ipoib_transport_dev_cleanup(dev);
1104} 1121}
1105 1122
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 915ad04a827e..9e1b203d756d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
108 108
109 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 109 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
110 110
111 if (ipoib_ib_dev_open(dev, 1)) { 111 if (ipoib_ib_dev_open(dev)) {
112 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) 112 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
113 return 0; 113 return 0;
114 goto err_disable; 114 goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
139 return 0; 139 return 0;
140 140
141err_stop: 141err_stop:
142 ipoib_ib_dev_stop(dev, 1); 142 ipoib_ib_dev_stop(dev);
143 143
144err_disable: 144err_disable:
145 clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 145 clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
157 157
158 netif_stop_queue(dev); 158 netif_stop_queue(dev);
159 159
160 ipoib_ib_dev_down(dev, 1); 160 ipoib_ib_dev_down(dev);
161 ipoib_ib_dev_stop(dev, 0); 161 ipoib_ib_dev_stop(dev);
162 162
163 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 163 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
164 struct ipoib_dev_priv *cpriv; 164 struct ipoib_dev_priv *cpriv;
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
640 640
641 if (!path->query && path_rec_start(dev, path)) 641 if (!path->query && path_rec_start(dev, path))
642 goto err_path; 642 goto err_path;
643 643 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
644 __skb_queue_tail(&neigh->queue, skb); 644 __skb_queue_tail(&neigh->queue, skb);
645 else
646 goto err_drop;
645 } 647 }
646 648
647 spin_unlock_irqrestore(&priv->lock, flags); 649 spin_unlock_irqrestore(&priv->lock, flags);
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
676 new_path = 1; 678 new_path = 1;
677 } 679 }
678 if (path) { 680 if (path) {
679 __skb_queue_tail(&path->queue, skb); 681 if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
682 __skb_queue_tail(&path->queue, skb);
683 } else {
684 ++dev->stats.tx_dropped;
685 dev_kfree_skb_any(skb);
686 }
680 687
681 if (!path->query && path_rec_start(dev, path)) { 688 if (!path->query && path_rec_start(dev, path)) {
682 spin_unlock_irqrestore(&priv->lock, flags); 689 spin_unlock_irqrestore(&priv->lock, flags);
@@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
839 return; 846 return;
840 } 847 }
841 848
842 queue_work(ipoib_workqueue, &priv->restart_task); 849 queue_work(priv->wq, &priv->restart_task);
843} 850}
844 851
845static int ipoib_get_iflink(const struct net_device *dev) 852static int ipoib_get_iflink(const struct net_device *dev)
@@ -966,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
966 __ipoib_reap_neigh(priv); 973 __ipoib_reap_neigh(priv);
967 974
968 if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 975 if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
969 queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, 976 queue_delayed_work(priv->wq, &priv->neigh_reap_task,
970 arp_tbl.gc_interval); 977 arp_tbl.gc_interval);
971} 978}
972 979
@@ -1145,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
1145 1152
1146 /* start garbage collection */ 1153 /* start garbage collection */
1147 clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1154 clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1148 queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, 1155 queue_delayed_work(priv->wq, &priv->neigh_reap_task,
1149 arp_tbl.gc_interval); 1156 arp_tbl.gc_interval);
1150 1157
1151 return 0; 1158 return 0;
@@ -1274,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1274{ 1281{
1275 struct ipoib_dev_priv *priv = netdev_priv(dev); 1282 struct ipoib_dev_priv *priv = netdev_priv(dev);
1276 1283
1277 if (ipoib_neigh_hash_init(priv) < 0)
1278 goto out;
1279 /* Allocate RX/TX "rings" to hold queued skbs */ 1284 /* Allocate RX/TX "rings" to hold queued skbs */
1280 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, 1285 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
1281 GFP_KERNEL); 1286 GFP_KERNEL);
1282 if (!priv->rx_ring) { 1287 if (!priv->rx_ring) {
1283 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 1288 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
1284 ca->name, ipoib_recvq_size); 1289 ca->name, ipoib_recvq_size);
1285 goto out_neigh_hash_cleanup; 1290 goto out;
1286 } 1291 }
1287 1292
1288 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); 1293 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1297,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1297 if (ipoib_ib_dev_init(dev, ca, port)) 1302 if (ipoib_ib_dev_init(dev, ca, port))
1298 goto out_tx_ring_cleanup; 1303 goto out_tx_ring_cleanup;
1299 1304
1305 /*
 1306 * Must come after ipoib_ib_dev_init() so that the per-device wq
 1307 * allocated there can be used here
1308 */
1309 if (ipoib_neigh_hash_init(priv) < 0)
1310 goto out_dev_uninit;
1311
1300 return 0; 1312 return 0;
1301 1313
1314out_dev_uninit:
1315 ipoib_ib_dev_cleanup(dev);
1316
1302out_tx_ring_cleanup: 1317out_tx_ring_cleanup:
1303 vfree(priv->tx_ring); 1318 vfree(priv->tx_ring);
1304 1319
1305out_rx_ring_cleanup: 1320out_rx_ring_cleanup:
1306 kfree(priv->rx_ring); 1321 kfree(priv->rx_ring);
1307 1322
1308out_neigh_hash_cleanup:
1309 ipoib_neigh_hash_uninit(dev);
1310out: 1323out:
1311 return -ENOMEM; 1324 return -ENOMEM;
1312} 1325}
@@ -1329,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
1329 } 1342 }
1330 unregister_netdevice_many(&head); 1343 unregister_netdevice_many(&head);
1331 1344
1345 /*
 1346 * Must come before ipoib_ib_dev_cleanup() or we would delete an
 1347 * in-use workqueue
1348 */
1349 ipoib_neigh_hash_uninit(dev);
1350
1332 ipoib_ib_dev_cleanup(dev); 1351 ipoib_ib_dev_cleanup(dev);
1333 1352
1334 kfree(priv->rx_ring); 1353 kfree(priv->rx_ring);
@@ -1336,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
1336 1355
1337 priv->rx_ring = NULL; 1356 priv->rx_ring = NULL;
1338 priv->tx_ring = NULL; 1357 priv->tx_ring = NULL;
1339
1340 ipoib_neigh_hash_uninit(dev);
1341} 1358}
1342 1359
1343static const struct header_ops ipoib_header_ops = { 1360static const struct header_ops ipoib_header_ops = {
@@ -1646,10 +1663,11 @@ sysfs_failed:
1646 1663
1647register_failed: 1664register_failed:
1648 ib_unregister_event_handler(&priv->event_handler); 1665 ib_unregister_event_handler(&priv->event_handler);
1666 flush_workqueue(ipoib_workqueue);
1649 /* Stop GC if started before flush */ 1667 /* Stop GC if started before flush */
1650 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1668 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1651 cancel_delayed_work(&priv->neigh_reap_task); 1669 cancel_delayed_work(&priv->neigh_reap_task);
1652 flush_workqueue(ipoib_workqueue); 1670 flush_workqueue(priv->wq);
1653 1671
1654event_failed: 1672event_failed:
1655 ipoib_dev_cleanup(priv->dev); 1673 ipoib_dev_cleanup(priv->dev);
@@ -1712,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
1712 1730
1713 list_for_each_entry_safe(priv, tmp, dev_list, list) { 1731 list_for_each_entry_safe(priv, tmp, dev_list, list) {
1714 ib_unregister_event_handler(&priv->event_handler); 1732 ib_unregister_event_handler(&priv->event_handler);
1733 flush_workqueue(ipoib_workqueue);
1715 1734
1716 rtnl_lock(); 1735 rtnl_lock();
1717 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); 1736 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1720,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
1720 /* Stop GC */ 1739 /* Stop GC */
1721 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); 1740 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1722 cancel_delayed_work(&priv->neigh_reap_task); 1741 cancel_delayed_work(&priv->neigh_reap_task);
1723 flush_workqueue(ipoib_workqueue); 1742 flush_workqueue(priv->wq);
1724 1743
1725 unregister_netdev(priv->dev); 1744 unregister_netdev(priv->dev);
1726 free_netdev(priv->dev); 1745 free_netdev(priv->dev);
@@ -1755,14 +1774,16 @@ static int __init ipoib_init_module(void)
1755 return ret; 1774 return ret;
1756 1775
1757 /* 1776 /*
1758 * We create our own workqueue mainly because we want to be 1777 * We create a global workqueue here that is used for all flush
1759 * able to flush it when devices are being removed. We can't 1778 * operations. However, if you attempt to flush a workqueue
1760 * use schedule_work()/flush_scheduled_work() because both 1779 * from a task on that same workqueue, it deadlocks the system.
1761 * unregister_netdev() and linkwatch_event take the rtnl lock, 1780 * We want to be able to flush the tasks associated with a
1762 * so flush_scheduled_work() can deadlock during device 1781 * specific net device, so we also create a workqueue for each
1763 * removal. 1782 * netdevice. We queue up the tasks for that device only on
1783 * its private workqueue, and we only queue up flush events
1784 * on our global flush workqueue. This avoids the deadlocks.
1764 */ 1785 */
1765 ipoib_workqueue = create_singlethread_workqueue("ipoib"); 1786 ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
1766 if (!ipoib_workqueue) { 1787 if (!ipoib_workqueue) {
1767 ret = -ENOMEM; 1788 ret = -ENOMEM;
1768 goto err_fs; 1789 goto err_fs;
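The comment in ipoib_init_module() above describes the split the rest of this patch implements: a global workqueue that only ever carries flush events, plus one single-threaded workqueue per net device for that device's own tasks, so a flush handler never has to flush the queue it is running on. Condensed into a sketch with hypothetical names (my_flush_wq, struct my_dev):

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_flush_wq;	/* global, flush events only */

struct my_dev {
	struct workqueue_struct *wq;		/* per-device tasks */
	struct work_struct flush_task;
};

static void my_flush_task(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, flush_task);

	/* Runs on my_flush_wq, so flushing dev->wq never flushes ourselves */
	flush_workqueue(dev->wq);
}

static int my_dev_init(struct my_dev *dev)
{
	dev->wq = create_singlethread_workqueue("my_dev_wq");
	if (!dev->wq)
		return -ENOMEM;
	INIT_WORK(&dev->flush_task, my_flush_task);
	return 0;
}

/* Device tasks go to dev->wq; flush events go to the global queue */
static void my_request_flush(struct my_dev *dev)
{
	queue_work(my_flush_wq, &dev->flush_task);
}

my_flush_wq itself would be created once at module init, e.g. with create_singlethread_workqueue(), mirroring the ipoib_flush workqueue above.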
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ffb83b5f7e80..0d23e0568deb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
55 "Enable multicast debug tracing if > 0"); 55 "Enable multicast debug tracing if > 0");
56#endif 56#endif
57 57
58static DEFINE_MUTEX(mcast_mutex);
59
60struct ipoib_mcast_iter { 58struct ipoib_mcast_iter {
61 struct net_device *dev; 59 struct net_device *dev;
62 union ib_gid mgid; 60 union ib_gid mgid;
@@ -66,6 +64,48 @@ struct ipoib_mcast_iter {
66 unsigned int send_only; 64 unsigned int send_only;
67}; 65};
68 66
67/*
68 * This should be called with the priv->lock held
69 */
70static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
71 struct ipoib_mcast *mcast,
72 bool delay)
73{
74 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
75 return;
76
77 /*
78 * We will be scheduling *something*, so cancel whatever is
79 * currently scheduled first
80 */
81 cancel_delayed_work(&priv->mcast_task);
82 if (mcast && delay) {
83 /*
84 * We had a failure and want to schedule a retry later
85 */
86 mcast->backoff *= 2;
87 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
88 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
89 mcast->delay_until = jiffies + (mcast->backoff * HZ);
90 /*
91 * Mark this mcast for its delay, but restart the
92 * task immediately. The join task will make sure to
93 * clear out all entries without delays, and then
94 * schedule itself to run again when the earliest
95 * delay expires
96 */
97 queue_delayed_work(priv->wq, &priv->mcast_task, 0);
98 } else if (delay) {
99 /*
100 * Special case of retrying after a failure to
101 * allocate the broadcast multicast group, wait
102 * 1 second and try again
103 */
104 queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
105 } else
106 queue_delayed_work(priv->wq, &priv->mcast_task, 0);
107}
108
69static void ipoib_mcast_free(struct ipoib_mcast *mcast) 109static void ipoib_mcast_free(struct ipoib_mcast *mcast)
70{ 110{
71 struct net_device *dev = mcast->dev; 111 struct net_device *dev = mcast->dev;
@@ -103,6 +143,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
103 143
104 mcast->dev = dev; 144 mcast->dev = dev;
105 mcast->created = jiffies; 145 mcast->created = jiffies;
146 mcast->delay_until = jiffies;
106 mcast->backoff = 1; 147 mcast->backoff = 1;
107 148
108 INIT_LIST_HEAD(&mcast->list); 149 INIT_LIST_HEAD(&mcast->list);
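__ipoib_mcast_schedule_join_thread() above keeps a per-group exponential backoff: each failure doubles mcast->backoff up to IPOIB_MAX_BACKOFF_SECONDS and records delay_until, while the join task itself is rescheduled immediately and simply skips groups whose delay has not yet expired. The bookkeeping in isolation (struct and constant names here are illustrative, not the driver's):

#include <linux/jiffies.h>
#include <linux/types.h>

#define MY_MAX_BACKOFF_SECONDS 16

struct my_mcast {
	unsigned int backoff;		/* in seconds, starts at 1 */
	unsigned long delay_until;	/* deadline, in jiffies */
};

/* On a join failure: double the backoff, cap it, and note the deadline */
static void my_mcast_backoff(struct my_mcast *m)
{
	m->backoff *= 2;
	if (m->backoff > MY_MAX_BACKOFF_SECONDS)
		m->backoff = MY_MAX_BACKOFF_SECONDS;
	m->delay_until = jiffies + m->backoff * HZ;
}

/* The join task only retries a group once its delay has expired */
static bool my_mcast_ready(const struct my_mcast *m)
{
	return m->backoff == 1 || time_after_eq(jiffies, m->delay_until);
}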
@@ -185,17 +226,27 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
185 spin_unlock_irq(&priv->lock); 226 spin_unlock_irq(&priv->lock);
186 return -EAGAIN; 227 return -EAGAIN;
187 } 228 }
 188 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); 229 /* update priv members according to the new mcast */
230 priv->broadcast->mcmember.qkey = mcmember->qkey;
231 priv->broadcast->mcmember.mtu = mcmember->mtu;
232 priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
233 priv->broadcast->mcmember.rate = mcmember->rate;
234 priv->broadcast->mcmember.sl = mcmember->sl;
235 priv->broadcast->mcmember.flow_label = mcmember->flow_label;
236 priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
 237 /* assume that if the admin and mcast MTUs are equal, both can be changed */
238 if (priv->mcast_mtu == priv->admin_mtu)
239 priv->admin_mtu =
240 priv->mcast_mtu =
241 IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
242 else
243 priv->mcast_mtu =
244 IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
245
189 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 246 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
190 spin_unlock_irq(&priv->lock); 247 spin_unlock_irq(&priv->lock);
191 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 248 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
192 set_qkey = 1; 249 set_qkey = 1;
193
194 if (!ipoib_cm_admin_enabled(dev)) {
195 rtnl_lock();
196 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
197 rtnl_unlock();
198 }
199 } 250 }
200 251
201 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 252 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -270,107 +321,35 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
270 return 0; 321 return 0;
271} 322}
272 323
273static int
274ipoib_mcast_sendonly_join_complete(int status,
275 struct ib_sa_multicast *multicast)
276{
277 struct ipoib_mcast *mcast = multicast->context;
278 struct net_device *dev = mcast->dev;
279
280 /* We trap for port events ourselves. */
281 if (status == -ENETRESET)
282 return 0;
283
284 if (!status)
285 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
286
287 if (status) {
288 if (mcast->logcount++ < 20)
289 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
290 mcast->mcmember.mgid.raw, status);
291
292 /* Flush out any queued packets */
293 netif_tx_lock_bh(dev);
294 while (!skb_queue_empty(&mcast->pkt_queue)) {
295 ++dev->stats.tx_dropped;
296 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
297 }
298 netif_tx_unlock_bh(dev);
299
300 /* Clear the busy flag so we try again */
301 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
302 &mcast->flags);
303 }
304 return status;
305}
306
307static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
308{
309 struct net_device *dev = mcast->dev;
310 struct ipoib_dev_priv *priv = netdev_priv(dev);
311 struct ib_sa_mcmember_rec rec = {
312#if 0 /* Some SMs don't support send-only yet */
313 .join_state = 4
314#else
315 .join_state = 1
316#endif
317 };
318 int ret = 0;
319
320 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
321 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
322 return -ENODEV;
323 }
324
325 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
326 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
327 return -EBUSY;
328 }
329
330 rec.mgid = mcast->mcmember.mgid;
331 rec.port_gid = priv->local_gid;
332 rec.pkey = cpu_to_be16(priv->pkey);
333
334 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
335 priv->port, &rec,
336 IB_SA_MCMEMBER_REC_MGID |
337 IB_SA_MCMEMBER_REC_PORT_GID |
338 IB_SA_MCMEMBER_REC_PKEY |
339 IB_SA_MCMEMBER_REC_JOIN_STATE,
340 GFP_ATOMIC,
341 ipoib_mcast_sendonly_join_complete,
342 mcast);
343 if (IS_ERR(mcast->mc)) {
344 ret = PTR_ERR(mcast->mc);
345 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
346 ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
347 ret);
348 } else {
349 ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
350 mcast->mcmember.mgid.raw);
351 }
352
353 return ret;
354}
355
356void ipoib_mcast_carrier_on_task(struct work_struct *work) 324void ipoib_mcast_carrier_on_task(struct work_struct *work)
357{ 325{
358 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 326 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
359 carrier_on_task); 327 carrier_on_task);
360 struct ib_port_attr attr; 328 struct ib_port_attr attr;
361 329
362 /*
363 * Take rtnl_lock to avoid racing with ipoib_stop() and
364 * turning the carrier back on while a device is being
365 * removed.
366 */
367 if (ib_query_port(priv->ca, priv->port, &attr) || 330 if (ib_query_port(priv->ca, priv->port, &attr) ||
368 attr.state != IB_PORT_ACTIVE) { 331 attr.state != IB_PORT_ACTIVE) {
369 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); 332 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
370 return; 333 return;
371 } 334 }
372 335
373 rtnl_lock(); 336 /*
337 * Take rtnl_lock to avoid racing with ipoib_stop() and
338 * turning the carrier back on while a device is being
339 * removed. However, ipoib_stop() will attempt to flush
340 * the workqueue while holding the rtnl lock, so loop
341 * on trylock until either we get the lock or we see
342 * FLAG_OPER_UP go away as that signals that we are bailing
343 * and can safely ignore the carrier on work.
344 */
345 while (!rtnl_trylock()) {
346 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
347 return;
348 else
349 msleep(20);
350 }
351 if (!ipoib_cm_admin_enabled(priv->dev))
352 dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
374 netif_carrier_on(priv->dev); 353 netif_carrier_on(priv->dev);
375 rtnl_unlock(); 354 rtnl_unlock();
376} 355}
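The comment above spells out why this handler cannot simply call rtnl_lock(): ipoib_stop() flushes priv->wq while holding the rtnl lock, and the carrier-on task itself runs on priv->wq, so blocking on the lock could deadlock. Reduced to a sketch (bail_out() stands in for the FLAG_OPER_UP test and is not a real kernel helper):

#include <linux/delay.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>

static void my_do_under_rtnl(bool (*bail_out)(void), void (*fn)(void))
{
	while (!rtnl_trylock()) {
		/* The stop path holds rtnl and is waiting for us to finish */
		if (bail_out())
			return;
		msleep(20);
	}
	fn();
	rtnl_unlock();
}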
@@ -382,7 +361,9 @@ static int ipoib_mcast_join_complete(int status,
382 struct net_device *dev = mcast->dev; 361 struct net_device *dev = mcast->dev;
383 struct ipoib_dev_priv *priv = netdev_priv(dev); 362 struct ipoib_dev_priv *priv = netdev_priv(dev);
384 363
385 ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", 364 ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
365 test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
366 "sendonly " : "",
386 mcast->mcmember.mgid.raw, status); 367 mcast->mcmember.mgid.raw, status);
387 368
388 /* We trap for port events ourselves. */ 369 /* We trap for port events ourselves. */
@@ -396,49 +377,74 @@ static int ipoib_mcast_join_complete(int status,
396 377
397 if (!status) { 378 if (!status) {
398 mcast->backoff = 1; 379 mcast->backoff = 1;
399 mutex_lock(&mcast_mutex); 380 mcast->delay_until = jiffies;
400 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
401 queue_delayed_work(ipoib_workqueue,
402 &priv->mcast_task, 0);
403 mutex_unlock(&mcast_mutex);
404 381
405 /* 382 /*
406 * Defer carrier on work to ipoib_workqueue to avoid a 383 * Defer carrier on work to priv->wq to avoid a
407 * deadlock on rtnl_lock here. 384 * deadlock on rtnl_lock here. Requeue our multicast
385 * work too, which will end up happening right after
386 * our carrier on task work and will allow us to
387 * send out all of the non-broadcast joins
408 */ 388 */
409 if (mcast == priv->broadcast) 389 if (mcast == priv->broadcast) {
410 queue_work(ipoib_workqueue, &priv->carrier_on_task); 390 spin_lock_irq(&priv->lock);
411 391 queue_work(priv->wq, &priv->carrier_on_task);
412 status = 0; 392 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
413 goto out; 393 goto out_locked;
414 } 394 }
395 } else {
396 if (mcast->logcount++ < 20) {
397 if (status == -ETIMEDOUT || status == -EAGAIN) {
398 ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
399 test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
400 mcast->mcmember.mgid.raw, status);
401 } else {
402 ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
403 test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
404 mcast->mcmember.mgid.raw, status);
405 }
406 }
415 407
416 if (mcast->logcount++ < 20) { 408 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
417 if (status == -ETIMEDOUT || status == -EAGAIN) { 409 mcast->backoff >= 2) {
418 ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", 410 /*
419 mcast->mcmember.mgid.raw, status); 411 * We only retry sendonly joins once before we drop
412 * the packet and quit trying to deal with the
413 * group. However, we leave the group in the
414 * mcast list as an unjoined group. If we want to
415 * try joining again, we simply queue up a packet
416 * and restart the join thread. The empty queue
417 * is why the join thread ignores this group.
418 */
419 mcast->backoff = 1;
420 netif_tx_lock_bh(dev);
421 while (!skb_queue_empty(&mcast->pkt_queue)) {
422 ++dev->stats.tx_dropped;
423 dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
424 }
425 netif_tx_unlock_bh(dev);
420 } else { 426 } else {
421 ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", 427 spin_lock_irq(&priv->lock);
422 mcast->mcmember.mgid.raw, status); 428 /* Requeue this join task with a backoff delay */
429 __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
430 goto out_locked;
423 } 431 }
424 } 432 }
425 433out:
426 mcast->backoff *= 2;
427 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
428 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
429
430 /* Clear the busy flag so we try again */
431 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
432
433 mutex_lock(&mcast_mutex);
434 spin_lock_irq(&priv->lock); 434 spin_lock_irq(&priv->lock);
435 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 435out_locked:
436 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 436 /*
437 mcast->backoff * HZ); 437 * Make sure to set mcast->mc before we clear the busy flag to avoid
438 * racing with code that checks for BUSY before checking mcast->mc
439 */
440 if (status)
441 mcast->mc = NULL;
442 else
443 mcast->mc = multicast;
444 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
438 spin_unlock_irq(&priv->lock); 445 spin_unlock_irq(&priv->lock);
439 mutex_unlock(&mcast_mutex);
440out:
441 complete(&mcast->done); 446 complete(&mcast->done);
447
442 return status; 448 return status;
443} 449}
444 450
@@ -446,6 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
446 int create) 452 int create)
447{ 453{
448 struct ipoib_dev_priv *priv = netdev_priv(dev); 454 struct ipoib_dev_priv *priv = netdev_priv(dev);
455 struct ib_sa_multicast *multicast;
449 struct ib_sa_mcmember_rec rec = { 456 struct ib_sa_mcmember_rec rec = {
450 .join_state = 1 457 .join_state = 1
451 }; 458 };
@@ -487,29 +494,18 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
487 rec.hop_limit = priv->broadcast->mcmember.hop_limit; 494 rec.hop_limit = priv->broadcast->mcmember.hop_limit;
488 } 495 }
489 496
490 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 497 multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
491 init_completion(&mcast->done);
492 set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
493
494 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
495 &rec, comp_mask, GFP_KERNEL, 498 &rec, comp_mask, GFP_KERNEL,
496 ipoib_mcast_join_complete, mcast); 499 ipoib_mcast_join_complete, mcast);
497 if (IS_ERR(mcast->mc)) { 500 if (IS_ERR(multicast)) {
501 ret = PTR_ERR(multicast);
502 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
503 spin_lock_irq(&priv->lock);
504 /* Requeue this join task with a backoff delay */
505 __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
498 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 506 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
507 spin_unlock_irq(&priv->lock);
499 complete(&mcast->done); 508 complete(&mcast->done);
500 ret = PTR_ERR(mcast->mc);
501 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
502
503 mcast->backoff *= 2;
504 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
505 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
506
507 mutex_lock(&mcast_mutex);
508 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
509 queue_delayed_work(ipoib_workqueue,
510 &priv->mcast_task,
511 mcast->backoff * HZ);
512 mutex_unlock(&mcast_mutex);
513 } 509 }
514} 510}
515 511
@@ -519,8 +515,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
519 container_of(work, struct ipoib_dev_priv, mcast_task.work); 515 container_of(work, struct ipoib_dev_priv, mcast_task.work);
520 struct net_device *dev = priv->dev; 516 struct net_device *dev = priv->dev;
521 struct ib_port_attr port_attr; 517 struct ib_port_attr port_attr;
518 unsigned long delay_until = 0;
519 struct ipoib_mcast *mcast = NULL;
520 int create = 1;
522 521
523 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 522 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
524 return; 523 return;
525 524
526 if (ib_query_port(priv->ca, priv->port, &port_attr) || 525 if (ib_query_port(priv->ca, priv->port, &port_attr) ||
@@ -536,93 +535,118 @@ void ipoib_mcast_join_task(struct work_struct *work)
536 else 535 else
537 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 536 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
538 537
538 spin_lock_irq(&priv->lock);
539 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
540 goto out;
541
539 if (!priv->broadcast) { 542 if (!priv->broadcast) {
540 struct ipoib_mcast *broadcast; 543 struct ipoib_mcast *broadcast;
541 544
542 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 545 broadcast = ipoib_mcast_alloc(dev, 0);
543 return;
544
545 broadcast = ipoib_mcast_alloc(dev, 1);
546 if (!broadcast) { 546 if (!broadcast) {
547 ipoib_warn(priv, "failed to allocate broadcast group\n"); 547 ipoib_warn(priv, "failed to allocate broadcast group\n");
548 mutex_lock(&mcast_mutex); 548 /*
549 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 549 * Restart us after a 1 second delay to retry
550 queue_delayed_work(ipoib_workqueue, 550 * creating our broadcast group and attaching to
551 &priv->mcast_task, HZ); 551 * it. Until this succeeds, this ipoib dev is
552 mutex_unlock(&mcast_mutex); 552 * completely stalled (multicast wise).
553 return; 553 */
554 __ipoib_mcast_schedule_join_thread(priv, NULL, 1);
555 goto out;
554 } 556 }
555 557
556 spin_lock_irq(&priv->lock);
557 memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 558 memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
558 sizeof (union ib_gid)); 559 sizeof (union ib_gid));
559 priv->broadcast = broadcast; 560 priv->broadcast = broadcast;
560 561
561 __ipoib_mcast_add(dev, priv->broadcast); 562 __ipoib_mcast_add(dev, priv->broadcast);
562 spin_unlock_irq(&priv->lock);
563 } 563 }
564 564
565 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 565 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
566 if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) 566 if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
567 ipoib_mcast_join(dev, priv->broadcast, 0); 567 !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
568 return; 568 mcast = priv->broadcast;
569 } 569 create = 0;
570 570 if (mcast->backoff > 1 &&
571 while (1) { 571 time_before(jiffies, mcast->delay_until)) {
572 struct ipoib_mcast *mcast = NULL; 572 delay_until = mcast->delay_until;
573 573 mcast = NULL;
574 spin_lock_irq(&priv->lock);
575 list_for_each_entry(mcast, &priv->multicast_list, list) {
576 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
577 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
578 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
579 /* Found the next unjoined group */
580 break;
581 } 574 }
582 } 575 }
583 spin_unlock_irq(&priv->lock); 576 goto out;
577 }
584 578
585 if (&mcast->list == &priv->multicast_list) { 579 /*
586 /* All done */ 580 * We'll never get here until the broadcast group is both allocated
587 break; 581 * and attached
582 */
583 list_for_each_entry(mcast, &priv->multicast_list, list) {
584 if (IS_ERR_OR_NULL(mcast->mc) &&
585 !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
586 (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
587 !skb_queue_empty(&mcast->pkt_queue))) {
588 if (mcast->backoff == 1 ||
589 time_after_eq(jiffies, mcast->delay_until)) {
590 /* Found the next unjoined group */
591 init_completion(&mcast->done);
592 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
593 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
594 create = 0;
595 else
596 create = 1;
597 spin_unlock_irq(&priv->lock);
598 ipoib_mcast_join(dev, mcast, create);
599 spin_lock_irq(&priv->lock);
600 } else if (!delay_until ||
601 time_before(mcast->delay_until, delay_until))
602 delay_until = mcast->delay_until;
588 } 603 }
589
590 ipoib_mcast_join(dev, mcast, 1);
591 return;
592 } 604 }
593 605
594 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 606 mcast = NULL;
607 ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
595 608
596 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 609out:
610 if (delay_until) {
611 cancel_delayed_work(&priv->mcast_task);
612 queue_delayed_work(priv->wq, &priv->mcast_task,
613 delay_until - jiffies);
614 }
615 if (mcast) {
616 init_completion(&mcast->done);
617 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
618 }
619 spin_unlock_irq(&priv->lock);
620 if (mcast)
621 ipoib_mcast_join(dev, mcast, create);
597} 622}
598 623
599int ipoib_mcast_start_thread(struct net_device *dev) 624int ipoib_mcast_start_thread(struct net_device *dev)
600{ 625{
601 struct ipoib_dev_priv *priv = netdev_priv(dev); 626 struct ipoib_dev_priv *priv = netdev_priv(dev);
627 unsigned long flags;
602 628
603 ipoib_dbg_mcast(priv, "starting multicast thread\n"); 629 ipoib_dbg_mcast(priv, "starting multicast thread\n");
604 630
605 mutex_lock(&mcast_mutex); 631 spin_lock_irqsave(&priv->lock, flags);
606 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) 632 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
607 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); 633 spin_unlock_irqrestore(&priv->lock, flags);
608 mutex_unlock(&mcast_mutex);
609 634
610 return 0; 635 return 0;
611} 636}
612 637
613int ipoib_mcast_stop_thread(struct net_device *dev, int flush) 638int ipoib_mcast_stop_thread(struct net_device *dev)
614{ 639{
615 struct ipoib_dev_priv *priv = netdev_priv(dev); 640 struct ipoib_dev_priv *priv = netdev_priv(dev);
641 unsigned long flags;
616 642
617 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 643 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
618 644
619 mutex_lock(&mcast_mutex); 645 spin_lock_irqsave(&priv->lock, flags);
620 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
621 cancel_delayed_work(&priv->mcast_task); 646 cancel_delayed_work(&priv->mcast_task);
622 mutex_unlock(&mcast_mutex); 647 spin_unlock_irqrestore(&priv->lock, flags);
623 648
624 if (flush) 649 flush_workqueue(priv->wq);
625 flush_workqueue(ipoib_workqueue);
626 650
627 return 0; 651 return 0;
628} 652}
@@ -633,6 +657,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
633 int ret = 0; 657 int ret = 0;
634 658
635 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 659 if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
660 ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
661
662 if (!IS_ERR_OR_NULL(mcast->mc))
636 ib_sa_free_multicast(mcast->mc); 663 ib_sa_free_multicast(mcast->mc);
637 664
638 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 665 if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -644,7 +671,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
644 be16_to_cpu(mcast->mcmember.mlid)); 671 be16_to_cpu(mcast->mcmember.mlid));
645 if (ret) 672 if (ret)
646 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); 673 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
647 } 674 } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
675 ipoib_dbg(priv, "leaving with no mcmember but not a "
676 "SENDONLY join\n");
648 677
649 return 0; 678 return 0;
650} 679}
@@ -667,49 +696,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
667 } 696 }
668 697
669 mcast = __ipoib_mcast_find(dev, mgid); 698 mcast = __ipoib_mcast_find(dev, mgid);
670 if (!mcast) { 699 if (!mcast || !mcast->ah) {
671 /* Let's create a new send only group now */
672 ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
673 mgid);
674
675 mcast = ipoib_mcast_alloc(dev, 0);
676 if (!mcast) { 700 if (!mcast) {
677 ipoib_warn(priv, "unable to allocate memory for " 701 /* Let's create a new send only group now */
678 "multicast structure\n"); 702 ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
679 ++dev->stats.tx_dropped; 703 mgid);
680 dev_kfree_skb_any(skb); 704
681 goto out; 705 mcast = ipoib_mcast_alloc(dev, 0);
682 } 706 if (!mcast) {
683 707 ipoib_warn(priv, "unable to allocate memory "
684 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 708 "for multicast structure\n");
685 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); 709 ++dev->stats.tx_dropped;
686 __ipoib_mcast_add(dev, mcast); 710 dev_kfree_skb_any(skb);
687 list_add_tail(&mcast->list, &priv->multicast_list); 711 goto unlock;
688 } 712 }
689 713
690 if (!mcast->ah) { 714 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
715 memcpy(mcast->mcmember.mgid.raw, mgid,
716 sizeof (union ib_gid));
717 __ipoib_mcast_add(dev, mcast);
718 list_add_tail(&mcast->list, &priv->multicast_list);
719 }
691 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) 720 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
692 skb_queue_tail(&mcast->pkt_queue, skb); 721 skb_queue_tail(&mcast->pkt_queue, skb);
693 else { 722 else {
694 ++dev->stats.tx_dropped; 723 ++dev->stats.tx_dropped;
695 dev_kfree_skb_any(skb); 724 dev_kfree_skb_any(skb);
696 } 725 }
697 726 if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
698 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 727 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
699 ipoib_dbg_mcast(priv, "no address vector, " 728 }
700 "but multicast join already started\n"); 729 } else {
701 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
702 ipoib_mcast_sendonly_join(mcast);
703
704 /*
705 * If lookup completes between here and out:, don't
706 * want to send packet twice.
707 */
708 mcast = NULL;
709 }
710
711out:
712 if (mcast && mcast->ah) {
713 struct ipoib_neigh *neigh; 730 struct ipoib_neigh *neigh;
714 731
715 spin_unlock_irqrestore(&priv->lock, flags); 732 spin_unlock_irqrestore(&priv->lock, flags);
@@ -759,9 +776,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
759 776
760 spin_unlock_irqrestore(&priv->lock, flags); 777 spin_unlock_irqrestore(&priv->lock, flags);
761 778
762 /* seperate between the wait to the leave*/ 779 /*
780 * make sure the in-flight joins have finished before we attempt
781 * to leave
782 */
763 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) 783 list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
764 if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags)) 784 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
765 wait_for_completion(&mcast->done); 785 wait_for_completion(&mcast->done);
766 786
767 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 787 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -792,9 +812,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
792 unsigned long flags; 812 unsigned long flags;
793 struct ib_sa_mcmember_rec rec; 813 struct ib_sa_mcmember_rec rec;
794 814
795 ipoib_dbg_mcast(priv, "restarting multicast task\n"); 815 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
816 /*
817 * shortcut...on shutdown flush is called next, just
818 * let it do all the work
819 */
820 return;
796 821
797 ipoib_mcast_stop_thread(dev, 0); 822 ipoib_dbg_mcast(priv, "restarting multicast task\n");
798 823
799 local_irq_save(flags); 824 local_irq_save(flags);
800 netif_addr_lock(dev); 825 netif_addr_lock(dev);
@@ -880,14 +905,27 @@ void ipoib_mcast_restart_task(struct work_struct *work)
880 netif_addr_unlock(dev); 905 netif_addr_unlock(dev);
881 local_irq_restore(flags); 906 local_irq_restore(flags);
882 907
883 /* We have to cancel outside of the spinlock */ 908 /*
909 * make sure the in-flight joins have finished before we attempt
910 * to leave
911 */
912 list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
913 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
914 wait_for_completion(&mcast->done);
915
884 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 916 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
885 ipoib_mcast_leave(mcast->dev, mcast); 917 ipoib_mcast_leave(mcast->dev, mcast);
886 ipoib_mcast_free(mcast); 918 ipoib_mcast_free(mcast);
887 } 919 }
888 920
889 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 921 /*
890 ipoib_mcast_start_thread(dev); 922 * Double check that we are still up
923 */
924 if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
925 spin_lock_irqsave(&priv->lock, flags);
926 __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
927 spin_unlock_irqrestore(&priv->lock, flags);
928 }
891} 929}
892 930
893#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 931#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d44c53b..e5cc43074196 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
157 goto out_free_pd; 157 goto out_free_pd;
158 } 158 }
159 159
160 /*
161 * the various IPoIB tasks assume they will never race against
 162 * themselves, so always use a single-threaded workqueue
163 */
164 priv->wq = create_singlethread_workqueue("ipoib_wq");
165 if (!priv->wq) {
166 printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
167 goto out_free_mr;
168 }
169
160 size = ipoib_recvq_size + 1; 170 size = ipoib_recvq_size + 1;
161 ret = ipoib_cm_dev_init(dev); 171 ret = ipoib_cm_dev_init(dev);
162 if (!ret) { 172 if (!ret) {
@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
165 size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ 175 size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
166 else 176 else
167 size += ipoib_recvq_size * ipoib_max_conn_qp; 177 size += ipoib_recvq_size * ipoib_max_conn_qp;
168 } 178 } else
179 goto out_free_wq;
169 180
170 priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 181 priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
171 if (IS_ERR(priv->recv_cq)) { 182 if (IS_ERR(priv->recv_cq)) {
172 printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); 183 printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
173 goto out_free_mr; 184 goto out_cm_dev_cleanup;
174 } 185 }
175 186
176 priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, 187 priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
216 priv->tx_wr.send_flags = IB_SEND_SIGNALED; 227 priv->tx_wr.send_flags = IB_SEND_SIGNALED;
217 228
218 priv->rx_sge[0].lkey = priv->mr->lkey; 229 priv->rx_sge[0].lkey = priv->mr->lkey;
219 if (ipoib_ud_need_sg(priv->max_ib_mtu)) { 230
220 priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE; 231 priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
221 priv->rx_sge[1].length = PAGE_SIZE; 232 priv->rx_wr.num_sge = 1;
222 priv->rx_sge[1].lkey = priv->mr->lkey; 233
223 priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
224 } else {
225 priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
226 priv->rx_wr.num_sge = 1;
227 }
228 priv->rx_wr.next = NULL; 234 priv->rx_wr.next = NULL;
229 priv->rx_wr.sg_list = priv->rx_sge; 235 priv->rx_wr.sg_list = priv->rx_sge;
230 236
@@ -236,12 +242,19 @@ out_free_send_cq:
236out_free_recv_cq: 242out_free_recv_cq:
237 ib_destroy_cq(priv->recv_cq); 243 ib_destroy_cq(priv->recv_cq);
238 244
245out_cm_dev_cleanup:
246 ipoib_cm_dev_cleanup(dev);
247
248out_free_wq:
249 destroy_workqueue(priv->wq);
250 priv->wq = NULL;
251
239out_free_mr: 252out_free_mr:
240 ib_dereg_mr(priv->mr); 253 ib_dereg_mr(priv->mr);
241 ipoib_cm_dev_cleanup(dev);
242 254
243out_free_pd: 255out_free_pd:
244 ib_dealloc_pd(priv->pd); 256 ib_dealloc_pd(priv->pd);
257
245 return -ENODEV; 258 return -ENODEV;
246} 259}
247 260
@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
265 278
266 ipoib_cm_dev_cleanup(dev); 279 ipoib_cm_dev_cleanup(dev);
267 280
281 if (priv->wq) {
282 flush_workqueue(priv->wq);
283 destroy_workqueue(priv->wq);
284 priv->wq = NULL;
285 }
286
268 if (ib_dereg_mr(priv->mr)) 287 if (ib_dereg_mr(priv->mr))
269 ipoib_warn(priv, "ib_dereg_mr failed\n"); 288 ipoib_warn(priv, "ib_dereg_mr failed\n");
270 289
271 if (ib_dealloc_pd(priv->pd)) 290 if (ib_dealloc_pd(priv->pd))
272 ipoib_warn(priv, "ib_dealloc_pd failed\n"); 291 ipoib_warn(priv, "ib_dealloc_pd failed\n");
292
273} 293}
274 294
275void ipoib_event(struct ib_event_handler *handler, 295void ipoib_event(struct ib_event_handler *handler,
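For reference, the hunks above give each IPoIB device its own single-threaded workqueue, created in ipoib_transport_dev_init() and torn down in ipoib_transport_dev_cleanup(). A minimal sketch of that lifecycle pattern; the identifiers my_priv/my_wq are illustrative and not taken from the driver:

/* Sketch only: per-device single-threaded workqueue lifecycle,
 * mirroring the allocation/teardown added above. */
#include <linux/workqueue.h>

struct my_priv {
	struct workqueue_struct *wq;
};

static int my_wq_init(struct my_priv *priv)
{
	/* single thread: queued works never race against themselves */
	priv->wq = create_singlethread_workqueue("my_wq");
	if (!priv->wq)
		return -ENOMEM;
	return 0;
}

static void my_wq_cleanup(struct my_priv *priv)
{
	if (priv->wq) {
		flush_workqueue(priv->wq);	/* drain pending work */
		destroy_workqueue(priv->wq);
		priv->wq = NULL;
	}
}
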
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index b47aea1094b2..262ba1f8ee50 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,7 +69,7 @@
69 69
70#define DRV_NAME "iser" 70#define DRV_NAME "iser"
71#define PFX DRV_NAME ": " 71#define PFX DRV_NAME ": "
72#define DRV_VER "1.5" 72#define DRV_VER "1.6"
73 73
74#define iser_dbg(fmt, arg...) \ 74#define iser_dbg(fmt, arg...) \
75 do { \ 75 do { \
@@ -218,22 +218,21 @@ enum iser_data_dir {
218/** 218/**
219 * struct iser_data_buf - iSER data buffer 219 * struct iser_data_buf - iSER data buffer
220 * 220 *
221 * @buf: pointer to the sg list 221 * @sg: pointer to the sg list
222 * @size: num entries of this sg 222 * @size: num entries of this sg
223 * @data_len: total buffer byte len 223 * @data_len: total buffer byte len
224 * @dma_nents: returned by dma_map_sg 224 * @dma_nents: returned by dma_map_sg
225 * @copy_buf: allocated copy buf for SGs unaligned 225 * @orig_sg: pointer to the original sg list (in case
226 * for rdma which are copied 226 * we used a copy)
227 * @sg_single: SG-ified clone of a non SG SC or 227 * @orig_size: num entries of orig sg list
228 * unaligned SG
229 */ 228 */
230struct iser_data_buf { 229struct iser_data_buf {
231 void *buf; 230 struct scatterlist *sg;
232 unsigned int size; 231 unsigned int size;
233 unsigned long data_len; 232 unsigned long data_len;
234 unsigned int dma_nents; 233 unsigned int dma_nents;
235 char *copy_buf; 234 struct scatterlist *orig_sg;
236 struct scatterlist sg_single; 235 unsigned int orig_size;
237 }; 236 };
238 237
239/* fwd declarations */ 238/* fwd declarations */
@@ -244,35 +243,14 @@ struct iscsi_endpoint;
244/** 243/**
245 * struct iser_mem_reg - iSER memory registration info 244 * struct iser_mem_reg - iSER memory registration info
246 * 245 *
247 * @lkey: MR local key 246 * @sge: memory region sg element
248 * @rkey: MR remote key 247 * @rkey: memory region remote key
249 * @va: MR start address (buffer va)
250 * @len: MR length
251 * @mem_h: pointer to registration context (FMR/Fastreg) 248 * @mem_h: pointer to registration context (FMR/Fastreg)
252 */ 249 */
253struct iser_mem_reg { 250struct iser_mem_reg {
254 u32 lkey; 251 struct ib_sge sge;
255 u32 rkey; 252 u32 rkey;
256 u64 va; 253 void *mem_h;
257 u64 len;
258 void *mem_h;
259};
260
261/**
262 * struct iser_regd_buf - iSER buffer registration desc
263 *
264 * @reg: memory registration info
265 * @virt_addr: virtual address of buffer
266 * @device: reference to iser device
267 * @direction: dma direction (for dma_unmap)
268 * @data_size: data buffer size in bytes
269 */
270struct iser_regd_buf {
271 struct iser_mem_reg reg;
272 void *virt_addr;
273 struct iser_device *device;
274 enum dma_data_direction direction;
275 unsigned int data_size;
276}; 254};
277 255
278enum iser_desc_type { 256enum iser_desc_type {
@@ -534,11 +512,9 @@ struct iser_conn {
534 * @sc: link to scsi command 512 * @sc: link to scsi command
535 * @command_sent: indicate if command was sent 513 * @command_sent: indicate if command was sent
536 * @dir: iser data direction 514 * @dir: iser data direction
537 * @rdma_regd: task rdma registration desc 515 * @rdma_reg: task rdma registration desc
538 * @data: iser data buffer desc 516 * @data: iser data buffer desc
539 * @data_copy: iser data copy buffer desc (bounce buffer)
540 * @prot: iser protection buffer desc 517 * @prot: iser protection buffer desc
541 * @prot_copy: iser protection copy buffer desc (bounce buffer)
542 */ 518 */
543struct iscsi_iser_task { 519struct iscsi_iser_task {
544 struct iser_tx_desc desc; 520 struct iser_tx_desc desc;
@@ -547,11 +523,9 @@ struct iscsi_iser_task {
547 struct scsi_cmnd *sc; 523 struct scsi_cmnd *sc;
548 int command_sent; 524 int command_sent;
549 int dir[ISER_DIRS_NUM]; 525 int dir[ISER_DIRS_NUM];
550 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM]; 526 struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
551 struct iser_data_buf data[ISER_DIRS_NUM]; 527 struct iser_data_buf data[ISER_DIRS_NUM];
552 struct iser_data_buf data_copy[ISER_DIRS_NUM];
553 struct iser_data_buf prot[ISER_DIRS_NUM]; 528 struct iser_data_buf prot[ISER_DIRS_NUM];
554 struct iser_data_buf prot_copy[ISER_DIRS_NUM];
555}; 529};
556 530
557struct iser_page_vec { 531struct iser_page_vec {
@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
621 595
622void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, 596void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
623 struct iser_data_buf *mem, 597 struct iser_data_buf *mem,
624 struct iser_data_buf *mem_copy,
625 enum iser_data_dir cmd_dir); 598 enum iser_data_dir cmd_dir);
626 599
627int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, 600int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
@@ -634,10 +607,6 @@ int iser_connect(struct iser_conn *iser_conn,
634 struct sockaddr *dst_addr, 607 struct sockaddr *dst_addr,
635 int non_blocking); 608 int non_blocking);
636 609
637int iser_reg_page_vec(struct ib_conn *ib_conn,
638 struct iser_page_vec *page_vec,
639 struct iser_mem_reg *mem_reg);
640
641void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, 610void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
642 enum iser_data_dir cmd_dir); 611 enum iser_data_dir cmd_dir);
643void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, 612void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
667void iser_free_fastreg_pool(struct ib_conn *ib_conn); 636void iser_free_fastreg_pool(struct ib_conn *ib_conn);
668u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, 637u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
669 enum iser_data_dir cmd_dir, sector_t *sector); 638 enum iser_data_dir cmd_dir, sector_t *sector);
639struct fast_reg_descriptor *
640iser_reg_desc_get(struct ib_conn *ib_conn);
641void
642iser_reg_desc_put(struct ib_conn *ib_conn,
643 struct fast_reg_descriptor *desc);
670#endif 644#endif
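For reference, the header changes above collapse iser_regd_buf into the leaner iser_mem_reg (an ib_sge plus an rkey and a registration handle). A hedged sketch of how the new layout is consumed when building an iSER READ header; fill_read_stag() is a hypothetical helper, the field usage mirrors the iser_initiator.c hunks that follow:

/* Sketch: consuming the consolidated iser_mem_reg when filling the iSER
 * header of a READ command (hypothetical helper only). */
static void fill_read_stag(struct iser_hdr *hdr, struct iser_mem_reg *reg)
{
	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(reg->rkey);	/* remote key */
	hdr->read_va = cpu_to_be64(reg->sge.addr);	/* registered address */
}
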
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 20e859a6f1a6..3e2118e8ed87 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
50{ 50{
51 struct iscsi_iser_task *iser_task = task->dd_data; 51 struct iscsi_iser_task *iser_task = task->dd_data;
52 struct iser_device *device = iser_task->iser_conn->ib_conn.device; 52 struct iser_device *device = iser_task->iser_conn->ib_conn.device;
53 struct iser_regd_buf *regd_buf; 53 struct iser_mem_reg *mem_reg;
54 int err; 54 int err;
55 struct iser_hdr *hdr = &iser_task->desc.iser_header; 55 struct iser_hdr *hdr = &iser_task->desc.iser_header;
56 struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; 56 struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
78 iser_err("Failed to set up Data-IN RDMA\n"); 78 iser_err("Failed to set up Data-IN RDMA\n");
79 return err; 79 return err;
80 } 80 }
81 regd_buf = &iser_task->rdma_regd[ISER_DIR_IN]; 81 mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
82 82
83 hdr->flags |= ISER_RSV; 83 hdr->flags |= ISER_RSV;
84 hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); 84 hdr->read_stag = cpu_to_be32(mem_reg->rkey);
85 hdr->read_va = cpu_to_be64(regd_buf->reg.va); 85 hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
86 86
87 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", 87 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
88 task->itt, regd_buf->reg.rkey, 88 task->itt, mem_reg->rkey,
89 (unsigned long long)regd_buf->reg.va); 89 (unsigned long long)mem_reg->sge.addr);
90 90
91 return 0; 91 return 0;
92} 92}
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
104{ 104{
105 struct iscsi_iser_task *iser_task = task->dd_data; 105 struct iscsi_iser_task *iser_task = task->dd_data;
106 struct iser_device *device = iser_task->iser_conn->ib_conn.device; 106 struct iser_device *device = iser_task->iser_conn->ib_conn.device;
107 struct iser_regd_buf *regd_buf; 107 struct iser_mem_reg *mem_reg;
108 int err; 108 int err;
109 struct iser_hdr *hdr = &iser_task->desc.iser_header; 109 struct iser_hdr *hdr = &iser_task->desc.iser_header;
110 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; 110 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
134 return err; 134 return err;
135 } 135 }
136 136
137 regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; 137 mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
138 138
139 if (unsol_sz < edtl) { 139 if (unsol_sz < edtl) {
140 hdr->flags |= ISER_WSV; 140 hdr->flags |= ISER_WSV;
141 hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey); 141 hdr->write_stag = cpu_to_be32(mem_reg->rkey);
142 hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz); 142 hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
143 143
144 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " 144 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
145 "VA:%#llX + unsol:%d\n", 145 "VA:%#llX + unsol:%d\n",
146 task->itt, regd_buf->reg.rkey, 146 task->itt, mem_reg->rkey,
147 (unsigned long long)regd_buf->reg.va, unsol_sz); 147 (unsigned long long)mem_reg->sge.addr, unsol_sz);
148 } 148 }
149 149
150 if (imm_sz > 0) { 150 if (imm_sz > 0) {
151 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", 151 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
152 task->itt, imm_sz); 152 task->itt, imm_sz);
153 tx_dsg->addr = regd_buf->reg.va; 153 tx_dsg->addr = mem_reg->sge.addr;
154 tx_dsg->length = imm_sz; 154 tx_dsg->length = imm_sz;
155 tx_dsg->lkey = regd_buf->reg.lkey; 155 tx_dsg->lkey = mem_reg->sge.lkey;
156 iser_task->desc.num_sge = 2; 156 iser_task->desc.num_sge = 2;
157 } 157 }
158 158
@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
401 } 401 }
402 402
403 if (scsi_sg_count(sc)) { /* using a scatter list */ 403 if (scsi_sg_count(sc)) { /* using a scatter list */
404 data_buf->buf = scsi_sglist(sc); 404 data_buf->sg = scsi_sglist(sc);
405 data_buf->size = scsi_sg_count(sc); 405 data_buf->size = scsi_sg_count(sc);
406 } 406 }
407 data_buf->data_len = scsi_bufflen(sc); 407 data_buf->data_len = scsi_bufflen(sc);
408 408
409 if (scsi_prot_sg_count(sc)) { 409 if (scsi_prot_sg_count(sc)) {
410 prot_buf->buf = scsi_prot_sglist(sc); 410 prot_buf->sg = scsi_prot_sglist(sc);
411 prot_buf->size = scsi_prot_sg_count(sc); 411 prot_buf->size = scsi_prot_sg_count(sc);
412 prot_buf->data_len = data_buf->data_len >> 412 prot_buf->data_len = (data_buf->data_len >>
413 ilog2(sc->device->sector_size) * 8; 413 ilog2(sc->device->sector_size)) * 8;
414 } 414 }
415 415
416 if (hdr->flags & ISCSI_FLAG_CMD_READ) { 416 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
450 struct iser_conn *iser_conn = conn->dd_data; 450 struct iser_conn *iser_conn = conn->dd_data;
451 struct iscsi_iser_task *iser_task = task->dd_data; 451 struct iscsi_iser_task *iser_task = task->dd_data;
452 struct iser_tx_desc *tx_desc = NULL; 452 struct iser_tx_desc *tx_desc = NULL;
453 struct iser_regd_buf *regd_buf; 453 struct iser_mem_reg *mem_reg;
454 unsigned long buf_offset; 454 unsigned long buf_offset;
455 unsigned long data_seg_len; 455 unsigned long data_seg_len;
456 uint32_t itt; 456 uint32_t itt;
@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
477 /* build the tx desc */ 477 /* build the tx desc */
478 iser_initialize_task_headers(task, tx_desc); 478 iser_initialize_task_headers(task, tx_desc);
479 479
480 regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; 480 mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
481 tx_dsg = &tx_desc->tx_sg[1]; 481 tx_dsg = &tx_desc->tx_sg[1];
482 tx_dsg->addr = regd_buf->reg.va + buf_offset; 482 tx_dsg->addr = mem_reg->sge.addr + buf_offset;
483 tx_dsg->length = data_seg_len; 483 tx_dsg->length = data_seg_len;
484 tx_dsg->lkey = regd_buf->reg.lkey; 484 tx_dsg->lkey = mem_reg->sge.lkey;
485 tx_desc->num_sge = 2; 485 tx_desc->num_sge = 2;
486 486
487 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { 487 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
658 iser_task->prot[ISER_DIR_IN].data_len = 0; 658 iser_task->prot[ISER_DIR_IN].data_len = 0;
659 iser_task->prot[ISER_DIR_OUT].data_len = 0; 659 iser_task->prot[ISER_DIR_OUT].data_len = 0;
660 660
661 memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, 661 memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
662 sizeof(struct iser_regd_buf)); 662 sizeof(struct iser_mem_reg));
663 memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, 663 memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
664 sizeof(struct iser_regd_buf)); 664 sizeof(struct iser_mem_reg));
665} 665}
666 666
667void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) 667void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
674 /* if we were reading, copy back to unaligned sglist, 674 /* if we were reading, copy back to unaligned sglist,
675 * anyway dma_unmap and free the copy 675 * anyway dma_unmap and free the copy
676 */ 676 */
677 if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { 677 if (iser_task->data[ISER_DIR_IN].orig_sg) {
678 is_rdma_data_aligned = 0; 678 is_rdma_data_aligned = 0;
679 iser_finalize_rdma_unaligned_sg(iser_task, 679 iser_finalize_rdma_unaligned_sg(iser_task,
680 &iser_task->data[ISER_DIR_IN], 680 &iser_task->data[ISER_DIR_IN],
681 &iser_task->data_copy[ISER_DIR_IN],
682 ISER_DIR_IN); 681 ISER_DIR_IN);
683 } 682 }
684 683
685 if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { 684 if (iser_task->data[ISER_DIR_OUT].orig_sg) {
686 is_rdma_data_aligned = 0; 685 is_rdma_data_aligned = 0;
687 iser_finalize_rdma_unaligned_sg(iser_task, 686 iser_finalize_rdma_unaligned_sg(iser_task,
688 &iser_task->data[ISER_DIR_OUT], 687 &iser_task->data[ISER_DIR_OUT],
689 &iser_task->data_copy[ISER_DIR_OUT],
690 ISER_DIR_OUT); 688 ISER_DIR_OUT);
691 } 689 }
692 690
693 if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { 691 if (iser_task->prot[ISER_DIR_IN].orig_sg) {
694 is_rdma_prot_aligned = 0; 692 is_rdma_prot_aligned = 0;
695 iser_finalize_rdma_unaligned_sg(iser_task, 693 iser_finalize_rdma_unaligned_sg(iser_task,
696 &iser_task->prot[ISER_DIR_IN], 694 &iser_task->prot[ISER_DIR_IN],
697 &iser_task->prot_copy[ISER_DIR_IN],
698 ISER_DIR_IN); 695 ISER_DIR_IN);
699 } 696 }
700 697
701 if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { 698 if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
702 is_rdma_prot_aligned = 0; 699 is_rdma_prot_aligned = 0;
703 iser_finalize_rdma_unaligned_sg(iser_task, 700 iser_finalize_rdma_unaligned_sg(iser_task,
704 &iser_task->prot[ISER_DIR_OUT], 701 &iser_task->prot[ISER_DIR_OUT],
705 &iser_task->prot_copy[ISER_DIR_OUT],
706 ISER_DIR_OUT); 702 ISER_DIR_OUT);
707 } 703 }
708 704
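The initiator-side changes above also reflect the new bounce-buffer bookkeeping: instead of a separate data_copy descriptor with copy_buf, a non-NULL orig_sg on the data buffer itself marks that a bounce copy is in flight. A small hedged sketch; used_bounce_buffer() is illustrative and not in the patch:

/* Sketch: a bounce buffer is now detected via orig_sg rather than a
 * separate copy descriptor (illustrative helper only). */
static bool used_bounce_buffer(const struct iser_data_buf *data)
{
	return data->orig_sg != NULL;
}
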
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 341040bf0984..f0cdc961eb11 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -39,68 +39,173 @@
39 39
40#include "iscsi_iser.h" 40#include "iscsi_iser.h"
41 41
42#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ 42static void
43iser_free_bounce_sg(struct iser_data_buf *data)
44{
45 struct scatterlist *sg;
46 int count;
43 47
44/** 48 for_each_sg(data->sg, sg, data->size, count)
45 * iser_start_rdma_unaligned_sg 49 __free_page(sg_page(sg));
46 */ 50
47static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, 51 kfree(data->sg);
48 struct iser_data_buf *data, 52
49 struct iser_data_buf *data_copy, 53 data->sg = data->orig_sg;
50 enum iser_data_dir cmd_dir) 54 data->size = data->orig_size;
55 data->orig_sg = NULL;
56 data->orig_size = 0;
57}
58
59static int
60iser_alloc_bounce_sg(struct iser_data_buf *data)
51{ 61{
52 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
53 struct scatterlist *sgl = (struct scatterlist *)data->buf;
54 struct scatterlist *sg; 62 struct scatterlist *sg;
55 char *mem = NULL; 63 struct page *page;
56 unsigned long cmd_data_len = 0; 64 unsigned long length = data->data_len;
57 int dma_nents, i; 65 int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
58 66
59 for_each_sg(sgl, sg, data->size, i) 67 sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
60 cmd_data_len += ib_sg_dma_len(dev, sg); 68 if (!sg)
69 goto err;
61 70
62 if (cmd_data_len > ISER_KMALLOC_THRESHOLD) 71 sg_init_table(sg, nents);
63 mem = (void *)__get_free_pages(GFP_ATOMIC, 72 while (length) {
64 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); 73 u32 page_len = min_t(u32, length, PAGE_SIZE);
65 else
66 mem = kmalloc(cmd_data_len, GFP_ATOMIC);
67 74
68 if (mem == NULL) { 75 page = alloc_page(GFP_ATOMIC);
69 iser_err("Failed to allocate mem size %d %d for copying sglist\n", 76 if (!page)
70 data->size, (int)cmd_data_len); 77 goto err;
71 return -ENOMEM; 78
79 sg_set_page(&sg[i], page, page_len, 0);
80 length -= page_len;
81 i++;
72 } 82 }
73 83
74 if (cmd_dir == ISER_DIR_OUT) { 84 data->orig_sg = data->sg;
75 /* copy the unaligned sg the buffer which is used for RDMA */ 85 data->orig_size = data->size;
76 char *p, *from; 86 data->sg = sg;
77 87 data->size = nents;
78 sgl = (struct scatterlist *)data->buf; 88
79 p = mem; 89 return 0;
80 for_each_sg(sgl, sg, data->size, i) { 90
81 from = kmap_atomic(sg_page(sg)); 91err:
82 memcpy(p, 92 for (; i > 0; i--)
83 from + sg->offset, 93 __free_page(sg_page(&sg[i - 1]));
84 sg->length); 94 kfree(sg);
85 kunmap_atomic(from); 95
86 p += sg->length; 96 return -ENOMEM;
97}
98
99static void
100iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
101{
102 struct scatterlist *osg, *bsg = data->sg;
103 void *oaddr, *baddr;
104 unsigned int left = data->data_len;
105 unsigned int bsg_off = 0;
106 int i;
107
108 for_each_sg(data->orig_sg, osg, data->orig_size, i) {
109 unsigned int copy_len, osg_off = 0;
110
111 oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
112 copy_len = min(left, osg->length);
113 while (copy_len) {
114 unsigned int len = min(copy_len, bsg->length - bsg_off);
115
116 baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
117 if (to_buffer)
118 memcpy(baddr + bsg_off, oaddr + osg_off, len);
119 else
120 memcpy(oaddr + osg_off, baddr + bsg_off, len);
121
122 kunmap_atomic(baddr - bsg->offset);
123 osg_off += len;
124 bsg_off += len;
125 copy_len -= len;
126
127 if (bsg_off >= bsg->length) {
128 bsg = sg_next(bsg);
129 bsg_off = 0;
130 }
87 } 131 }
132 kunmap_atomic(oaddr - osg->offset);
133 left -= osg_off;
88 } 134 }
135}
136
137static inline void
138iser_copy_from_bounce(struct iser_data_buf *data)
139{
140 iser_copy_bounce(data, false);
141}
142
143static inline void
144iser_copy_to_bounce(struct iser_data_buf *data)
145{
146 iser_copy_bounce(data, true);
147}
148
149struct fast_reg_descriptor *
150iser_reg_desc_get(struct ib_conn *ib_conn)
151{
152 struct fast_reg_descriptor *desc;
153 unsigned long flags;
154
155 spin_lock_irqsave(&ib_conn->lock, flags);
156 desc = list_first_entry(&ib_conn->fastreg.pool,
157 struct fast_reg_descriptor, list);
158 list_del(&desc->list);
159 spin_unlock_irqrestore(&ib_conn->lock, flags);
160
161 return desc;
162}
163
164void
165iser_reg_desc_put(struct ib_conn *ib_conn,
166 struct fast_reg_descriptor *desc)
167{
168 unsigned long flags;
89 169
90 sg_init_one(&data_copy->sg_single, mem, cmd_data_len); 170 spin_lock_irqsave(&ib_conn->lock, flags);
91 data_copy->buf = &data_copy->sg_single; 171 list_add(&desc->list, &ib_conn->fastreg.pool);
92 data_copy->size = 1; 172 spin_unlock_irqrestore(&ib_conn->lock, flags);
93 data_copy->copy_buf = mem; 173}
94 174
95 dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1, 175/**
96 (cmd_dir == ISER_DIR_OUT) ? 176 * iser_start_rdma_unaligned_sg
97 DMA_TO_DEVICE : DMA_FROM_DEVICE); 177 */
98 BUG_ON(dma_nents == 0); 178static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
179 struct iser_data_buf *data,
180 enum iser_data_dir cmd_dir)
181{
182 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
183 int rc;
184
185 rc = iser_alloc_bounce_sg(data);
186 if (rc) {
187 iser_err("Failed to allocate bounce for data len %lu\n",
188 data->data_len);
189 return rc;
190 }
191
192 if (cmd_dir == ISER_DIR_OUT)
193 iser_copy_to_bounce(data);
99 194
100 data_copy->dma_nents = dma_nents; 195 data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
101 data_copy->data_len = cmd_data_len; 196 (cmd_dir == ISER_DIR_OUT) ?
197 DMA_TO_DEVICE : DMA_FROM_DEVICE);
198 if (!data->dma_nents) {
199 iser_err("Got dma_nents %d, something went wrong...\n",
200 data->dma_nents);
201 rc = -ENOMEM;
202 goto err;
203 }
102 204
103 return 0; 205 return 0;
206err:
207 iser_free_bounce_sg(data);
208 return rc;
104} 209}
105 210
106/** 211/**
@@ -109,51 +214,18 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
109 214
110void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, 215void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
111 struct iser_data_buf *data, 216 struct iser_data_buf *data,
112 struct iser_data_buf *data_copy,
113 enum iser_data_dir cmd_dir) 217 enum iser_data_dir cmd_dir)
114{ 218{
115 struct ib_device *dev; 219 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
116 unsigned long cmd_data_len;
117
118 dev = iser_task->iser_conn->ib_conn.device->ib_device;
119 220
120 ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, 221 ib_dma_unmap_sg(dev, data->sg, data->size,
121 (cmd_dir == ISER_DIR_OUT) ? 222 (cmd_dir == ISER_DIR_OUT) ?
122 DMA_TO_DEVICE : DMA_FROM_DEVICE); 223 DMA_TO_DEVICE : DMA_FROM_DEVICE);
123 224
124 if (cmd_dir == ISER_DIR_IN) { 225 if (cmd_dir == ISER_DIR_IN)
125 char *mem; 226 iser_copy_from_bounce(data);
126 struct scatterlist *sgl, *sg;
127 unsigned char *p, *to;
128 unsigned int sg_size;
129 int i;
130
131 /* copy back read RDMA to unaligned sg */
132 mem = data_copy->copy_buf;
133
134 sgl = (struct scatterlist *)data->buf;
135 sg_size = data->size;
136
137 p = mem;
138 for_each_sg(sgl, sg, sg_size, i) {
139 to = kmap_atomic(sg_page(sg));
140 memcpy(to + sg->offset,
141 p,
142 sg->length);
143 kunmap_atomic(to);
144 p += sg->length;
145 }
146 }
147 227
148 cmd_data_len = data->data_len; 228 iser_free_bounce_sg(data);
149
150 if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
151 free_pages((unsigned long)data_copy->copy_buf,
152 ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
153 else
154 kfree(data_copy->copy_buf);
155
156 data_copy->copy_buf = NULL;
157} 229}
158 230
159#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) 231#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -175,7 +247,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
175 struct ib_device *ibdev, u64 *pages, 247 struct ib_device *ibdev, u64 *pages,
176 int *offset, int *data_size) 248 int *offset, int *data_size)
177{ 249{
178 struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf; 250 struct scatterlist *sg, *sgl = data->sg;
179 u64 start_addr, end_addr, page, chunk_start = 0; 251 u64 start_addr, end_addr, page, chunk_start = 0;
180 unsigned long total_sz = 0; 252 unsigned long total_sz = 0;
181 unsigned int dma_len; 253 unsigned int dma_len;
@@ -227,14 +299,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
227static int iser_data_buf_aligned_len(struct iser_data_buf *data, 299static int iser_data_buf_aligned_len(struct iser_data_buf *data,
228 struct ib_device *ibdev) 300 struct ib_device *ibdev)
229{ 301{
230 struct scatterlist *sgl, *sg, *next_sg = NULL; 302 struct scatterlist *sg, *sgl, *next_sg = NULL;
231 u64 start_addr, end_addr; 303 u64 start_addr, end_addr;
232 int i, ret_len, start_check = 0; 304 int i, ret_len, start_check = 0;
233 305
234 if (data->dma_nents == 1) 306 if (data->dma_nents == 1)
235 return 1; 307 return 1;
236 308
237 sgl = (struct scatterlist *)data->buf; 309 sgl = data->sg;
238 start_addr = ib_sg_dma_address(ibdev, sgl); 310 start_addr = ib_sg_dma_address(ibdev, sgl);
239 311
240 for_each_sg(sgl, sg, data->dma_nents, i) { 312 for_each_sg(sgl, sg, data->dma_nents, i) {
@@ -266,11 +338,10 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
266static void iser_data_buf_dump(struct iser_data_buf *data, 338static void iser_data_buf_dump(struct iser_data_buf *data,
267 struct ib_device *ibdev) 339 struct ib_device *ibdev)
268{ 340{
269 struct scatterlist *sgl = (struct scatterlist *)data->buf;
270 struct scatterlist *sg; 341 struct scatterlist *sg;
271 int i; 342 int i;
272 343
273 for_each_sg(sgl, sg, data->dma_nents, i) 344 for_each_sg(data->sg, sg, data->dma_nents, i)
274 iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p " 345 iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
275 "off:0x%x sz:0x%x dma_len:0x%x\n", 346 "off:0x%x sz:0x%x dma_len:0x%x\n",
276 i, (unsigned long)ib_sg_dma_address(ibdev, sg), 347 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
@@ -288,31 +359,6 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
288 iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]); 359 iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
289} 360}
290 361
291static void iser_page_vec_build(struct iser_data_buf *data,
292 struct iser_page_vec *page_vec,
293 struct ib_device *ibdev)
294{
295 int page_vec_len = 0;
296
297 page_vec->length = 0;
298 page_vec->offset = 0;
299
300 iser_dbg("Translating sg sz: %d\n", data->dma_nents);
301 page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
302 &page_vec->offset,
303 &page_vec->data_size);
304 iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
305
306 page_vec->length = page_vec_len;
307
308 if (page_vec_len * SIZE_4K < page_vec->data_size) {
309 iser_err("page_vec too short to hold this SG\n");
310 iser_data_buf_dump(data, ibdev);
311 iser_dump_page_vec(page_vec);
312 BUG();
313 }
314}
315
316int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, 362int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
317 struct iser_data_buf *data, 363 struct iser_data_buf *data,
318 enum iser_data_dir iser_dir, 364 enum iser_data_dir iser_dir,
@@ -323,7 +369,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
323 iser_task->dir[iser_dir] = 1; 369 iser_task->dir[iser_dir] = 1;
324 dev = iser_task->iser_conn->ib_conn.device->ib_device; 370 dev = iser_task->iser_conn->ib_conn.device->ib_device;
325 371
326 data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); 372 data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
327 if (data->dma_nents == 0) { 373 if (data->dma_nents == 0) {
328 iser_err("dma_map_sg failed!!!\n"); 374 iser_err("dma_map_sg failed!!!\n");
329 return -EINVAL; 375 return -EINVAL;
@@ -338,24 +384,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
338 struct ib_device *dev; 384 struct ib_device *dev;
339 385
340 dev = iser_task->iser_conn->ib_conn.device->ib_device; 386 dev = iser_task->iser_conn->ib_conn.device->ib_device;
341 ib_dma_unmap_sg(dev, data->buf, data->size, dir); 387 ib_dma_unmap_sg(dev, data->sg, data->size, dir);
388}
389
390static int
391iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
392 struct iser_mem_reg *reg)
393{
394 struct scatterlist *sg = mem->sg;
395
396 reg->sge.lkey = device->mr->lkey;
397 reg->rkey = device->mr->rkey;
398 reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
399 reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
400
401 iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
402 " length=0x%x\n", reg->sge.lkey, reg->rkey,
403 reg->sge.addr, reg->sge.length);
404
405 return 0;
342} 406}
343 407
344static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, 408static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
345 struct ib_device *ibdev,
346 struct iser_data_buf *mem, 409 struct iser_data_buf *mem,
347 struct iser_data_buf *mem_copy,
348 enum iser_data_dir cmd_dir, 410 enum iser_data_dir cmd_dir,
349 int aligned_len) 411 int aligned_len)
350{ 412{
351 struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; 413 struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
414 struct iser_device *device = iser_task->iser_conn->ib_conn.device;
352 415
353 iscsi_conn->fmr_unalign_cnt++; 416 iscsi_conn->fmr_unalign_cnt++;
354 iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", 417 iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
355 aligned_len, mem->size); 418 aligned_len, mem->size);
356 419
357 if (iser_debug_level > 0) 420 if (iser_debug_level > 0)
358 iser_data_buf_dump(mem, ibdev); 421 iser_data_buf_dump(mem, device->ib_device);
359 422
360 /* unmap the command data before accessing it */ 423 /* unmap the command data before accessing it */
361 iser_dma_unmap_task_data(iser_task, mem, 424 iser_dma_unmap_task_data(iser_task, mem,
@@ -364,13 +427,95 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
364 427
365 /* allocate copy buf, if we are writing, copy the */ 428 /* allocate copy buf, if we are writing, copy the */
366 /* unaligned scatterlist, dma map the copy */ 429 /* unaligned scatterlist, dma map the copy */
367 if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0) 430 if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
368 return -ENOMEM; 431 return -ENOMEM;
369 432
370 return 0; 433 return 0;
371} 434}
372 435
373/** 436/**
437 * iser_reg_page_vec - Register physical memory
438 *
439 * returns: 0 on success, errno code on failure
440 */
441static
442int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
443 struct iser_data_buf *mem,
444 struct iser_page_vec *page_vec,
445 struct iser_mem_reg *mem_reg)
446{
447 struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
448 struct iser_device *device = ib_conn->device;
449 struct ib_pool_fmr *fmr;
450 int ret, plen;
451
452 plen = iser_sg_to_page_vec(mem, device->ib_device,
453 page_vec->pages,
454 &page_vec->offset,
455 &page_vec->data_size);
456 page_vec->length = plen;
457 if (plen * SIZE_4K < page_vec->data_size) {
458 iser_err("page vec too short to hold this SG\n");
459 iser_data_buf_dump(mem, device->ib_device);
460 iser_dump_page_vec(page_vec);
461 return -EINVAL;
462 }
463
464 fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
465 page_vec->pages,
466 page_vec->length,
467 page_vec->pages[0]);
468 if (IS_ERR(fmr)) {
469 ret = PTR_ERR(fmr);
470 iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
471 return ret;
472 }
473
474 mem_reg->sge.lkey = fmr->fmr->lkey;
475 mem_reg->rkey = fmr->fmr->rkey;
476 mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
477 mem_reg->sge.length = page_vec->data_size;
478 mem_reg->mem_h = fmr;
479
480 return 0;
481}
482
483/**
484 * Unregister (previously registered using FMR) memory.
485 * If memory is non-FMR, does nothing.

486 */
487void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
488 enum iser_data_dir cmd_dir)
489{
490 struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
491 int ret;
492
493 if (!reg->mem_h)
494 return;
495
496 iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
497
498 ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
499 if (ret)
500 iser_err("ib_fmr_pool_unmap failed %d\n", ret);
501
502 reg->mem_h = NULL;
503}
504
505void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
506 enum iser_data_dir cmd_dir)
507{
508 struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
509
510 if (!reg->mem_h)
511 return;
512
513 iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
514 reg->mem_h);
515 reg->mem_h = NULL;
516}
517
518/**
374 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA, 519 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
375 * using FMR (if possible) obtaining rkey and va 520 * using FMR (if possible) obtaining rkey and va
376 * 521 *
@@ -383,45 +528,29 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
383 struct iser_device *device = ib_conn->device; 528 struct iser_device *device = ib_conn->device;
384 struct ib_device *ibdev = device->ib_device; 529 struct ib_device *ibdev = device->ib_device;
385 struct iser_data_buf *mem = &iser_task->data[cmd_dir]; 530 struct iser_data_buf *mem = &iser_task->data[cmd_dir];
386 struct iser_regd_buf *regd_buf; 531 struct iser_mem_reg *mem_reg;
387 int aligned_len; 532 int aligned_len;
388 int err; 533 int err;
389 int i; 534 int i;
390 struct scatterlist *sg;
391 535
392 regd_buf = &iser_task->rdma_regd[cmd_dir]; 536 mem_reg = &iser_task->rdma_reg[cmd_dir];
393 537
394 aligned_len = iser_data_buf_aligned_len(mem, ibdev); 538 aligned_len = iser_data_buf_aligned_len(mem, ibdev);
395 if (aligned_len != mem->dma_nents) { 539 if (aligned_len != mem->dma_nents) {
396 err = fall_to_bounce_buf(iser_task, ibdev, mem, 540 err = fall_to_bounce_buf(iser_task, mem,
397 &iser_task->data_copy[cmd_dir],
398 cmd_dir, aligned_len); 541 cmd_dir, aligned_len);
399 if (err) { 542 if (err) {
400 iser_err("failed to allocate bounce buffer\n"); 543 iser_err("failed to allocate bounce buffer\n");
401 return err; 544 return err;
402 } 545 }
403 mem = &iser_task->data_copy[cmd_dir];
404 } 546 }
405 547
406 /* if there a single dma entry, FMR is not needed */ 548 /* if there a single dma entry, FMR is not needed */
407 if (mem->dma_nents == 1) { 549 if (mem->dma_nents == 1) {
408 sg = (struct scatterlist *)mem->buf; 550 return iser_reg_dma(device, mem, mem_reg);
409
410 regd_buf->reg.lkey = device->mr->lkey;
411 regd_buf->reg.rkey = device->mr->rkey;
412 regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
413 regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
414
415 iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
416 "va: 0x%08lX sz: %ld]\n",
417 (unsigned int)regd_buf->reg.lkey,
418 (unsigned int)regd_buf->reg.rkey,
419 (unsigned long)regd_buf->reg.va,
420 (unsigned long)regd_buf->reg.len);
421 } else { /* use FMR for multiple dma entries */ 551 } else { /* use FMR for multiple dma entries */
422 iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); 552 err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
423 err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec, 553 mem_reg);
424 &regd_buf->reg);
425 if (err && err != -EAGAIN) { 554 if (err && err != -EAGAIN) {
426 iser_data_buf_dump(mem, ibdev); 555 iser_data_buf_dump(mem, ibdev);
427 iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", 556 iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
@@ -519,8 +648,10 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
519 648
520static int 649static int
521iser_reg_sig_mr(struct iscsi_iser_task *iser_task, 650iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
522 struct fast_reg_descriptor *desc, struct ib_sge *data_sge, 651 struct fast_reg_descriptor *desc,
523 struct ib_sge *prot_sge, struct ib_sge *sig_sge) 652 struct iser_mem_reg *data_reg,
653 struct iser_mem_reg *prot_reg,
654 struct iser_mem_reg *sig_reg)
524{ 655{
525 struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; 656 struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
526 struct iser_pi_context *pi_ctx = desc->pi_ctx; 657 struct iser_pi_context *pi_ctx = desc->pi_ctx;
@@ -544,12 +675,12 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
544 memset(&sig_wr, 0, sizeof(sig_wr)); 675 memset(&sig_wr, 0, sizeof(sig_wr));
545 sig_wr.opcode = IB_WR_REG_SIG_MR; 676 sig_wr.opcode = IB_WR_REG_SIG_MR;
546 sig_wr.wr_id = ISER_FASTREG_LI_WRID; 677 sig_wr.wr_id = ISER_FASTREG_LI_WRID;
547 sig_wr.sg_list = data_sge; 678 sig_wr.sg_list = &data_reg->sge;
548 sig_wr.num_sge = 1; 679 sig_wr.num_sge = 1;
549 sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; 680 sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
550 sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; 681 sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
551 if (scsi_prot_sg_count(iser_task->sc)) 682 if (scsi_prot_sg_count(iser_task->sc))
552 sig_wr.wr.sig_handover.prot = prot_sge; 683 sig_wr.wr.sig_handover.prot = &prot_reg->sge;
553 sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | 684 sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
554 IB_ACCESS_REMOTE_READ | 685 IB_ACCESS_REMOTE_READ |
555 IB_ACCESS_REMOTE_WRITE; 686 IB_ACCESS_REMOTE_WRITE;
@@ -566,27 +697,26 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
566 } 697 }
567 desc->reg_indicators &= ~ISER_SIG_KEY_VALID; 698 desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
568 699
569 sig_sge->lkey = pi_ctx->sig_mr->lkey; 700 sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
570 sig_sge->addr = 0; 701 sig_reg->rkey = pi_ctx->sig_mr->rkey;
571 sig_sge->length = scsi_transfer_length(iser_task->sc); 702 sig_reg->sge.addr = 0;
703 sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
572 704
573 iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n", 705 iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
574 sig_sge->addr, sig_sge->length, 706 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
575 sig_sge->lkey); 707 sig_reg->sge.length);
576err: 708err:
577 return ret; 709 return ret;
578} 710}
579 711
580static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, 712static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
581 struct iser_regd_buf *regd_buf,
582 struct iser_data_buf *mem, 713 struct iser_data_buf *mem,
714 struct fast_reg_descriptor *desc,
583 enum iser_reg_indicator ind, 715 enum iser_reg_indicator ind,
584 struct ib_sge *sge) 716 struct iser_mem_reg *reg)
585{ 717{
586 struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
587 struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; 718 struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
588 struct iser_device *device = ib_conn->device; 719 struct iser_device *device = ib_conn->device;
589 struct ib_device *ibdev = device->ib_device;
590 struct ib_mr *mr; 720 struct ib_mr *mr;
591 struct ib_fast_reg_page_list *frpl; 721 struct ib_fast_reg_page_list *frpl;
592 struct ib_send_wr fastreg_wr, inv_wr; 722 struct ib_send_wr fastreg_wr, inv_wr;
@@ -594,17 +724,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
594 int ret, offset, size, plen; 724 int ret, offset, size, plen;
595 725
596 /* if there a single dma entry, dma mr suffices */ 726 /* if there a single dma entry, dma mr suffices */
597 if (mem->dma_nents == 1) { 727 if (mem->dma_nents == 1)
598 struct scatterlist *sg = (struct scatterlist *)mem->buf; 728 return iser_reg_dma(device, mem, reg);
599
600 sge->lkey = device->mr->lkey;
601 sge->addr = ib_sg_dma_address(ibdev, &sg[0]);
602 sge->length = ib_sg_dma_len(ibdev, &sg[0]);
603
604 iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
605 sge->lkey, sge->addr, sge->length);
606 return 0;
607 }
608 729
609 if (ind == ISER_DATA_KEY_VALID) { 730 if (ind == ISER_DATA_KEY_VALID) {
610 mr = desc->data_mr; 731 mr = desc->data_mr;
@@ -652,9 +773,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
652 } 773 }
653 desc->reg_indicators &= ~ind; 774 desc->reg_indicators &= ~ind;
654 775
655 sge->lkey = mr->lkey; 776 reg->sge.lkey = mr->lkey;
656 sge->addr = frpl->page_list[0] + offset; 777 reg->rkey = mr->rkey;
657 sge->length = size; 778 reg->sge.addr = frpl->page_list[0] + offset;
779 reg->sge.length = size;
658 780
659 return ret; 781 return ret;
660} 782}
@@ -672,93 +794,66 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
672 struct iser_device *device = ib_conn->device; 794 struct iser_device *device = ib_conn->device;
673 struct ib_device *ibdev = device->ib_device; 795 struct ib_device *ibdev = device->ib_device;
674 struct iser_data_buf *mem = &iser_task->data[cmd_dir]; 796 struct iser_data_buf *mem = &iser_task->data[cmd_dir];
675 struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; 797 struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
676 struct fast_reg_descriptor *desc = NULL; 798 struct fast_reg_descriptor *desc = NULL;
677 struct ib_sge data_sge;
678 int err, aligned_len; 799 int err, aligned_len;
679 unsigned long flags;
680 800
681 aligned_len = iser_data_buf_aligned_len(mem, ibdev); 801 aligned_len = iser_data_buf_aligned_len(mem, ibdev);
682 if (aligned_len != mem->dma_nents) { 802 if (aligned_len != mem->dma_nents) {
683 err = fall_to_bounce_buf(iser_task, ibdev, mem, 803 err = fall_to_bounce_buf(iser_task, mem,
684 &iser_task->data_copy[cmd_dir],
685 cmd_dir, aligned_len); 804 cmd_dir, aligned_len);
686 if (err) { 805 if (err) {
687 iser_err("failed to allocate bounce buffer\n"); 806 iser_err("failed to allocate bounce buffer\n");
688 return err; 807 return err;
689 } 808 }
690 mem = &iser_task->data_copy[cmd_dir];
691 } 809 }
692 810
693 if (mem->dma_nents != 1 || 811 if (mem->dma_nents != 1 ||
694 scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { 812 scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
695 spin_lock_irqsave(&ib_conn->lock, flags); 813 desc = iser_reg_desc_get(ib_conn);
696 desc = list_first_entry(&ib_conn->fastreg.pool, 814 mem_reg->mem_h = desc;
697 struct fast_reg_descriptor, list);
698 list_del(&desc->list);
699 spin_unlock_irqrestore(&ib_conn->lock, flags);
700 regd_buf->reg.mem_h = desc;
701 } 815 }
702 816
703 err = iser_fast_reg_mr(iser_task, regd_buf, mem, 817 err = iser_fast_reg_mr(iser_task, mem, desc,
704 ISER_DATA_KEY_VALID, &data_sge); 818 ISER_DATA_KEY_VALID, mem_reg);
705 if (err) 819 if (err)
706 goto err_reg; 820 goto err_reg;
707 821
708 if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { 822 if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
709 struct ib_sge prot_sge, sig_sge; 823 struct iser_mem_reg prot_reg;
710 824
711 memset(&prot_sge, 0, sizeof(prot_sge)); 825 memset(&prot_reg, 0, sizeof(prot_reg));
712 if (scsi_prot_sg_count(iser_task->sc)) { 826 if (scsi_prot_sg_count(iser_task->sc)) {
713 mem = &iser_task->prot[cmd_dir]; 827 mem = &iser_task->prot[cmd_dir];
714 aligned_len = iser_data_buf_aligned_len(mem, ibdev); 828 aligned_len = iser_data_buf_aligned_len(mem, ibdev);
715 if (aligned_len != mem->dma_nents) { 829 if (aligned_len != mem->dma_nents) {
716 err = fall_to_bounce_buf(iser_task, ibdev, mem, 830 err = fall_to_bounce_buf(iser_task, mem,
717 &iser_task->prot_copy[cmd_dir],
718 cmd_dir, aligned_len); 831 cmd_dir, aligned_len);
719 if (err) { 832 if (err) {
720 iser_err("failed to allocate bounce buffer\n"); 833 iser_err("failed to allocate bounce buffer\n");
721 return err; 834 return err;
722 } 835 }
723 mem = &iser_task->prot_copy[cmd_dir];
724 } 836 }
725 837
726 err = iser_fast_reg_mr(iser_task, regd_buf, mem, 838 err = iser_fast_reg_mr(iser_task, mem, desc,
727 ISER_PROT_KEY_VALID, &prot_sge); 839 ISER_PROT_KEY_VALID, &prot_reg);
728 if (err) 840 if (err)
729 goto err_reg; 841 goto err_reg;
730 } 842 }
731 843
732 err = iser_reg_sig_mr(iser_task, desc, &data_sge, 844 err = iser_reg_sig_mr(iser_task, desc, mem_reg,
733 &prot_sge, &sig_sge); 845 &prot_reg, mem_reg);
734 if (err) { 846 if (err) {
735 iser_err("Failed to register signature mr\n"); 847 iser_err("Failed to register signature mr\n");
736 return err; 848 return err;
737 } 849 }
738 desc->reg_indicators |= ISER_FASTREG_PROTECTED; 850 desc->reg_indicators |= ISER_FASTREG_PROTECTED;
739
740 regd_buf->reg.lkey = sig_sge.lkey;
741 regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
742 regd_buf->reg.va = sig_sge.addr;
743 regd_buf->reg.len = sig_sge.length;
744 } else {
745 if (desc)
746 regd_buf->reg.rkey = desc->data_mr->rkey;
747 else
748 regd_buf->reg.rkey = device->mr->rkey;
749
750 regd_buf->reg.lkey = data_sge.lkey;
751 regd_buf->reg.va = data_sge.addr;
752 regd_buf->reg.len = data_sge.length;
753 } 851 }
754 852
755 return 0; 853 return 0;
756err_reg: 854err_reg:
757 if (desc) { 855 if (desc)
758 spin_lock_irqsave(&ib_conn->lock, flags); 856 iser_reg_desc_put(ib_conn, desc);
759 list_add_tail(&desc->list, &ib_conn->fastreg.pool);
760 spin_unlock_irqrestore(&ib_conn->lock, flags);
761 }
762 857
763 return err; 858 return err;
764} 859}
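The memory-registration rework above funnels fast-reg descriptor handling through the new iser_reg_desc_get()/iser_reg_desc_put() helpers, which pop and push entries on ib_conn->fastreg.pool under ib_conn->lock. A hedged sketch of that discipline; example_fastreg_cycle() is illustrative and skips the actual work-request posting:

/* Sketch: the descriptor pool discipline encapsulated by the new
 * iser_reg_desc_get()/iser_reg_desc_put() helpers (illustrative only;
 * the real code posts IB_WR_FAST_REG_MR between get and put). */
static void example_fastreg_cycle(struct ib_conn *ib_conn,
				  struct iser_mem_reg *mem_reg)
{
	struct fast_reg_descriptor *desc;

	desc = iser_reg_desc_get(ib_conn);	/* pop under ib_conn->lock */
	mem_reg->mem_h = desc;			/* kept for the later unreg */

	/* ... registration work requests would be posted here ... */

	iser_reg_desc_put(ib_conn, desc);	/* return to fastreg.pool */
	mem_reg->mem_h = NULL;
}
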
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 4065abe28829..cc2dd35ffbc0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -274,6 +274,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
274} 274}
275 275
276static int 276static int
277iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
278 struct fast_reg_descriptor *desc)
279{
280 struct iser_pi_context *pi_ctx = NULL;
281 struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
282 .flags = IB_MR_SIGNATURE_EN};
283 int ret = 0;
284
285 desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
286 if (!desc->pi_ctx)
287 return -ENOMEM;
288
289 pi_ctx = desc->pi_ctx;
290
291 pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
292 ISCSI_ISER_SG_TABLESIZE);
293 if (IS_ERR(pi_ctx->prot_frpl)) {
294 ret = PTR_ERR(pi_ctx->prot_frpl);
295 goto prot_frpl_failure;
296 }
297
298 pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
299 ISCSI_ISER_SG_TABLESIZE + 1);
300 if (IS_ERR(pi_ctx->prot_mr)) {
301 ret = PTR_ERR(pi_ctx->prot_mr);
302 goto prot_mr_failure;
303 }
304 desc->reg_indicators |= ISER_PROT_KEY_VALID;
305
306 pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
307 if (IS_ERR(pi_ctx->sig_mr)) {
308 ret = PTR_ERR(pi_ctx->sig_mr);
309 goto sig_mr_failure;
310 }
311 desc->reg_indicators |= ISER_SIG_KEY_VALID;
312 desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
313
314 return 0;
315
316sig_mr_failure:
317 ib_dereg_mr(desc->pi_ctx->prot_mr);
318prot_mr_failure:
319 ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
320prot_frpl_failure:
321 kfree(desc->pi_ctx);
322
323 return ret;
324}
325
326static void
327iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
328{
329 ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
330 ib_dereg_mr(pi_ctx->prot_mr);
331 ib_destroy_mr(pi_ctx->sig_mr);
332 kfree(pi_ctx);
333}
334
335static int
277iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, 336iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
278 bool pi_enable, struct fast_reg_descriptor *desc) 337 bool pi_enable, struct fast_reg_descriptor *desc)
279{ 338{
@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
297 desc->reg_indicators |= ISER_DATA_KEY_VALID; 356 desc->reg_indicators |= ISER_DATA_KEY_VALID;
298 357
299 if (pi_enable) { 358 if (pi_enable) {
300 struct ib_mr_init_attr mr_init_attr = {0}; 359 ret = iser_alloc_pi_ctx(ib_device, pd, desc);
301 struct iser_pi_context *pi_ctx = NULL; 360 if (ret)
302
303 desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
304 if (!desc->pi_ctx) {
305 iser_err("Failed to allocate pi context\n");
306 ret = -ENOMEM;
307 goto pi_ctx_alloc_failure; 361 goto pi_ctx_alloc_failure;
308 }
309 pi_ctx = desc->pi_ctx;
310
311 pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
312 ISCSI_ISER_SG_TABLESIZE);
313 if (IS_ERR(pi_ctx->prot_frpl)) {
314 ret = PTR_ERR(pi_ctx->prot_frpl);
315 iser_err("Failed to allocate prot frpl ret=%d\n",
316 ret);
317 goto prot_frpl_failure;
318 }
319
320 pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
321 ISCSI_ISER_SG_TABLESIZE + 1);
322 if (IS_ERR(pi_ctx->prot_mr)) {
323 ret = PTR_ERR(pi_ctx->prot_mr);
324 iser_err("Failed to allocate prot frmr ret=%d\n",
325 ret);
326 goto prot_mr_failure;
327 }
328 desc->reg_indicators |= ISER_PROT_KEY_VALID;
329
330 mr_init_attr.max_reg_descriptors = 2;
331 mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
332 pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
333 if (IS_ERR(pi_ctx->sig_mr)) {
334 ret = PTR_ERR(pi_ctx->sig_mr);
335 iser_err("Failed to allocate signature enabled mr err=%d\n",
336 ret);
337 goto sig_mr_failure;
338 }
339 desc->reg_indicators |= ISER_SIG_KEY_VALID;
340 } 362 }
341 desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
342
343 iser_dbg("Create fr_desc %p page_list %p\n",
344 desc, desc->data_frpl->page_list);
345 363
346 return 0; 364 return 0;
347sig_mr_failure:
348 ib_dereg_mr(desc->pi_ctx->prot_mr);
349prot_mr_failure:
350 ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
351prot_frpl_failure:
352 kfree(desc->pi_ctx);
353pi_ctx_alloc_failure: 365pi_ctx_alloc_failure:
354 ib_dereg_mr(desc->data_mr); 366 ib_dereg_mr(desc->data_mr);
355fast_reg_mr_failure: 367fast_reg_mr_failure:
@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
416 list_del(&desc->list); 428 list_del(&desc->list);
417 ib_free_fast_reg_page_list(desc->data_frpl); 429 ib_free_fast_reg_page_list(desc->data_frpl);
418 ib_dereg_mr(desc->data_mr); 430 ib_dereg_mr(desc->data_mr);
419 if (desc->pi_ctx) { 431 if (desc->pi_ctx)
420 ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); 432 iser_free_pi_ctx(desc->pi_ctx);
421 ib_dereg_mr(desc->pi_ctx->prot_mr);
422 ib_destroy_mr(desc->pi_ctx->sig_mr);
423 kfree(desc->pi_ctx);
424 }
425 kfree(desc); 433 kfree(desc);
426 ++i; 434 ++i;
427 } 435 }
@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
721 struct iser_conn *iser_conn; 729 struct iser_conn *iser_conn;
722 730
723 iser_conn = (struct iser_conn *)cma_id->context; 731 iser_conn = (struct iser_conn *)cma_id->context;
724 iser_conn->state = ISER_CONN_DOWN; 732 iser_conn->state = ISER_CONN_TERMINATING;
725} 733}
726 734
727/** 735/**
@@ -992,93 +1000,6 @@ connect_failure:
992 return err; 1000 return err;
993} 1001}
994 1002
995/**
996 * iser_reg_page_vec - Register physical memory
997 *
998 * returns: 0 on success, errno code on failure
999 */
1000int iser_reg_page_vec(struct ib_conn *ib_conn,
1001 struct iser_page_vec *page_vec,
1002 struct iser_mem_reg *mem_reg)
1003{
1004 struct ib_pool_fmr *mem;
1005 u64 io_addr;
1006 u64 *page_list;
1007 int status;
1008
1009 page_list = page_vec->pages;
1010 io_addr = page_list[0];
1011
1012 mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
1013 page_list,
1014 page_vec->length,
1015 io_addr);
1016
1017 if (IS_ERR(mem)) {
1018 status = (int)PTR_ERR(mem);
1019 iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
1020 return status;
1021 }
1022
1023 mem_reg->lkey = mem->fmr->lkey;
1024 mem_reg->rkey = mem->fmr->rkey;
1025 mem_reg->len = page_vec->length * SIZE_4K;
1026 mem_reg->va = io_addr;
1027 mem_reg->mem_h = (void *)mem;
1028
1029 mem_reg->va += page_vec->offset;
1030 mem_reg->len = page_vec->data_size;
1031
1032 iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
1033 "entry[0]: (0x%08lx,%ld)] -> "
1034 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
1035 page_vec, page_vec->length,
1036 (unsigned long)page_vec->pages[0],
1037 (unsigned long)page_vec->data_size,
1038 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
1039 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
1040 return 0;
1041}
1042
1043/**
1044 * Unregister (previosuly registered using FMR) memory.
1045 * If memory is non-FMR does nothing.
1046 */
1047void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
1048 enum iser_data_dir cmd_dir)
1049{
1050 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
1051 int ret;
1052
1053 if (!reg->mem_h)
1054 return;
1055
1056 iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
1057
1058 ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
1059 if (ret)
1060 iser_err("ib_fmr_pool_unmap failed %d\n", ret);
1061
1062 reg->mem_h = NULL;
1063}
1064
1065void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
1066 enum iser_data_dir cmd_dir)
1067{
1068 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
1069 struct iser_conn *iser_conn = iser_task->iser_conn;
1070 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1071 struct fast_reg_descriptor *desc = reg->mem_h;
1072
1073 if (!desc)
1074 return;
1075
1076 reg->mem_h = NULL;
1077 spin_lock_bh(&ib_conn->lock);
1078 list_add_tail(&desc->list, &ib_conn->fastreg.pool);
1079 spin_unlock_bh(&ib_conn->lock);
1080}
1081
1082int iser_post_recvl(struct iser_conn *iser_conn) 1003int iser_post_recvl(struct iser_conn *iser_conn)
1083{ 1004{
1084 struct ib_recv_wr rx_wr, *rx_wr_failed; 1005 struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
1210 iscsi_conn_failure(iser_conn->iscsi_conn, 1131 iscsi_conn_failure(iser_conn->iscsi_conn,
1211 ISCSI_ERR_CONN_FAILED); 1132 ISCSI_ERR_CONN_FAILED);
1212 1133
1134 if (wc->wr_id == ISER_FASTREG_LI_WRID)
1135 return;
1136
1213 if (is_iser_tx_desc(iser_conn, wr_id)) { 1137 if (is_iser_tx_desc(iser_conn, wr_id)) {
1214 struct iser_tx_desc *desc = wr_id; 1138 struct iser_tx_desc *desc = wr_id;
1215 1139
@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
1254 else 1178 else
1255 iser_dbg("flush error: wr id %llx\n", wc->wr_id); 1179 iser_dbg("flush error: wr id %llx\n", wc->wr_id);
1256 1180
1257 if (wc->wr_id != ISER_FASTREG_LI_WRID &&
1258 wc->wr_id != ISER_BEACON_WRID)
1259 iser_handle_comp_error(ib_conn, wc);
1260
1261 /* complete in case all flush errors were consumed */
1262 if (wc->wr_id == ISER_BEACON_WRID) 1181 if (wc->wr_id == ISER_BEACON_WRID)
1182 /* all flush errors were consumed */
1263 complete(&ib_conn->flush_comp); 1183 complete(&ib_conn->flush_comp);
1184 else
1185 iser_handle_comp_error(ib_conn, wc);
1264 } 1186 }
1265} 1187}
1266 1188
@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
1306u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, 1228u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
1307 enum iser_data_dir cmd_dir, sector_t *sector) 1229 enum iser_data_dir cmd_dir, sector_t *sector)
1308{ 1230{
1309 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 1231 struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
1310 struct fast_reg_descriptor *desc = reg->mem_h; 1232 struct fast_reg_descriptor *desc = reg->mem_h;
1311 unsigned long sector_size = iser_task->sc->device->sector_size; 1233 unsigned long sector_size = iser_task->sc->device->sector_size;
1312 struct ib_mr_status mr_status; 1234 struct ib_mr_status mr_status;
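The verbs changes above factor the protection-information setup into iser_alloc_pi_ctx()/iser_free_pi_ctx(). A hedged sketch of the pairing; example_enable_pi() is a hypothetical wrapper, and in the driver the free actually happens when the fastreg pool is torn down:

/* Sketch: the alloc/free pairing introduced above (hypothetical wrapper;
 * iser_alloc_pi_ctx() rolls back prot_frpl/prot_mr/sig_mr internally on
 * failure, so the caller only checks the return code). */
static int example_enable_pi(struct ib_device *ib_device, struct ib_pd *pd,
			     struct fast_reg_descriptor *desc)
{
	int ret;

	ret = iser_alloc_pi_ctx(ib_device, pd, desc);
	if (ret)
		return ret;

	/* ... desc->pi_ctx->sig_mr is now usable for IB_WR_REG_SIG_MR ... */

	iser_free_pi_ctx(desc->pi_ctx);
	desc->pi_ctx = NULL;
	return 0;
}
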
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0747c0595a9d..918814cd0f80 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -40,6 +40,7 @@
40#include <linux/parser.h> 40#include <linux/parser.h>
41#include <linux/random.h> 41#include <linux/random.h>
42#include <linux/jiffies.h> 42#include <linux/jiffies.h>
43#include <rdma/ib_cache.h>
43 44
44#include <linux/atomic.h> 45#include <linux/atomic.h>
45 46
@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
265 if (!attr) 266 if (!attr)
266 return -ENOMEM; 267 return -ENOMEM;
267 268
268 ret = ib_find_pkey(target->srp_host->srp_dev->dev, 269 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
269 target->srp_host->port, 270 target->srp_host->port,
270 be16_to_cpu(target->pkey), 271 be16_to_cpu(target->pkey),
271 &attr->pkey_index); 272 &attr->pkey_index);
272 if (ret) 273 if (ret)
273 goto out; 274 goto out;
274 275
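The ib_srp.c hunk above switches P_Key resolution to the cached table via ib_find_cached_pkey() from <rdma/ib_cache.h>, avoiding a query to the HCA on every QP init. A minimal hedged sketch of the call; example_pkey_index() is an illustrative wrapper only:

/* Sketch: resolving a P_Key index from the kernel's P_Key cache, as the
 * hunk above now does. */
#include <rdma/ib_cache.h>

static int example_pkey_index(struct ib_device *dev, u8 port,
			      __be16 pkey, u16 *index)
{
	return ib_find_cached_pkey(dev, port, be16_to_cpu(pkey), index);
}
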
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 6e0a477681e9..4b9b866e6b0d 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -207,7 +207,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
207 } 207 }
208 break; 208 break;
209 default: 209 default:
210 printk(KERN_ERR "received unrecognized IB event %d\n", 210 pr_err("received unrecognized IB event %d\n",
211 event->event); 211 event->event);
212 break; 212 break;
213 } 213 }
@@ -218,7 +218,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
218 */ 218 */
219static void srpt_srq_event(struct ib_event *event, void *ctx) 219static void srpt_srq_event(struct ib_event *event, void *ctx)
220{ 220{
221 printk(KERN_INFO "SRQ event %d\n", event->event); 221 pr_info("SRQ event %d\n", event->event);
222} 222}
223 223
224/** 224/**
@@ -242,8 +242,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
242 ch->sess_name, srpt_get_ch_state(ch)); 242 ch->sess_name, srpt_get_ch_state(ch));
243 break; 243 break;
244 default: 244 default:
245 printk(KERN_ERR "received unrecognized IB QP event %d\n", 245 pr_err("received unrecognized IB QP event %d\n", event->event);
246 event->event);
247 break; 246 break;
248 } 247 }
249} 248}
@@ -602,7 +601,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
602 sport = &sdev->port[i - 1]; 601 sport = &sdev->port[i - 1];
603 WARN_ON(sport->port != i); 602 WARN_ON(sport->port != i);
604 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0) 603 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
605 printk(KERN_ERR "disabling MAD processing failed.\n"); 604 pr_err("disabling MAD processing failed.\n");
606 if (sport->mad_agent) { 605 if (sport->mad_agent) {
607 ib_unregister_mad_agent(sport->mad_agent); 606 ib_unregister_mad_agent(sport->mad_agent);
608 sport->mad_agent = NULL; 607 sport->mad_agent = NULL;
@@ -810,7 +809,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
810 809
811 ret = -ENOMEM; 810 ret = -ENOMEM;
812 if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) { 811 if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
813 printk(KERN_WARNING "IB send queue full (needed 1)\n"); 812 pr_warn("IB send queue full (needed 1)\n");
814 goto out; 813 goto out;
815 } 814 }
816 815
@@ -912,7 +911,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
912 911
913 if (ioctx->n_rbuf > 912 if (ioctx->n_rbuf >
914 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { 913 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
915 printk(KERN_ERR "received unsupported SRP_CMD request" 914 pr_err("received unsupported SRP_CMD request"
916 " type (%u out + %u in != %u / %zu)\n", 915 " type (%u out + %u in != %u / %zu)\n",
917 srp_cmd->data_out_desc_cnt, 916 srp_cmd->data_out_desc_cnt,
918 srp_cmd->data_in_desc_cnt, 917 srp_cmd->data_in_desc_cnt,
@@ -1432,7 +1431,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
1432 srpt_unmap_sg_to_ib_sge(ch, ioctx); 1431 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1433 transport_generic_free_cmd(&ioctx->cmd, 0); 1432 transport_generic_free_cmd(&ioctx->cmd, 0);
1434 } else { 1433 } else {
1435 printk(KERN_ERR "IB completion has been received too late for" 1434 pr_err("IB completion has been received too late for"
1436 " wr_id = %u.\n", ioctx->ioctx.index); 1435 " wr_id = %u.\n", ioctx->ioctx.index);
1437 } 1436 }
1438} 1437}
@@ -1457,7 +1456,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
1457 SRPT_STATE_DATA_IN)) 1456 SRPT_STATE_DATA_IN))
1458 target_execute_cmd(&ioctx->cmd); 1457 target_execute_cmd(&ioctx->cmd);
1459 else 1458 else
1460 printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__, 1459 pr_err("%s[%d]: wrong state = %d\n", __func__,
1461 __LINE__, srpt_get_cmd_state(ioctx)); 1460 __LINE__, srpt_get_cmd_state(ioctx));
1462 } else if (opcode == SRPT_RDMA_ABORT) { 1461 } else if (opcode == SRPT_RDMA_ABORT) {
1463 ioctx->rdma_aborted = true; 1462 ioctx->rdma_aborted = true;
@@ -1481,7 +1480,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
1481 switch (opcode) { 1480 switch (opcode) {
1482 case SRPT_RDMA_READ_LAST: 1481 case SRPT_RDMA_READ_LAST:
1483 if (ioctx->n_rdma <= 0) { 1482 if (ioctx->n_rdma <= 0) {
1484 printk(KERN_ERR "Received invalid RDMA read" 1483 pr_err("Received invalid RDMA read"
1485 " error completion with idx %d\n", 1484 " error completion with idx %d\n",
1486 ioctx->ioctx.index); 1485 ioctx->ioctx.index);
1487 break; 1486 break;
@@ -1490,14 +1489,13 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
1490 if (state == SRPT_STATE_NEED_DATA) 1489 if (state == SRPT_STATE_NEED_DATA)
1491 srpt_abort_cmd(ioctx); 1490 srpt_abort_cmd(ioctx);
1492 else 1491 else
1493 printk(KERN_ERR "%s[%d]: wrong state = %d\n", 1492 pr_err("%s[%d]: wrong state = %d\n",
1494 __func__, __LINE__, state); 1493 __func__, __LINE__, state);
1495 break; 1494 break;
1496 case SRPT_RDMA_WRITE_LAST: 1495 case SRPT_RDMA_WRITE_LAST:
1497 break; 1496 break;
1498 default: 1497 default:
1499 printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__, 1498 pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
1500 __LINE__, opcode);
1501 break; 1499 break;
1502 } 1500 }
1503} 1501}
@@ -1549,8 +1547,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
1549 BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); 1547 BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
1550 max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); 1548 max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
1551 if (sense_data_len > max_sense_len) { 1549 if (sense_data_len > max_sense_len) {
1552 printk(KERN_WARNING "truncated sense data from %d to %d" 1550 pr_warn("truncated sense data from %d to %d"
1553 " bytes\n", sense_data_len, max_sense_len); 1551 " bytes\n", sense_data_len, max_sense_len);
1554 sense_data_len = max_sense_len; 1552 sense_data_len = max_sense_len;
1555 } 1553 }
1556 1554
@@ -1628,8 +1626,8 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
1628 int addressing_method; 1626 int addressing_method;
1629 1627
1630 if (unlikely(len < 2)) { 1628 if (unlikely(len < 2)) {
1631 printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or " 1629 pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
1632 "more", len); 1630 len);
1633 goto out; 1631 goto out;
1634 } 1632 }
1635 1633
@@ -1663,7 +1661,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
1663 1661
1664 case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: 1662 case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
1665 default: 1663 default:
1666 printk(KERN_ERR "Unimplemented LUN addressing method %u", 1664 pr_err("Unimplemented LUN addressing method %u\n",
1667 addressing_method); 1665 addressing_method);
1668 break; 1666 break;
1669 } 1667 }
@@ -1672,8 +1670,7 @@ out:
1672 return res; 1670 return res;
1673 1671
1674out_err: 1672out_err:
1675 printk(KERN_ERR "Support for multi-level LUNs has not yet been" 1673 pr_err("Support for multi-level LUNs has not yet been implemented\n");
1676 " implemented");
1677 goto out; 1674 goto out;
1678} 1675}
1679 1676
@@ -1723,7 +1720,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
1723 } 1720 }
1724 1721
1725 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { 1722 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
1726 printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n", 1723 pr_err("0x%llx: parsing SRP descriptor table failed.\n",
1727 srp_cmd->tag); 1724 srp_cmd->tag);
1728 ret = TCM_INVALID_CDB_FIELD; 1725 ret = TCM_INVALID_CDB_FIELD;
1729 goto send_sense; 1726 goto send_sense;
@@ -1912,7 +1909,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1912 srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx); 1909 srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
1913 break; 1910 break;
1914 case SRP_I_LOGOUT: 1911 case SRP_I_LOGOUT:
1915 printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n"); 1912 pr_err("Not yet implemented: SRP_I_LOGOUT\n");
1916 break; 1913 break;
1917 case SRP_CRED_RSP: 1914 case SRP_CRED_RSP:
1918 pr_debug("received SRP_CRED_RSP\n"); 1915 pr_debug("received SRP_CRED_RSP\n");
@@ -1921,10 +1918,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1921 pr_debug("received SRP_AER_RSP\n"); 1918 pr_debug("received SRP_AER_RSP\n");
1922 break; 1919 break;
1923 case SRP_RSP: 1920 case SRP_RSP:
1924 printk(KERN_ERR "Received SRP_RSP\n"); 1921 pr_err("Received SRP_RSP\n");
1925 break; 1922 break;
1926 default: 1923 default:
1927 printk(KERN_ERR "received IU with unknown opcode 0x%x\n", 1924 pr_err("received IU with unknown opcode 0x%x\n",
1928 srp_cmd->opcode); 1925 srp_cmd->opcode);
1929 break; 1926 break;
1930 } 1927 }
@@ -1948,12 +1945,12 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
1948 1945
1949 req_lim = atomic_dec_return(&ch->req_lim); 1946 req_lim = atomic_dec_return(&ch->req_lim);
1950 if (unlikely(req_lim < 0)) 1947 if (unlikely(req_lim < 0))
1951 printk(KERN_ERR "req_lim = %d < 0\n", req_lim); 1948 pr_err("req_lim = %d < 0\n", req_lim);
1952 ioctx = sdev->ioctx_ring[index]; 1949 ioctx = sdev->ioctx_ring[index];
1953 srpt_handle_new_iu(ch, ioctx, NULL); 1950 srpt_handle_new_iu(ch, ioctx, NULL);
1954 } else { 1951 } else {
1955 printk(KERN_INFO "receiving failed for idx %u with status %d\n", 1952 pr_info("receiving failed for idx %u with status %d\n",
1956 index, wc->status); 1953 index, wc->status);
1957 } 1954 }
1958} 1955}
1959 1956
@@ -1993,12 +1990,12 @@ static void srpt_process_send_completion(struct ib_cq *cq,
1993 } 1990 }
1994 } else { 1991 } else {
1995 if (opcode == SRPT_SEND) { 1992 if (opcode == SRPT_SEND) {
1996 printk(KERN_INFO "sending response for idx %u failed" 1993 pr_info("sending response for idx %u failed"
1997 " with status %d\n", index, wc->status); 1994 " with status %d\n", index, wc->status);
1998 srpt_handle_send_err_comp(ch, wc->wr_id); 1995 srpt_handle_send_err_comp(ch, wc->wr_id);
1999 } else if (opcode != SRPT_RDMA_MID) { 1996 } else if (opcode != SRPT_RDMA_MID) {
2000 printk(KERN_INFO "RDMA t %d for idx %u failed with" 1997 pr_info("RDMA t %d for idx %u failed with"
2001 " status %d", opcode, index, wc->status); 1998 " status %d\n", opcode, index, wc->status);
2002 srpt_handle_rdma_err_comp(ch, send_ioctx, opcode); 1999 srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
2003 } 2000 }
2004 } 2001 }
@@ -2062,15 +2059,15 @@ static int srpt_compl_thread(void *arg)
2062 2059
2063 ch = arg; 2060 ch = arg;
2064 BUG_ON(!ch); 2061 BUG_ON(!ch);
2065 printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n", 2062 pr_info("Session %s: kernel thread %s (PID %d) started\n",
2066 ch->sess_name, ch->thread->comm, current->pid); 2063 ch->sess_name, ch->thread->comm, current->pid);
2067 while (!kthread_should_stop()) { 2064 while (!kthread_should_stop()) {
2068 wait_event_interruptible(ch->wait_queue, 2065 wait_event_interruptible(ch->wait_queue,
2069 (srpt_process_completion(ch->cq, ch), 2066 (srpt_process_completion(ch->cq, ch),
2070 kthread_should_stop())); 2067 kthread_should_stop()));
2071 } 2068 }
2072 printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n", 2069 pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
2073 ch->sess_name, ch->thread->comm, current->pid); 2070 ch->sess_name, ch->thread->comm, current->pid);
2074 return 0; 2071 return 0;
2075} 2072}
2076 2073
@@ -2097,7 +2094,7 @@ retry:
2097 ch->rq_size + srp_sq_size, 0); 2094 ch->rq_size + srp_sq_size, 0);
2098 if (IS_ERR(ch->cq)) { 2095 if (IS_ERR(ch->cq)) {
2099 ret = PTR_ERR(ch->cq); 2096 ret = PTR_ERR(ch->cq);
2100 printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n", 2097 pr_err("failed to create CQ cqe= %d ret= %d\n",
2101 ch->rq_size + srp_sq_size, ret); 2098 ch->rq_size + srp_sq_size, ret);
2102 goto out; 2099 goto out;
2103 } 2100 }
@@ -2123,7 +2120,7 @@ retry:
2123 goto retry; 2120 goto retry;
2124 } 2121 }
2125 } 2122 }
2126 printk(KERN_ERR "failed to create_qp ret= %d\n", ret); 2123 pr_err("failed to create_qp ret= %d\n", ret);
2127 goto err_destroy_cq; 2124 goto err_destroy_cq;
2128 } 2125 }
2129 2126
@@ -2143,7 +2140,7 @@ retry:
2143 2140
2144 ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl"); 2141 ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
2145 if (IS_ERR(ch->thread)) { 2142 if (IS_ERR(ch->thread)) {
2146 printk(KERN_ERR "failed to create kernel thread %ld\n", 2143 pr_err("failed to create kernel thread %ld\n",
2147 PTR_ERR(ch->thread)); 2144 PTR_ERR(ch->thread));
2148 ch->thread = NULL; 2145 ch->thread = NULL;
2149 goto err_destroy_qp; 2146 goto err_destroy_qp;
@@ -2204,7 +2201,7 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch)
2204 /* fall through */ 2201 /* fall through */
2205 case CH_LIVE: 2202 case CH_LIVE:
2206 if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) 2203 if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
2207 printk(KERN_ERR "sending CM DREQ failed.\n"); 2204 pr_err("sending CM DREQ failed.\n");
2208 break; 2205 break;
2209 case CH_DISCONNECTING: 2206 case CH_DISCONNECTING:
2210 break; 2207 break;
@@ -2291,7 +2288,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id)
2291 2288
2292 ret = srpt_ch_qp_err(ch); 2289 ret = srpt_ch_qp_err(ch);
2293 if (ret < 0) 2290 if (ret < 0)
2294 printk(KERN_ERR "Setting queue pair in error state" 2291 pr_err("Setting queue pair in error state"
2295 " failed: %d\n", ret); 2292 " failed: %d\n", ret);
2296 } 2293 }
2297} 2294}
@@ -2435,17 +2432,17 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2435 2432
2436 it_iu_len = be32_to_cpu(req->req_it_iu_len); 2433 it_iu_len = be32_to_cpu(req->req_it_iu_len);
2437 2434
2438 printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx," 2435 pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
2439 " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d" 2436 " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
2440 " (guid=0x%llx:0x%llx)\n", 2437 " (guid=0x%llx:0x%llx)\n",
2441 be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]), 2438 be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
2442 be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]), 2439 be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
2443 be64_to_cpu(*(__be64 *)&req->target_port_id[0]), 2440 be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
2444 be64_to_cpu(*(__be64 *)&req->target_port_id[8]), 2441 be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
2445 it_iu_len, 2442 it_iu_len,
2446 param->port, 2443 param->port,
2447 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), 2444 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
2448 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); 2445 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
2449 2446
2450 rsp = kzalloc(sizeof *rsp, GFP_KERNEL); 2447 rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
2451 rej = kzalloc(sizeof *rej, GFP_KERNEL); 2448 rej = kzalloc(sizeof *rej, GFP_KERNEL);
@@ -2460,7 +2457,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2460 rej->reason = __constant_cpu_to_be32( 2457 rej->reason = __constant_cpu_to_be32(
2461 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); 2458 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
2462 ret = -EINVAL; 2459 ret = -EINVAL;
2463 printk(KERN_ERR "rejected SRP_LOGIN_REQ because its" 2460 pr_err("rejected SRP_LOGIN_REQ because its"
2464 " length (%d bytes) is out of range (%d .. %d)\n", 2461 " length (%d bytes) is out of range (%d .. %d)\n",
2465 it_iu_len, 64, srp_max_req_size); 2462 it_iu_len, 64, srp_max_req_size);
2466 goto reject; 2463 goto reject;
@@ -2470,7 +2467,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2470 rej->reason = __constant_cpu_to_be32( 2467 rej->reason = __constant_cpu_to_be32(
2471 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2468 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2472 ret = -EINVAL; 2469 ret = -EINVAL;
2473 printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port" 2470 pr_err("rejected SRP_LOGIN_REQ because the target port"
2474 " has not yet been enabled\n"); 2471 " has not yet been enabled\n");
2475 goto reject; 2472 goto reject;
2476 } 2473 }
@@ -2516,7 +2513,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2516 rej->reason = __constant_cpu_to_be32( 2513 rej->reason = __constant_cpu_to_be32(
2517 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); 2514 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
2518 ret = -ENOMEM; 2515 ret = -ENOMEM;
2519 printk(KERN_ERR "rejected SRP_LOGIN_REQ because it" 2516 pr_err("rejected SRP_LOGIN_REQ because it"
2520 " has an invalid target port identifier.\n"); 2517 " has an invalid target port identifier.\n");
2521 goto reject; 2518 goto reject;
2522 } 2519 }
@@ -2525,7 +2522,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2525 if (!ch) { 2522 if (!ch) {
2526 rej->reason = __constant_cpu_to_be32( 2523 rej->reason = __constant_cpu_to_be32(
2527 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2524 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2528 printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n"); 2525 pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
2529 ret = -ENOMEM; 2526 ret = -ENOMEM;
2530 goto reject; 2527 goto reject;
2531 } 2528 }
@@ -2562,7 +2559,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2562 if (ret) { 2559 if (ret) {
2563 rej->reason = __constant_cpu_to_be32( 2560 rej->reason = __constant_cpu_to_be32(
2564 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2561 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2565 printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating" 2562 pr_err("rejected SRP_LOGIN_REQ because creating"
2566 " a new RDMA channel failed.\n"); 2563 " a new RDMA channel failed.\n");
2567 goto free_ring; 2564 goto free_ring;
2568 } 2565 }
@@ -2571,7 +2568,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2571 if (ret) { 2568 if (ret) {
2572 rej->reason = __constant_cpu_to_be32( 2569 rej->reason = __constant_cpu_to_be32(
2573 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2570 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2574 printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling" 2571 pr_err("rejected SRP_LOGIN_REQ because enabling"
2575 " RTR failed (error code = %d)\n", ret); 2572 " RTR failed (error code = %d)\n", ret);
2576 goto destroy_ib; 2573 goto destroy_ib;
2577 } 2574 }
@@ -2586,8 +2583,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2586 2583
2587 nacl = srpt_lookup_acl(sport, ch->i_port_id); 2584 nacl = srpt_lookup_acl(sport, ch->i_port_id);
2588 if (!nacl) { 2585 if (!nacl) {
2589 printk(KERN_INFO "Rejected login because no ACL has been" 2586 pr_info("Rejected login because no ACL has been"
2590 " configured yet for initiator %s.\n", ch->sess_name); 2587 " configured yet for initiator %s.\n", ch->sess_name);
2591 rej->reason = __constant_cpu_to_be32( 2588 rej->reason = __constant_cpu_to_be32(
2592 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); 2589 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
2593 goto destroy_ib; 2590 goto destroy_ib;
@@ -2631,7 +2628,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2631 2628
2632 ret = ib_send_cm_rep(cm_id, rep_param); 2629 ret = ib_send_cm_rep(cm_id, rep_param);
2633 if (ret) { 2630 if (ret) {
2634 printk(KERN_ERR "sending SRP_LOGIN_REQ response failed" 2631 pr_err("sending SRP_LOGIN_REQ response failed"
2635 " (error code = %d)\n", ret); 2632 " (error code = %d)\n", ret);
2636 goto release_channel; 2633 goto release_channel;
2637 } 2634 }
@@ -2679,7 +2676,7 @@ out:
2679 2676
2680static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) 2677static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2681{ 2678{
2682 printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id); 2679 pr_info("Received IB REJ for cm_id %p.\n", cm_id);
2683 srpt_drain_channel(cm_id); 2680 srpt_drain_channel(cm_id);
2684} 2681}
2685 2682
@@ -2714,13 +2711,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2714 2711
2715static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) 2712static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2716{ 2713{
2717 printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id); 2714 pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
2718 srpt_drain_channel(cm_id); 2715 srpt_drain_channel(cm_id);
2719} 2716}
2720 2717
2721static void srpt_cm_rep_error(struct ib_cm_id *cm_id) 2718static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2722{ 2719{
2723 printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id); 2720 pr_info("Received IB REP error for cm_id %p.\n", cm_id);
2724 srpt_drain_channel(cm_id); 2721 srpt_drain_channel(cm_id);
2725} 2722}
2726 2723
@@ -2755,9 +2752,9 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2755 2752
2756 if (send_drep) { 2753 if (send_drep) {
2757 if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) 2754 if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
2758 printk(KERN_ERR "Sending IB DREP failed.\n"); 2755 pr_err("Sending IB DREP failed.\n");
2759 printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n", 2756 pr_info("Received DREQ and sent DREP for session %s.\n",
2760 ch->sess_name); 2757 ch->sess_name);
2761 } 2758 }
2762} 2759}
2763 2760
@@ -2766,8 +2763,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2766 */ 2763 */
2767static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) 2764static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2768{ 2765{
2769 printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n", 2766 pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
2770 cm_id);
2771 srpt_drain_channel(cm_id); 2767 srpt_drain_channel(cm_id);
2772} 2768}
2773 2769
@@ -2811,14 +2807,13 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2811 srpt_cm_rep_error(cm_id); 2807 srpt_cm_rep_error(cm_id);
2812 break; 2808 break;
2813 case IB_CM_DREQ_ERROR: 2809 case IB_CM_DREQ_ERROR:
2814 printk(KERN_INFO "Received IB DREQ ERROR event.\n"); 2810 pr_info("Received IB DREQ ERROR event.\n");
2815 break; 2811 break;
2816 case IB_CM_MRA_RECEIVED: 2812 case IB_CM_MRA_RECEIVED:
2817 printk(KERN_INFO "Received IB MRA event\n"); 2813 pr_info("Received IB MRA event\n");
2818 break; 2814 break;
2819 default: 2815 default:
2820 printk(KERN_ERR "received unrecognized IB CM event %d\n", 2816 pr_err("received unrecognized IB CM event %d\n", event->event);
2821 event->event);
2822 break; 2817 break;
2823 } 2818 }
2824 2819
@@ -2848,8 +2843,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2848 ret = -ENOMEM; 2843 ret = -ENOMEM;
2849 sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail); 2844 sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
2850 if (sq_wr_avail < 0) { 2845 if (sq_wr_avail < 0) {
2851 printk(KERN_WARNING "IB send queue full (needed %d)\n", 2846 pr_warn("IB send queue full (needed %d)\n",
2852 n_rdma); 2847 n_rdma);
2853 goto out; 2848 goto out;
2854 } 2849 }
2855 } 2850 }
@@ -2889,7 +2884,7 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2889 } 2884 }
2890 2885
2891 if (ret) 2886 if (ret)
2892 printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d", 2887 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
2893 __func__, __LINE__, ret, i, n_rdma); 2888 __func__, __LINE__, ret, i, n_rdma);
2894 if (ret && i > 0) { 2889 if (ret && i > 0) {
2895 wr.num_sge = 0; 2890 wr.num_sge = 0;
@@ -2897,12 +2892,12 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2897 wr.send_flags = IB_SEND_SIGNALED; 2892 wr.send_flags = IB_SEND_SIGNALED;
2898 while (ch->state == CH_LIVE && 2893 while (ch->state == CH_LIVE &&
2899 ib_post_send(ch->qp, &wr, &bad_wr) != 0) { 2894 ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
2900 printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]", 2895 pr_info("Trying to abort failed RDMA transfer [%d]\n",
2901 ioctx->ioctx.index); 2896 ioctx->ioctx.index);
2902 msleep(1000); 2897 msleep(1000);
2903 } 2898 }
2904 while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) { 2899 while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
2905 printk(KERN_INFO "Waiting until RDMA abort finished [%d]", 2900 pr_info("Waiting until RDMA abort finished [%d]\n",
2906 ioctx->ioctx.index); 2901 ioctx->ioctx.index);
2907 msleep(1000); 2902 msleep(1000);
2908 } 2903 }
@@ -2923,17 +2918,17 @@ static int srpt_xfer_data(struct srpt_rdma_ch *ch,
2923 2918
2924 ret = srpt_map_sg_to_ib_sge(ch, ioctx); 2919 ret = srpt_map_sg_to_ib_sge(ch, ioctx);
2925 if (ret) { 2920 if (ret) {
2926 printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret); 2921 pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
2927 goto out; 2922 goto out;
2928 } 2923 }
2929 2924
2930 ret = srpt_perform_rdmas(ch, ioctx); 2925 ret = srpt_perform_rdmas(ch, ioctx);
2931 if (ret) { 2926 if (ret) {
2932 if (ret == -EAGAIN || ret == -ENOMEM) 2927 if (ret == -EAGAIN || ret == -ENOMEM)
2933 printk(KERN_INFO "%s[%d] queue full -- ret=%d\n", 2928 pr_info("%s[%d] queue full -- ret=%d\n",
2934 __func__, __LINE__, ret); 2929 __func__, __LINE__, ret);
2935 else 2930 else
2936 printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n", 2931 pr_err("%s[%d] fatal error -- ret=%d\n",
2937 __func__, __LINE__, ret); 2932 __func__, __LINE__, ret);
2938 goto out_unmap; 2933 goto out_unmap;
2939 } 2934 }
@@ -3058,7 +3053,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
3058 !ioctx->queue_status_only) { 3053 !ioctx->queue_status_only) {
3059 ret = srpt_xfer_data(ch, ioctx); 3054 ret = srpt_xfer_data(ch, ioctx);
3060 if (ret) { 3055 if (ret) {
3061 printk(KERN_ERR "xfer_data failed for tag %llu\n", 3056 pr_err("xfer_data failed for tag %llu\n",
3062 ioctx->tag); 3057 ioctx->tag);
3063 return; 3058 return;
3064 } 3059 }
@@ -3075,7 +3070,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
3075 } 3070 }
3076 ret = srpt_post_send(ch, ioctx, resp_len); 3071 ret = srpt_post_send(ch, ioctx, resp_len);
3077 if (ret) { 3072 if (ret) {
3078 printk(KERN_ERR "sending cmd response failed for tag %llu\n", 3073 pr_err("sending cmd response failed for tag %llu\n",
3079 ioctx->tag); 3074 ioctx->tag);
3080 srpt_unmap_sg_to_ib_sge(ch, ioctx); 3075 srpt_unmap_sg_to_ib_sge(ch, ioctx);
3081 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); 3076 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
@@ -3154,7 +3149,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
3154 res = wait_event_interruptible(sdev->ch_releaseQ, 3149 res = wait_event_interruptible(sdev->ch_releaseQ,
3155 srpt_ch_list_empty(sdev)); 3150 srpt_ch_list_empty(sdev));
3156 if (res) 3151 if (res)
3157 printk(KERN_ERR "%s: interrupted.\n", __func__); 3152 pr_err("%s: interrupted.\n", __func__);
3158 3153
3159 return 0; 3154 return 0;
3160} 3155}
@@ -3293,7 +3288,7 @@ static void srpt_add_one(struct ib_device *device)
3293 spin_lock_init(&sport->port_acl_lock); 3288 spin_lock_init(&sport->port_acl_lock);
3294 3289
3295 if (srpt_refresh_port(sport)) { 3290 if (srpt_refresh_port(sport)) {
3296 printk(KERN_ERR "MAD registration failed for %s-%d.\n", 3291 pr_err("MAD registration failed for %s-%d.\n",
3297 srpt_sdev_name(sdev), i); 3292 srpt_sdev_name(sdev), i);
3298 goto err_ring; 3293 goto err_ring;
3299 } 3294 }
@@ -3330,7 +3325,7 @@ free_dev:
3330 kfree(sdev); 3325 kfree(sdev);
3331err: 3326err:
3332 sdev = NULL; 3327 sdev = NULL;
3333 printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name); 3328 pr_info("%s(%s) failed.\n", __func__, device->name);
3334 goto out; 3329 goto out;
3335} 3330}
3336 3331
@@ -3344,8 +3339,7 @@ static void srpt_remove_one(struct ib_device *device)
3344 3339
3345 sdev = ib_get_client_data(device, &srpt_client); 3340 sdev = ib_get_client_data(device, &srpt_client);
3346 if (!sdev) { 3341 if (!sdev) {
3347 printk(KERN_INFO "%s(%s): nothing to do.\n", __func__, 3342 pr_info("%s(%s): nothing to do.\n", __func__, device->name);
3348 device->name);
3349 return; 3343 return;
3350 } 3344 }
3351 3345
@@ -3464,7 +3458,7 @@ static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
3464 3458
3465 nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL); 3459 nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
3466 if (!nacl) { 3460 if (!nacl) {
3467 printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n"); 3461 pr_err("Unable to allocate struct srpt_node_acl\n");
3468 return NULL; 3462 return NULL;
3469 } 3463 }
3470 3464
@@ -3615,7 +3609,7 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
3615 u8 i_port_id[16]; 3609 u8 i_port_id[16];
3616 3610
3617 if (srpt_parse_i_port_id(i_port_id, name) < 0) { 3611 if (srpt_parse_i_port_id(i_port_id, name) < 0) {
3618 printk(KERN_ERR "invalid initiator port ID %s\n", name); 3612 pr_err("invalid initiator port ID %s\n", name);
3619 ret = -EINVAL; 3613 ret = -EINVAL;
3620 goto err; 3614 goto err;
3621 } 3615 }
@@ -3816,12 +3810,12 @@ static ssize_t srpt_tpg_store_enable(
3816 3810
3817 ret = kstrtoul(page, 0, &tmp); 3811 ret = kstrtoul(page, 0, &tmp);
3818 if (ret < 0) { 3812 if (ret < 0) {
3819 printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n"); 3813 pr_err("Unable to extract srpt_tpg_store_enable\n");
3820 return -EINVAL; 3814 return -EINVAL;
3821 } 3815 }
3822 3816
3823 if ((tmp != 0) && (tmp != 1)) { 3817 if ((tmp != 0) && (tmp != 1)) {
3824 printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp); 3818 pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
3825 return -EINVAL; 3819 return -EINVAL;
3826 } 3820 }
3827 if (tmp == 1) 3821 if (tmp == 1)
@@ -3980,7 +3974,7 @@ static int __init srpt_init_module(void)
3980 3974
3981 ret = -EINVAL; 3975 ret = -EINVAL;
3982 if (srp_max_req_size < MIN_MAX_REQ_SIZE) { 3976 if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
3983 printk(KERN_ERR "invalid value %d for kernel module parameter" 3977 pr_err("invalid value %d for kernel module parameter"
3984 " srp_max_req_size -- must be at least %d.\n", 3978 " srp_max_req_size -- must be at least %d.\n",
3985 srp_max_req_size, MIN_MAX_REQ_SIZE); 3979 srp_max_req_size, MIN_MAX_REQ_SIZE);
3986 goto out; 3980 goto out;
@@ -3988,7 +3982,7 @@ static int __init srpt_init_module(void)
3988 3982
3989 if (srpt_srq_size < MIN_SRPT_SRQ_SIZE 3983 if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
3990 || srpt_srq_size > MAX_SRPT_SRQ_SIZE) { 3984 || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
3991 printk(KERN_ERR "invalid value %d for kernel module parameter" 3985 pr_err("invalid value %d for kernel module parameter"
3992 " srpt_srq_size -- must be in the range [%d..%d].\n", 3986 " srpt_srq_size -- must be in the range [%d..%d].\n",
3993 srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE); 3987 srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
3994 goto out; 3988 goto out;
@@ -3996,7 +3990,7 @@ static int __init srpt_init_module(void)
3996 3990
3997 srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt"); 3991 srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
3998 if (IS_ERR(srpt_target)) { 3992 if (IS_ERR(srpt_target)) {
3999 printk(KERN_ERR "couldn't register\n"); 3993 pr_err("couldn't register\n");
4000 ret = PTR_ERR(srpt_target); 3994 ret = PTR_ERR(srpt_target);
4001 goto out; 3995 goto out;
4002 } 3996 }
@@ -4018,13 +4012,13 @@ static int __init srpt_init_module(void)
4018 4012
4019 ret = target_fabric_configfs_register(srpt_target); 4013 ret = target_fabric_configfs_register(srpt_target);
4020 if (ret < 0) { 4014 if (ret < 0) {
4021 printk(KERN_ERR "couldn't register\n"); 4015 pr_err("couldn't register\n");
4022 goto out_free_target; 4016 goto out_free_target;
4023 } 4017 }
4024 4018
4025 ret = ib_register_client(&srpt_client); 4019 ret = ib_register_client(&srpt_client);
4026 if (ret) { 4020 if (ret) {
4027 printk(KERN_ERR "couldn't register IB client\n"); 4021 pr_err("couldn't register IB client\n");
4028 goto out_unregister_target; 4022 goto out_unregister_target;
4029 } 4023 }
4030 4024
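All of the ib_srpt.c hunks above are a mechanical conversion from printk(KERN_xxx ...) to the pr_err()/pr_warn()/pr_info() helpers. The usual companion to such a conversion is a pr_fmt() definition ahead of the includes, so every pr_* call in the file gets a consistent module prefix; whether ib_srpt.c defines one is not visible in these hunks, so the prefix below is illustrative only.

/* Must appear before any #include that pulls in <linux/printk.h>. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>

static int __init example_init(void)
{
    /* pr_err(fmt, ...) expands to printk(KERN_ERR pr_fmt(fmt), ...),
     * so this prints "<module name>: sending CM DREQ failed."
     */
    pr_err("sending CM DREQ failed.\n");
    return 0;
}

static void __exit example_exit(void)
{
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");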
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index f0fbb4ade85d..4f7dc044601e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
939 return err; 939 return err;
940 } 940 }
941 if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { 941 if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
942 /* compute slave's gid block */ 942 __be64 guid = mlx4_get_admin_guid(dev, slave,
943 smp->attr_mod = cpu_to_be32(slave / 8); 943 port);
944 /* execute cmd */ 944
945 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, 945 /* set the PF admin guid to the FW/HW burned
946 vhcr->in_modifier, opcode_modifier, 946 * GUID, if it wasn't yet set
947 vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); 947 */
948 if (!err) { 948 if (slave == 0 && guid == 0) {
949 /* if needed, move slave gid to index 0 */ 949 smp->attr_mod = 0;
950 if (slave % 8) 950 err = mlx4_cmd_box(dev,
951 memcpy(outsmp->data, 951 inbox->dma,
952 outsmp->data + (slave % 8) * 8, 8); 952 outbox->dma,
953 /* delete all other gids */ 953 vhcr->in_modifier,
954 memset(outsmp->data + 8, 0, 56); 954 opcode_modifier,
955 vhcr->op,
956 MLX4_CMD_TIME_CLASS_C,
957 MLX4_CMD_NATIVE);
958 if (err)
959 return err;
960 mlx4_set_admin_guid(dev,
961 *(__be64 *)outsmp->
962 data, slave, port);
963 } else {
964 memcpy(outsmp->data, &guid, 8);
955 } 965 }
956 return err; 966
967 /* clean all other gids */
968 memset(outsmp->data + 8, 0, 56);
969 return 0;
957 } 970 }
958 if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { 971 if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
959 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, 972 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
2350 oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; 2363 oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
2351 vf_oper->vport[port].vlan_idx = NO_INDX; 2364 vf_oper->vport[port].vlan_idx = NO_INDX;
2352 vf_oper->vport[port].mac_idx = NO_INDX; 2365 vf_oper->vport[port].mac_idx = NO_INDX;
2366 mlx4_set_random_admin_guid(dev, i, port);
2353 } 2367 }
2354 spin_lock_init(&s_state->lock); 2368 spin_lock_init(&s_state->lock);
2355 } 2369 }
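The MAD_IFC wrapper change above stops rewriting the slave's GUID block from a firmware query and instead answers IB_SMP_ATTR_GUID_INFO from the per-slave administrative GUID table; only the PF (slave 0) falls back to the firmware-burned GUID and caches it on first use, and the rest of the 64-byte block is zeroed either way. The program below models that lazy-init-then-serve flow in plain C; the table layout, the fake firmware read and the response block are simplifications for illustration, not the driver's structures.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define NUM_SLAVES      4
#define GUID_BLOCK_SIZE 64                  /* GUID_INFO carries 8 x 8-byte GUIDs */

static uint64_t admin_guid[NUM_SLAVES];     /* 0 means "not set yet" */

static uint64_t fw_read_port_guid(void)
{
    return 0x0002c90300a1b2c3ULL;           /* pretend firmware-burned GUID */
}

/* Build a GUID_INFO answer for 'slave': its GUID in entry 0, the rest zeroed. */
static void answer_guid_info(int slave, uint8_t resp[GUID_BLOCK_SIZE])
{
    uint64_t guid = admin_guid[slave];

    if (slave == 0 && guid == 0) {          /* PF: cache the HW GUID lazily */
        guid = fw_read_port_guid();
        admin_guid[0] = guid;
    }
    memset(resp, 0, GUID_BLOCK_SIZE);
    memcpy(resp, &guid, sizeof(guid));      /* the real code keeps this big-endian */
}

int main(void)
{
    uint8_t resp[GUID_BLOCK_SIZE];

    admin_guid[2] = 0x1122334455667788ULL;  /* admin GUID already set for VF 2 */
    answer_guid_info(0, resp);
    printf("slave 0 guid (cached): 0x%016llx\n",
           (unsigned long long)admin_guid[0]);
    answer_guid_info(2, resp);
    printf("slave 2: entry 0 filled, remaining %d bytes zeroed\n",
           GUID_BLOCK_SIZE - 8);
    return 0;
}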
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 190fd624bdfe..2619c9fbf42d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
702 priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; 702 priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
703 } 703 }
704 spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); 704 spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
705 mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
706 flr_slave);
705 queue_work(priv->mfunc.master.comm_wq, 707 queue_work(priv->mfunc.master.comm_wq,
706 &priv->mfunc.master.slave_flr_event_work); 708 &priv->mfunc.master.slave_flr_event_work);
707 break; 709 break;
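The eq.c hunk adds a MLX4_DEV_EVENT_SLAVE_SHUTDOWN dispatch when a VF function-level reset is observed, presumably so interested consumers such as the mlx4 IB driver can reset that slave's per-VF state before the FLR work is queued. A minimal notifier-style model of "fan an event type plus slave id out to registered handlers" is sketched below; it only illustrates the dispatch pattern and does not reflect mlx4's internal event plumbing.

#include <stdio.h>

enum dev_event { EVENT_PORT_UP, EVENT_SLAVE_SHUTDOWN, EVENT_SLAVE_INIT };

typedef void (*event_handler)(enum dev_event ev, int slave);

static event_handler handlers[4];
static int nr_handlers;

static void register_handler(event_handler h)
{
    handlers[nr_handlers++] = h;
}

/* Fan the event out to every registered consumer. */
static void dispatch_event(enum dev_event ev, int slave)
{
    int i;

    for (i = 0; i < nr_handlers; i++)
        handlers[i](ev, slave);
}

static void ib_side_handler(enum dev_event ev, int slave)
{
    if (ev == EVENT_SLAVE_SHUTDOWN)
        printf("slave %d went down: reset its GUID state\n", slave);
}

int main(void)
{
    register_handler(ib_side_handler);
    dispatch_event(EVENT_SLAVE_SHUTDOWN, 3);    /* what the FLR path now triggers */
    return 0;
}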
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index acceb75e8c44..ced5ecab5aa7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2260} 2260}
2261EXPORT_SYMBOL_GPL(mlx4_counter_free); 2261EXPORT_SYMBOL_GPL(mlx4_counter_free);
2262 2262
2263void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2264{
2265 struct mlx4_priv *priv = mlx4_priv(dev);
2266
2267 priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2268}
2269EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2270
2271__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2272{
2273 struct mlx4_priv *priv = mlx4_priv(dev);
2274
2275 return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2276}
2277EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2278
2279void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2280{
2281 struct mlx4_priv *priv = mlx4_priv(dev);
2282 __be64 guid;
2283
2284 /* hw GUID */
2285 if (entry == 0)
2286 return;
2287
2288 get_random_bytes((char *)&guid, sizeof(guid));
2289 guid &= ~(cpu_to_be64(1ULL << 56));
2290 guid |= cpu_to_be64(1ULL << 57);
2291 priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2292}
2293
2263static int mlx4_setup_hca(struct mlx4_dev *dev) 2294static int mlx4_setup_hca(struct mlx4_dev *dev)
2264{ 2295{
2265 struct mlx4_priv *priv = mlx4_priv(dev); 2296 struct mlx4_priv *priv = mlx4_priv(dev);
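mlx4_set_random_admin_guid() above draws random bytes and then clears bit 56 and sets bit 57 of the big-endian value. Interpreted as an EUI-64, that clears the group (multicast) bit and sets the locally-administered bit in the first octet, so the generated per-VF GUID is a unicast, locally-assigned identifier. The standalone program below demonstrates the same bit handling; htobe64/be64toh from glibc's <endian.h> stand in for cpu_to_be64/be64_to_cpu, and rand() stands in for get_random_bytes().

#include <endian.h>     /* htobe64 / be64toh (glibc) */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Make a random EUI-64-style GUID: unicast (I/G = 0), locally administered (U/L = 1). */
static uint64_t make_random_admin_guid(void)
{
    uint64_t raw = ((uint64_t)rand() << 32) ^ (uint64_t)rand();
    uint64_t guid_be = htobe64(raw);        /* stored big-endian, like __be64 */

    guid_be &= ~htobe64(1ULL << 56);        /* clear group bit in first octet */
    guid_be |= htobe64(1ULL << 57);         /* set locally-administered bit */
    return guid_be;
}

int main(void)
{
    uint64_t guid_be;
    uint8_t first_octet;

    srand((unsigned int)time(NULL));
    guid_be = make_random_admin_guid();
    first_octet = (uint8_t)(be64toh(guid_be) >> 56);

    printf("guid = 0x%016llx\n", (unsigned long long)be64toh(guid_be));
    printf("first octet = 0x%02x: I/G=%u U/L=%u\n",
           first_octet, first_octet & 1, (first_octet >> 1) & 1);
    return 0;
}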
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f30eeb730a86..502d3dd2c888 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -499,6 +499,7 @@ struct mlx4_vport_state {
499 bool spoofchk; 499 bool spoofchk;
500 u32 link_state; 500 u32 link_state;
501 u8 qos_vport; 501 u8 qos_vport;
502 __be64 guid;
502}; 503};
503 504
504struct mlx4_vf_admin_state { 505struct mlx4_vf_admin_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index df2238372ea7..8a64542abc16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
211 return 0; 211 return 0;
212} 212}
213 213
214#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
215
214static void free_4k(struct mlx5_core_dev *dev, u64 addr) 216static void free_4k(struct mlx5_core_dev *dev, u64 addr)
215{ 217{
216 struct fw_page *fwp; 218 struct fw_page *fwp;
217 int n; 219 int n;
218 220
219 fwp = find_fw_page(dev, addr & PAGE_MASK); 221 fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
220 if (!fwp) { 222 if (!fwp) {
221 mlx5_core_warn(dev, "page not found\n"); 223 mlx5_core_warn(dev, "page not found\n");
222 return; 224 return;
223 } 225 }
224 226
225 n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; 227 n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
226 fwp->free_count++; 228 fwp->free_count++;
227 set_bit(n, &fwp->bitmask); 229 set_bit(n, &fwp->bitmask);
228 if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { 230 if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
229 rb_erase(&fwp->rb_node, &dev->priv.page_root); 231 rb_erase(&fwp->rb_node, &dev->priv.page_root);
230 if (fwp->free_count != 1) 232 if (fwp->free_count != 1)
231 list_del(&fwp->list); 233 list_del(&fwp->list);
232 dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE, 234 dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
233 DMA_BIDIRECTIONAL); 235 PAGE_SIZE, DMA_BIDIRECTIONAL);
234 __free_page(fwp->page); 236 __free_page(fwp->page);
235 kfree(fwp); 237 kfree(fwp);
236 } else if (fwp->free_count == 1) { 238 } else if (fwp->free_count == 1) {
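The pagealloc.c change replaces PAGE_MASK with a dedicated 64-bit MLX5_U64_4K_PAGE_MASK when masking firmware page addresses. PAGE_MASK is built from an unsigned long, so on a 32-bit kernel it is only 32 bits wide, and masking a 64-bit DMA address with it silently zeroes the upper half. The program below reproduces the truncation, with a uint32_t mask standing in for a 32-bit kernel's PAGE_MASK and PAGE_SHIFT assumed to be 12.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* What a 32-bit kernel's PAGE_MASK amounts to: only 32 bits of mask. */
static const uint32_t page_mask_32 = (uint32_t)~(PAGE_SIZE - 1);

/* The fix: an explicitly 64-bit mask, matching MLX5_U64_4K_PAGE_MASK. */
static const uint64_t page_mask_u64 = (~(uint64_t)0U) << PAGE_SHIFT;

int main(void)
{
    uint64_t dma_addr = 0x0000000123456789ULL;  /* address above 4 GiB */

    /* The 32-bit mask is zero-extended to 64 bits, wiping the upper word. */
    printf("addr & 32-bit mask : 0x%016llx\n",
           (unsigned long long)(dma_addr & page_mask_32));
    printf("addr & 64-bit mask : 0x%016llx\n",
           (unsigned long long)(dma_addr & page_mask_u64));
    printf("offset within page : 0x%llx\n",
           (unsigned long long)(dma_addr & ~page_mask_u64));
    return 0;
}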
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index f9ce34bec45b..83e80ab94500 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
1345int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); 1345int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
1346void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); 1346void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
1347 1347
1348void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
1349 int port);
1350__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
1351void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
1348int mlx4_flow_attach(struct mlx4_dev *dev, 1352int mlx4_flow_attach(struct mlx4_dev *dev,
1349 struct mlx4_net_trans_rule *rule, u64 *reg_id); 1353 struct mlx4_net_trans_rule *rule, u64 *reg_id);
1350int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); 1354int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);