diff options
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/core/Makefile | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/multicast.c | 837 | ||||
-rw-r--r-- | drivers/infiniband/core/sa.h | 66 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 29 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 195 |
5 files changed, 991 insertions, 138 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 50fb1cd447b7..189e5d4b9b17 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile | |||
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ | |||
12 | 12 | ||
13 | ib_mad-y := mad.o smi.o agent.o mad_rmpp.o | 13 | ib_mad-y := mad.o smi.o agent.o mad_rmpp.o |
14 | 14 | ||
15 | ib_sa-y := sa_query.o | 15 | ib_sa-y := sa_query.o multicast.o |
16 | 16 | ||
17 | ib_cm-y := cm.o | 17 | ib_cm-y := cm.o |
18 | 18 | ||
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c new file mode 100644 index 000000000000..4a579b3a1c90 --- /dev/null +++ b/drivers/infiniband/core/multicast.c | |||
@@ -0,0 +1,837 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/completion.h> | ||
34 | #include <linux/dma-mapping.h> | ||
35 | #include <linux/err.h> | ||
36 | #include <linux/interrupt.h> | ||
37 | #include <linux/pci.h> | ||
38 | #include <linux/bitops.h> | ||
39 | #include <linux/random.h> | ||
40 | |||
41 | #include <rdma/ib_cache.h> | ||
42 | #include "sa.h" | ||
43 | |||
44 | static void mcast_add_one(struct ib_device *device); | ||
45 | static void mcast_remove_one(struct ib_device *device); | ||
46 | |||
47 | static struct ib_client mcast_client = { | ||
48 | .name = "ib_multicast", | ||
49 | .add = mcast_add_one, | ||
50 | .remove = mcast_remove_one | ||
51 | }; | ||
52 | |||
53 | static struct ib_sa_client sa_client; | ||
54 | static struct workqueue_struct *mcast_wq; | ||
55 | static union ib_gid mgid0; | ||
56 | |||
57 | struct mcast_device; | ||
58 | |||
59 | struct mcast_port { | ||
60 | struct mcast_device *dev; | ||
61 | spinlock_t lock; | ||
62 | struct rb_root table; | ||
63 | atomic_t refcount; | ||
64 | struct completion comp; | ||
65 | u8 port_num; | ||
66 | }; | ||
67 | |||
68 | struct mcast_device { | ||
69 | struct ib_device *device; | ||
70 | struct ib_event_handler event_handler; | ||
71 | int start_port; | ||
72 | int end_port; | ||
73 | struct mcast_port port[0]; | ||
74 | }; | ||
75 | |||
/* States shared by groups (group->state) and members (member->state). */
enum mcast_state {
	MCAST_IDLE,	/* group: no work queued or in progress */
	MCAST_JOINING,	/* member: join requested, not yet completed */
	MCAST_MEMBER,	/* member: join completed successfully */
	MCAST_BUSY,	/* group: work queued or a query outstanding */
	MCAST_ERROR	/* group: membership lost; member: failed with error */
};
83 | |||
84 | struct mcast_member; | ||
85 | |||
86 | struct mcast_group { | ||
87 | struct ib_sa_mcmember_rec rec; | ||
88 | struct rb_node node; | ||
89 | struct mcast_port *port; | ||
90 | spinlock_t lock; | ||
91 | struct work_struct work; | ||
92 | struct list_head pending_list; | ||
93 | struct list_head active_list; | ||
94 | struct mcast_member *last_join; | ||
95 | int members[3]; | ||
96 | atomic_t refcount; | ||
97 | enum mcast_state state; | ||
98 | struct ib_sa_query *query; | ||
99 | int query_id; | ||
100 | }; | ||
101 | |||
102 | struct mcast_member { | ||
103 | struct ib_sa_multicast multicast; | ||
104 | struct ib_sa_client *client; | ||
105 | struct mcast_group *group; | ||
106 | struct list_head list; | ||
107 | enum mcast_state state; | ||
108 | atomic_t refcount; | ||
109 | struct completion comp; | ||
110 | }; | ||
111 | |||
112 | static void join_handler(int status, struct ib_sa_mcmember_rec *rec, | ||
113 | void *context); | ||
114 | static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, | ||
115 | void *context); | ||
116 | |||
117 | static struct mcast_group *mcast_find(struct mcast_port *port, | ||
118 | union ib_gid *mgid) | ||
119 | { | ||
120 | struct rb_node *node = port->table.rb_node; | ||
121 | struct mcast_group *group; | ||
122 | int ret; | ||
123 | |||
124 | while (node) { | ||
125 | group = rb_entry(node, struct mcast_group, node); | ||
126 | ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); | ||
127 | if (!ret) | ||
128 | return group; | ||
129 | |||
130 | if (ret < 0) | ||
131 | node = node->rb_left; | ||
132 | else | ||
133 | node = node->rb_right; | ||
134 | } | ||
135 | return NULL; | ||
136 | } | ||
137 | |||
138 | static struct mcast_group *mcast_insert(struct mcast_port *port, | ||
139 | struct mcast_group *group, | ||
140 | int allow_duplicates) | ||
141 | { | ||
142 | struct rb_node **link = &port->table.rb_node; | ||
143 | struct rb_node *parent = NULL; | ||
144 | struct mcast_group *cur_group; | ||
145 | int ret; | ||
146 | |||
147 | while (*link) { | ||
148 | parent = *link; | ||
149 | cur_group = rb_entry(parent, struct mcast_group, node); | ||
150 | |||
151 | ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, | ||
152 | sizeof group->rec.mgid); | ||
153 | if (ret < 0) | ||
154 | link = &(*link)->rb_left; | ||
155 | else if (ret > 0) | ||
156 | link = &(*link)->rb_right; | ||
157 | else if (allow_duplicates) | ||
158 | link = &(*link)->rb_left; | ||
159 | else | ||
160 | return cur_group; | ||
161 | } | ||
162 | rb_link_node(&group->node, parent, link); | ||
163 | rb_insert_color(&group->node, &port->table); | ||
164 | return NULL; | ||
165 | } | ||
166 | |||
167 | static void deref_port(struct mcast_port *port) | ||
168 | { | ||
169 | if (atomic_dec_and_test(&port->refcount)) | ||
170 | complete(&port->comp); | ||
171 | } | ||
172 | |||
173 | static void release_group(struct mcast_group *group) | ||
174 | { | ||
175 | struct mcast_port *port = group->port; | ||
176 | unsigned long flags; | ||
177 | |||
178 | spin_lock_irqsave(&port->lock, flags); | ||
179 | if (atomic_dec_and_test(&group->refcount)) { | ||
180 | rb_erase(&group->node, &port->table); | ||
181 | spin_unlock_irqrestore(&port->lock, flags); | ||
182 | kfree(group); | ||
183 | deref_port(port); | ||
184 | } else | ||
185 | spin_unlock_irqrestore(&port->lock, flags); | ||
186 | } | ||
187 | |||
188 | static void deref_member(struct mcast_member *member) | ||
189 | { | ||
190 | if (atomic_dec_and_test(&member->refcount)) | ||
191 | complete(&member->comp); | ||
192 | } | ||
193 | |||
194 | static void queue_join(struct mcast_member *member) | ||
195 | { | ||
196 | struct mcast_group *group = member->group; | ||
197 | unsigned long flags; | ||
198 | |||
199 | spin_lock_irqsave(&group->lock, flags); | ||
200 | list_add(&member->list, &group->pending_list); | ||
201 | if (group->state == MCAST_IDLE) { | ||
202 | group->state = MCAST_BUSY; | ||
203 | atomic_inc(&group->refcount); | ||
204 | queue_work(mcast_wq, &group->work); | ||
205 | } | ||
206 | spin_unlock_irqrestore(&group->lock, flags); | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * A multicast group has three types of members: full member, non member, and | ||
211 | * send only member. We need to keep track of the number of members of each | ||
212 | * type based on their join state. Adjust the number of members the belong to | ||
213 | * the specified join states. | ||
214 | */ | ||
215 | static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) | ||
216 | { | ||
217 | int i; | ||
218 | |||
219 | for (i = 0; i < 3; i++, join_state >>= 1) | ||
220 | if (join_state & 0x1) | ||
221 | group->members[i] += inc; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * If a multicast group has zero members left for a particular join state, but | ||
226 | * the group is still a member with the SA, we need to leave that join state. | ||
227 | * Determine which join states we still belong to, but that do not have any | ||
228 | * active members. | ||
229 | */ | ||
230 | static u8 get_leave_state(struct mcast_group *group) | ||
231 | { | ||
232 | u8 leave_state = 0; | ||
233 | int i; | ||
234 | |||
235 | for (i = 0; i < 3; i++) | ||
236 | if (!group->members[i]) | ||
237 | leave_state |= (0x1 << i); | ||
238 | |||
239 | return leave_state & group->rec.join_state; | ||
240 | } | ||
241 | |||
242 | static int check_selector(ib_sa_comp_mask comp_mask, | ||
243 | ib_sa_comp_mask selector_mask, | ||
244 | ib_sa_comp_mask value_mask, | ||
245 | u8 selector, u8 src_value, u8 dst_value) | ||
246 | { | ||
247 | int err; | ||
248 | |||
249 | if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) | ||
250 | return 0; | ||
251 | |||
252 | switch (selector) { | ||
253 | case IB_SA_GT: | ||
254 | err = (src_value <= dst_value); | ||
255 | break; | ||
256 | case IB_SA_LT: | ||
257 | err = (src_value >= dst_value); | ||
258 | break; | ||
259 | case IB_SA_EQ: | ||
260 | err = (src_value != dst_value); | ||
261 | break; | ||
262 | default: | ||
263 | err = 0; | ||
264 | break; | ||
265 | } | ||
266 | |||
267 | return err; | ||
268 | } | ||
269 | |||
270 | static int cmp_rec(struct ib_sa_mcmember_rec *src, | ||
271 | struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) | ||
272 | { | ||
273 | /* MGID must already match */ | ||
274 | |||
275 | if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID && | ||
276 | memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid)) | ||
277 | return -EINVAL; | ||
278 | if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) | ||
279 | return -EINVAL; | ||
280 | if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) | ||
281 | return -EINVAL; | ||
282 | if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, | ||
283 | IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, | ||
284 | src->mtu, dst->mtu)) | ||
285 | return -EINVAL; | ||
286 | if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && | ||
287 | src->traffic_class != dst->traffic_class) | ||
288 | return -EINVAL; | ||
289 | if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) | ||
290 | return -EINVAL; | ||
291 | if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, | ||
292 | IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, | ||
293 | src->rate, dst->rate)) | ||
294 | return -EINVAL; | ||
295 | if (check_selector(comp_mask, | ||
296 | IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, | ||
297 | IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, | ||
298 | dst->packet_life_time_selector, | ||
299 | src->packet_life_time, dst->packet_life_time)) | ||
300 | return -EINVAL; | ||
301 | if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl) | ||
302 | return -EINVAL; | ||
303 | if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && | ||
304 | src->flow_label != dst->flow_label) | ||
305 | return -EINVAL; | ||
306 | if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && | ||
307 | src->hop_limit != dst->hop_limit) | ||
308 | return -EINVAL; | ||
309 | if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope) | ||
310 | return -EINVAL; | ||
311 | |||
312 | /* join_state checked separately, proxy_join ignored */ | ||
313 | |||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | static int send_join(struct mcast_group *group, struct mcast_member *member) | ||
318 | { | ||
319 | struct mcast_port *port = group->port; | ||
320 | int ret; | ||
321 | |||
322 | group->last_join = member; | ||
323 | ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, | ||
324 | port->port_num, IB_MGMT_METHOD_SET, | ||
325 | &member->multicast.rec, | ||
326 | member->multicast.comp_mask, | ||
327 | 3000, GFP_KERNEL, join_handler, group, | ||
328 | &group->query); | ||
329 | if (ret >= 0) { | ||
330 | group->query_id = ret; | ||
331 | ret = 0; | ||
332 | } | ||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static int send_leave(struct mcast_group *group, u8 leave_state) | ||
337 | { | ||
338 | struct mcast_port *port = group->port; | ||
339 | struct ib_sa_mcmember_rec rec; | ||
340 | int ret; | ||
341 | |||
342 | rec = group->rec; | ||
343 | rec.join_state = leave_state; | ||
344 | |||
345 | ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, | ||
346 | port->port_num, IB_SA_METHOD_DELETE, &rec, | ||
347 | IB_SA_MCMEMBER_REC_MGID | | ||
348 | IB_SA_MCMEMBER_REC_PORT_GID | | ||
349 | IB_SA_MCMEMBER_REC_JOIN_STATE, | ||
350 | 3000, GFP_KERNEL, leave_handler, | ||
351 | group, &group->query); | ||
352 | if (ret >= 0) { | ||
353 | group->query_id = ret; | ||
354 | ret = 0; | ||
355 | } | ||
356 | return ret; | ||
357 | } | ||
358 | |||
359 | static void join_group(struct mcast_group *group, struct mcast_member *member, | ||
360 | u8 join_state) | ||
361 | { | ||
362 | member->state = MCAST_MEMBER; | ||
363 | adjust_membership(group, join_state, 1); | ||
364 | group->rec.join_state |= join_state; | ||
365 | member->multicast.rec = group->rec; | ||
366 | member->multicast.rec.join_state = join_state; | ||
367 | list_move(&member->list, &group->active_list); | ||
368 | } | ||
369 | |||
370 | static int fail_join(struct mcast_group *group, struct mcast_member *member, | ||
371 | int status) | ||
372 | { | ||
373 | spin_lock_irq(&group->lock); | ||
374 | list_del_init(&member->list); | ||
375 | spin_unlock_irq(&group->lock); | ||
376 | return member->multicast.callback(status, &member->multicast); | ||
377 | } | ||
378 | |||
379 | static void process_group_error(struct mcast_group *group) | ||
380 | { | ||
381 | struct mcast_member *member; | ||
382 | int ret; | ||
383 | |||
384 | spin_lock_irq(&group->lock); | ||
385 | while (!list_empty(&group->active_list)) { | ||
386 | member = list_entry(group->active_list.next, | ||
387 | struct mcast_member, list); | ||
388 | atomic_inc(&member->refcount); | ||
389 | list_del_init(&member->list); | ||
390 | adjust_membership(group, member->multicast.rec.join_state, -1); | ||
391 | member->state = MCAST_ERROR; | ||
392 | spin_unlock_irq(&group->lock); | ||
393 | |||
394 | ret = member->multicast.callback(-ENETRESET, | ||
395 | &member->multicast); | ||
396 | deref_member(member); | ||
397 | if (ret) | ||
398 | ib_sa_free_multicast(&member->multicast); | ||
399 | spin_lock_irq(&group->lock); | ||
400 | } | ||
401 | |||
402 | group->rec.join_state = 0; | ||
403 | group->state = MCAST_BUSY; | ||
404 | spin_unlock_irq(&group->lock); | ||
405 | } | ||
406 | |||
407 | static void mcast_work_handler(struct work_struct *work) | ||
408 | { | ||
409 | struct mcast_group *group; | ||
410 | struct mcast_member *member; | ||
411 | struct ib_sa_multicast *multicast; | ||
412 | int status, ret; | ||
413 | u8 join_state; | ||
414 | |||
415 | group = container_of(work, typeof(*group), work); | ||
416 | retest: | ||
417 | spin_lock_irq(&group->lock); | ||
418 | while (!list_empty(&group->pending_list) || | ||
419 | (group->state == MCAST_ERROR)) { | ||
420 | |||
421 | if (group->state == MCAST_ERROR) { | ||
422 | spin_unlock_irq(&group->lock); | ||
423 | process_group_error(group); | ||
424 | goto retest; | ||
425 | } | ||
426 | |||
427 | member = list_entry(group->pending_list.next, | ||
428 | struct mcast_member, list); | ||
429 | multicast = &member->multicast; | ||
430 | join_state = multicast->rec.join_state; | ||
431 | atomic_inc(&member->refcount); | ||
432 | |||
433 | if (join_state == (group->rec.join_state & join_state)) { | ||
434 | status = cmp_rec(&group->rec, &multicast->rec, | ||
435 | multicast->comp_mask); | ||
436 | if (!status) | ||
437 | join_group(group, member, join_state); | ||
438 | else | ||
439 | list_del_init(&member->list); | ||
440 | spin_unlock_irq(&group->lock); | ||
441 | ret = multicast->callback(status, multicast); | ||
442 | } else { | ||
443 | spin_unlock_irq(&group->lock); | ||
444 | status = send_join(group, member); | ||
445 | if (!status) { | ||
446 | deref_member(member); | ||
447 | return; | ||
448 | } | ||
449 | ret = fail_join(group, member, status); | ||
450 | } | ||
451 | |||
452 | deref_member(member); | ||
453 | if (ret) | ||
454 | ib_sa_free_multicast(&member->multicast); | ||
455 | spin_lock_irq(&group->lock); | ||
456 | } | ||
457 | |||
458 | join_state = get_leave_state(group); | ||
459 | if (join_state) { | ||
460 | group->rec.join_state &= ~join_state; | ||
461 | spin_unlock_irq(&group->lock); | ||
462 | if (send_leave(group, join_state)) | ||
463 | goto retest; | ||
464 | } else { | ||
465 | group->state = MCAST_IDLE; | ||
466 | spin_unlock_irq(&group->lock); | ||
467 | release_group(group); | ||
468 | } | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * Fail a join request if it is still active - at the head of the pending queue. | ||
473 | */ | ||
474 | static void process_join_error(struct mcast_group *group, int status) | ||
475 | { | ||
476 | struct mcast_member *member; | ||
477 | int ret; | ||
478 | |||
479 | spin_lock_irq(&group->lock); | ||
480 | member = list_entry(group->pending_list.next, | ||
481 | struct mcast_member, list); | ||
482 | if (group->last_join == member) { | ||
483 | atomic_inc(&member->refcount); | ||
484 | list_del_init(&member->list); | ||
485 | spin_unlock_irq(&group->lock); | ||
486 | ret = member->multicast.callback(status, &member->multicast); | ||
487 | deref_member(member); | ||
488 | if (ret) | ||
489 | ib_sa_free_multicast(&member->multicast); | ||
490 | } else | ||
491 | spin_unlock_irq(&group->lock); | ||
492 | } | ||
493 | |||
494 | static void join_handler(int status, struct ib_sa_mcmember_rec *rec, | ||
495 | void *context) | ||
496 | { | ||
497 | struct mcast_group *group = context; | ||
498 | |||
499 | if (status) | ||
500 | process_join_error(group, status); | ||
501 | else { | ||
502 | spin_lock_irq(&group->port->lock); | ||
503 | group->rec = *rec; | ||
504 | if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) { | ||
505 | rb_erase(&group->node, &group->port->table); | ||
506 | mcast_insert(group->port, group, 1); | ||
507 | } | ||
508 | spin_unlock_irq(&group->port->lock); | ||
509 | } | ||
510 | mcast_work_handler(&group->work); | ||
511 | } | ||
512 | |||
513 | static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, | ||
514 | void *context) | ||
515 | { | ||
516 | struct mcast_group *group = context; | ||
517 | |||
518 | mcast_work_handler(&group->work); | ||
519 | } | ||
520 | |||
521 | static struct mcast_group *acquire_group(struct mcast_port *port, | ||
522 | union ib_gid *mgid, gfp_t gfp_mask) | ||
523 | { | ||
524 | struct mcast_group *group, *cur_group; | ||
525 | unsigned long flags; | ||
526 | int is_mgid0; | ||
527 | |||
528 | is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); | ||
529 | if (!is_mgid0) { | ||
530 | spin_lock_irqsave(&port->lock, flags); | ||
531 | group = mcast_find(port, mgid); | ||
532 | if (group) | ||
533 | goto found; | ||
534 | spin_unlock_irqrestore(&port->lock, flags); | ||
535 | } | ||
536 | |||
537 | group = kzalloc(sizeof *group, gfp_mask); | ||
538 | if (!group) | ||
539 | return NULL; | ||
540 | |||
541 | group->port = port; | ||
542 | group->rec.mgid = *mgid; | ||
543 | INIT_LIST_HEAD(&group->pending_list); | ||
544 | INIT_LIST_HEAD(&group->active_list); | ||
545 | INIT_WORK(&group->work, mcast_work_handler); | ||
546 | spin_lock_init(&group->lock); | ||
547 | |||
548 | spin_lock_irqsave(&port->lock, flags); | ||
549 | cur_group = mcast_insert(port, group, is_mgid0); | ||
550 | if (cur_group) { | ||
551 | kfree(group); | ||
552 | group = cur_group; | ||
553 | } else | ||
554 | atomic_inc(&port->refcount); | ||
555 | found: | ||
556 | atomic_inc(&group->refcount); | ||
557 | spin_unlock_irqrestore(&port->lock, flags); | ||
558 | return group; | ||
559 | } | ||
560 | |||
561 | /* | ||
562 | * We serialize all join requests to a single group to make our lives much | ||
563 | * easier. Otherwise, two users could try to join the same group | ||
564 | * simultaneously, with different configurations, one could leave while the | ||
565 | * join is in progress, etc., which makes locking around error recovery | ||
566 | * difficult. | ||
567 | */ | ||
568 | struct ib_sa_multicast * | ||
569 | ib_sa_join_multicast(struct ib_sa_client *client, | ||
570 | struct ib_device *device, u8 port_num, | ||
571 | struct ib_sa_mcmember_rec *rec, | ||
572 | ib_sa_comp_mask comp_mask, gfp_t gfp_mask, | ||
573 | int (*callback)(int status, | ||
574 | struct ib_sa_multicast *multicast), | ||
575 | void *context) | ||
576 | { | ||
577 | struct mcast_device *dev; | ||
578 | struct mcast_member *member; | ||
579 | struct ib_sa_multicast *multicast; | ||
580 | int ret; | ||
581 | |||
582 | dev = ib_get_client_data(device, &mcast_client); | ||
583 | if (!dev) | ||
584 | return ERR_PTR(-ENODEV); | ||
585 | |||
586 | member = kmalloc(sizeof *member, gfp_mask); | ||
587 | if (!member) | ||
588 | return ERR_PTR(-ENOMEM); | ||
589 | |||
590 | ib_sa_client_get(client); | ||
591 | member->client = client; | ||
592 | member->multicast.rec = *rec; | ||
593 | member->multicast.comp_mask = comp_mask; | ||
594 | member->multicast.callback = callback; | ||
595 | member->multicast.context = context; | ||
596 | init_completion(&member->comp); | ||
597 | atomic_set(&member->refcount, 1); | ||
598 | member->state = MCAST_JOINING; | ||
599 | |||
600 | member->group = acquire_group(&dev->port[port_num - dev->start_port], | ||
601 | &rec->mgid, gfp_mask); | ||
602 | if (!member->group) { | ||
603 | ret = -ENOMEM; | ||
604 | goto err; | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * The user will get the multicast structure in their callback. They | ||
609 | * could then free the multicast structure before we can return from | ||
610 | * this routine. So we save the pointer to return before queuing | ||
611 | * any callback. | ||
612 | */ | ||
613 | multicast = &member->multicast; | ||
614 | queue_join(member); | ||
615 | return multicast; | ||
616 | |||
617 | err: | ||
618 | ib_sa_client_put(client); | ||
619 | kfree(member); | ||
620 | return ERR_PTR(ret); | ||
621 | } | ||
622 | EXPORT_SYMBOL(ib_sa_join_multicast); | ||
623 | |||
624 | void ib_sa_free_multicast(struct ib_sa_multicast *multicast) | ||
625 | { | ||
626 | struct mcast_member *member; | ||
627 | struct mcast_group *group; | ||
628 | |||
629 | member = container_of(multicast, struct mcast_member, multicast); | ||
630 | group = member->group; | ||
631 | |||
632 | spin_lock_irq(&group->lock); | ||
633 | if (member->state == MCAST_MEMBER) | ||
634 | adjust_membership(group, multicast->rec.join_state, -1); | ||
635 | |||
636 | list_del_init(&member->list); | ||
637 | |||
638 | if (group->state == MCAST_IDLE) { | ||
639 | group->state = MCAST_BUSY; | ||
640 | spin_unlock_irq(&group->lock); | ||
641 | /* Continue to hold reference on group until callback */ | ||
642 | queue_work(mcast_wq, &group->work); | ||
643 | } else { | ||
644 | spin_unlock_irq(&group->lock); | ||
645 | release_group(group); | ||
646 | } | ||
647 | |||
648 | deref_member(member); | ||
649 | wait_for_completion(&member->comp); | ||
650 | ib_sa_client_put(member->client); | ||
651 | kfree(member); | ||
652 | } | ||
653 | EXPORT_SYMBOL(ib_sa_free_multicast); | ||
654 | |||
655 | int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, | ||
656 | union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) | ||
657 | { | ||
658 | struct mcast_device *dev; | ||
659 | struct mcast_port *port; | ||
660 | struct mcast_group *group; | ||
661 | unsigned long flags; | ||
662 | int ret = 0; | ||
663 | |||
664 | dev = ib_get_client_data(device, &mcast_client); | ||
665 | if (!dev) | ||
666 | return -ENODEV; | ||
667 | |||
668 | port = &dev->port[port_num - dev->start_port]; | ||
669 | spin_lock_irqsave(&port->lock, flags); | ||
670 | group = mcast_find(port, mgid); | ||
671 | if (group) | ||
672 | *rec = group->rec; | ||
673 | else | ||
674 | ret = -EADDRNOTAVAIL; | ||
675 | spin_unlock_irqrestore(&port->lock, flags); | ||
676 | |||
677 | return ret; | ||
678 | } | ||
679 | EXPORT_SYMBOL(ib_sa_get_mcmember_rec); | ||
680 | |||
681 | int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, | ||
682 | struct ib_sa_mcmember_rec *rec, | ||
683 | struct ib_ah_attr *ah_attr) | ||
684 | { | ||
685 | int ret; | ||
686 | u16 gid_index; | ||
687 | u8 p; | ||
688 | |||
689 | ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index); | ||
690 | if (ret) | ||
691 | return ret; | ||
692 | |||
693 | memset(ah_attr, 0, sizeof *ah_attr); | ||
694 | ah_attr->dlid = be16_to_cpu(rec->mlid); | ||
695 | ah_attr->sl = rec->sl; | ||
696 | ah_attr->port_num = port_num; | ||
697 | ah_attr->static_rate = rec->rate; | ||
698 | |||
699 | ah_attr->ah_flags = IB_AH_GRH; | ||
700 | ah_attr->grh.dgid = rec->mgid; | ||
701 | |||
702 | ah_attr->grh.sgid_index = (u8) gid_index; | ||
703 | ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); | ||
704 | ah_attr->grh.hop_limit = rec->hop_limit; | ||
705 | ah_attr->grh.traffic_class = rec->traffic_class; | ||
706 | |||
707 | return 0; | ||
708 | } | ||
709 | EXPORT_SYMBOL(ib_init_ah_from_mcmember); | ||
710 | |||
711 | static void mcast_groups_lost(struct mcast_port *port) | ||
712 | { | ||
713 | struct mcast_group *group; | ||
714 | struct rb_node *node; | ||
715 | unsigned long flags; | ||
716 | |||
717 | spin_lock_irqsave(&port->lock, flags); | ||
718 | for (node = rb_first(&port->table); node; node = rb_next(node)) { | ||
719 | group = rb_entry(node, struct mcast_group, node); | ||
720 | spin_lock(&group->lock); | ||
721 | if (group->state == MCAST_IDLE) { | ||
722 | atomic_inc(&group->refcount); | ||
723 | queue_work(mcast_wq, &group->work); | ||
724 | } | ||
725 | group->state = MCAST_ERROR; | ||
726 | spin_unlock(&group->lock); | ||
727 | } | ||
728 | spin_unlock_irqrestore(&port->lock, flags); | ||
729 | } | ||
730 | |||
731 | static void mcast_event_handler(struct ib_event_handler *handler, | ||
732 | struct ib_event *event) | ||
733 | { | ||
734 | struct mcast_device *dev; | ||
735 | |||
736 | dev = container_of(handler, struct mcast_device, event_handler); | ||
737 | |||
738 | switch (event->event) { | ||
739 | case IB_EVENT_PORT_ERR: | ||
740 | case IB_EVENT_LID_CHANGE: | ||
741 | case IB_EVENT_SM_CHANGE: | ||
742 | case IB_EVENT_CLIENT_REREGISTER: | ||
743 | mcast_groups_lost(&dev->port[event->element.port_num - | ||
744 | dev->start_port]); | ||
745 | break; | ||
746 | default: | ||
747 | break; | ||
748 | } | ||
749 | } | ||
750 | |||
751 | static void mcast_add_one(struct ib_device *device) | ||
752 | { | ||
753 | struct mcast_device *dev; | ||
754 | struct mcast_port *port; | ||
755 | int i; | ||
756 | |||
757 | if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) | ||
758 | return; | ||
759 | |||
760 | dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, | ||
761 | GFP_KERNEL); | ||
762 | if (!dev) | ||
763 | return; | ||
764 | |||
765 | if (device->node_type == RDMA_NODE_IB_SWITCH) | ||
766 | dev->start_port = dev->end_port = 0; | ||
767 | else { | ||
768 | dev->start_port = 1; | ||
769 | dev->end_port = device->phys_port_cnt; | ||
770 | } | ||
771 | |||
772 | for (i = 0; i <= dev->end_port - dev->start_port; i++) { | ||
773 | port = &dev->port[i]; | ||
774 | port->dev = dev; | ||
775 | port->port_num = dev->start_port + i; | ||
776 | spin_lock_init(&port->lock); | ||
777 | port->table = RB_ROOT; | ||
778 | init_completion(&port->comp); | ||
779 | atomic_set(&port->refcount, 1); | ||
780 | } | ||
781 | |||
782 | dev->device = device; | ||
783 | ib_set_client_data(device, &mcast_client, dev); | ||
784 | |||
785 | INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); | ||
786 | ib_register_event_handler(&dev->event_handler); | ||
787 | } | ||
788 | |||
789 | static void mcast_remove_one(struct ib_device *device) | ||
790 | { | ||
791 | struct mcast_device *dev; | ||
792 | struct mcast_port *port; | ||
793 | int i; | ||
794 | |||
795 | dev = ib_get_client_data(device, &mcast_client); | ||
796 | if (!dev) | ||
797 | return; | ||
798 | |||
799 | ib_unregister_event_handler(&dev->event_handler); | ||
800 | flush_workqueue(mcast_wq); | ||
801 | |||
802 | for (i = 0; i <= dev->end_port - dev->start_port; i++) { | ||
803 | port = &dev->port[i]; | ||
804 | deref_port(port); | ||
805 | wait_for_completion(&port->comp); | ||
806 | } | ||
807 | |||
808 | kfree(dev); | ||
809 | } | ||
810 | |||
811 | int mcast_init(void) | ||
812 | { | ||
813 | int ret; | ||
814 | |||
815 | mcast_wq = create_singlethread_workqueue("ib_mcast"); | ||
816 | if (!mcast_wq) | ||
817 | return -ENOMEM; | ||
818 | |||
819 | ib_sa_register_client(&sa_client); | ||
820 | |||
821 | ret = ib_register_client(&mcast_client); | ||
822 | if (ret) | ||
823 | goto err; | ||
824 | return 0; | ||
825 | |||
826 | err: | ||
827 | ib_sa_unregister_client(&sa_client); | ||
828 | destroy_workqueue(mcast_wq); | ||
829 | return ret; | ||
830 | } | ||
831 | |||
832 | void mcast_cleanup(void) | ||
833 | { | ||
834 | ib_unregister_client(&mcast_client); | ||
835 | ib_sa_unregister_client(&sa_client); | ||
836 | destroy_workqueue(mcast_wq); | ||
837 | } | ||
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h new file mode 100644 index 000000000000..24c93fd320fb --- /dev/null +++ b/drivers/infiniband/core/sa.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2004 Topspin Communications. All rights reserved. | ||
3 | * Copyright (c) 2005 Voltaire, Inc. All rights reserved. | ||
4 | * Copyright (c) 2006 Intel Corporation. All rights reserved. | ||
5 | * | ||
6 | * This software is available to you under a choice of one of two | ||
7 | * licenses. You may choose to be licensed under the terms of the GNU | ||
8 | * General Public License (GPL) Version 2, available from the file | ||
9 | * COPYING in the main directory of this source tree, or the | ||
10 | * OpenIB.org BSD license below: | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or | ||
13 | * without modification, are permitted provided that the following | ||
14 | * conditions are met: | ||
15 | * | ||
16 | * - Redistributions of source code must retain the above | ||
17 | * copyright notice, this list of conditions and the following | ||
18 | * disclaimer. | ||
19 | * | ||
20 | * - Redistributions in binary form must reproduce the above | ||
21 | * copyright notice, this list of conditions and the following | ||
22 | * disclaimer in the documentation and/or other materials | ||
23 | * provided with the distribution. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
32 | * SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | #ifndef SA_H | ||
36 | #define SA_H | ||
37 | |||
38 | #include <rdma/ib_sa.h> | ||
39 | |||
/* Take a reference on @client by incrementing its atomic user count. */
40 | static inline void ib_sa_client_get(struct ib_sa_client *client) | ||
41 | { | ||
42 | atomic_inc(&client->users); | ||
43 | } | ||
44 | |||
/*
 * Drop a reference on @client. When the user count reaches zero,
 * client->comp is completed.
 *
 * NOTE(review): ib_sa_unregister_client() calls this; presumably it then
 * waits on client->comp, confirm in sa_query.c.
 */
45 | static inline void ib_sa_client_put(struct ib_sa_client *client) | ||
46 | { | ||
47 | if (atomic_dec_and_test(&client->users)) | ||
48 | complete(&client->comp); | ||
49 | } | ||
50 | |||
/*
 * Prototype for the MCMemberRecord query routine.
 *
 * @callback is invoked with a status code, a response record pointer and the
 * caller's @context. @sa_query is an output pointer to the query object.
 *
 * NOTE(review): the definition is outside this view. Presumably it lives in
 * sa_query.c and returns a negative errno on failure and a query id
 * otherwise; confirm against the definition.
 */
51 | int ib_sa_mcmember_rec_query(struct ib_sa_client *client, | ||
52 | struct ib_device *device, u8 port_num, | ||
53 | u8 method, | ||
54 | struct ib_sa_mcmember_rec *rec, | ||
55 | ib_sa_comp_mask comp_mask, | ||
56 | int timeout_ms, gfp_t gfp_mask, | ||
57 | void (*callback)(int status, | ||
58 | struct ib_sa_mcmember_rec *resp, | ||
59 | void *context), | ||
60 | void *context, | ||
61 | struct ib_sa_query **sa_query); | ||
62 | |||
/*
 * Multicast setup and teardown entry points, implemented in multicast.c and
 * called from ib_sa_init() / ib_sa_cleanup() in sa_query.c.
 */
63 | int mcast_init(void); | ||
64 | void mcast_cleanup(void); | ||
65 | |||
66 | #endif /* SA_H */ | ||
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e45afba75341..d7d4a5309ba9 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c | |||
@@ -47,8 +47,8 @@ | |||
47 | #include <linux/workqueue.h> | 47 | #include <linux/workqueue.h> |
48 | 48 | ||
49 | #include <rdma/ib_pack.h> | 49 | #include <rdma/ib_pack.h> |
50 | #include <rdma/ib_sa.h> | ||
51 | #include <rdma/ib_cache.h> | 50 | #include <rdma/ib_cache.h> |
51 | #include "sa.h" | ||
52 | 52 | ||
53 | MODULE_AUTHOR("Roland Dreier"); | 53 | MODULE_AUTHOR("Roland Dreier"); |
54 | MODULE_DESCRIPTION("InfiniBand subnet administration query support"); | 54 | MODULE_DESCRIPTION("InfiniBand subnet administration query support"); |
@@ -425,17 +425,6 @@ void ib_sa_register_client(struct ib_sa_client *client) | |||
425 | } | 425 | } |
426 | EXPORT_SYMBOL(ib_sa_register_client); | 426 | EXPORT_SYMBOL(ib_sa_register_client); |
427 | 427 | ||
428 | static inline void ib_sa_client_get(struct ib_sa_client *client) | ||
429 | { | ||
430 | atomic_inc(&client->users); | ||
431 | } | ||
432 | |||
433 | static inline void ib_sa_client_put(struct ib_sa_client *client) | ||
434 | { | ||
435 | if (atomic_dec_and_test(&client->users)) | ||
436 | complete(&client->comp); | ||
437 | } | ||
438 | |||
439 | void ib_sa_unregister_client(struct ib_sa_client *client) | 428 | void ib_sa_unregister_client(struct ib_sa_client *client) |
440 | { | 429 | { |
441 | ib_sa_client_put(client); | 430 | ib_sa_client_put(client); |
@@ -901,7 +890,6 @@ err1: | |||
901 | kfree(query); | 890 | kfree(query); |
902 | return ret; | 891 | return ret; |
903 | } | 892 | } |
904 | EXPORT_SYMBOL(ib_sa_mcmember_rec_query); | ||
905 | 893 | ||
906 | static void send_handler(struct ib_mad_agent *agent, | 894 | static void send_handler(struct ib_mad_agent *agent, |
907 | struct ib_mad_send_wc *mad_send_wc) | 895 | struct ib_mad_send_wc *mad_send_wc) |
@@ -1053,14 +1041,27 @@ static int __init ib_sa_init(void) | |||
1053 | get_random_bytes(&tid, sizeof tid); | 1041 | get_random_bytes(&tid, sizeof tid); |
1054 | 1042 | ||
1055 | ret = ib_register_client(&sa_client); | 1043 | ret = ib_register_client(&sa_client); |
1056 | if (ret) | 1044 | if (ret) { |
1057 | printk(KERN_ERR "Couldn't register ib_sa client\n"); | 1045 | printk(KERN_ERR "Couldn't register ib_sa client\n"); |
1046 | goto err1; | ||
1047 | } | ||
1048 | |||
1049 | ret = mcast_init(); | ||
1050 | if (ret) { | ||
1051 | printk(KERN_ERR "Couldn't initialize multicast handling\n"); | ||
1052 | goto err2; | ||
1053 | } | ||
1058 | 1054 | ||
1055 | return 0; | ||
1056 | err2: | ||
1057 | ib_unregister_client(&sa_client); | ||
1058 | err1: | ||
1059 | return ret; | 1059 | return ret; |
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | static void __exit ib_sa_cleanup(void) | 1062 | static void __exit ib_sa_cleanup(void) |
1063 | { | 1063 | { |
1064 | mcast_cleanup(); | ||
1064 | ib_unregister_client(&sa_client); | 1065 | ib_unregister_client(&sa_client); |
1065 | idr_destroy(&query_idr); | 1066 | idr_destroy(&query_idr); |
1066 | } | 1067 | } |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index fea737f520fd..b303ce6bc21e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex); | |||
60 | /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ | 60 | /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ |
61 | struct ipoib_mcast { | 61 | struct ipoib_mcast { |
62 | struct ib_sa_mcmember_rec mcmember; | 62 | struct ib_sa_mcmember_rec mcmember; |
63 | struct ib_sa_multicast *mc; | ||
63 | struct ipoib_ah *ah; | 64 | struct ipoib_ah *ah; |
64 | 65 | ||
65 | struct rb_node rb_node; | 66 | struct rb_node rb_node; |
66 | struct list_head list; | 67 | struct list_head list; |
67 | struct completion done; | ||
68 | |||
69 | int query_id; | ||
70 | struct ib_sa_query *query; | ||
71 | 68 | ||
72 | unsigned long created; | 69 | unsigned long created; |
73 | unsigned long backoff; | 70 | unsigned long backoff; |
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |||
299 | return 0; | 296 | return 0; |
300 | } | 297 | } |
301 | 298 | ||
302 | static void | 299 | static int |
303 | ipoib_mcast_sendonly_join_complete(int status, | 300 | ipoib_mcast_sendonly_join_complete(int status, |
304 | struct ib_sa_mcmember_rec *mcmember, | 301 | struct ib_sa_multicast *multicast) |
305 | void *mcast_ptr) | ||
306 | { | 302 | { |
307 | struct ipoib_mcast *mcast = mcast_ptr; | 303 | struct ipoib_mcast *mcast = multicast->context; |
308 | struct net_device *dev = mcast->dev; | 304 | struct net_device *dev = mcast->dev; |
309 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 305 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
310 | 306 | ||
307 | /* We trap for port events ourselves. */ | ||
308 | if (status == -ENETRESET) | ||
309 | return 0; | ||
310 | |||
311 | if (!status) | 311 | if (!status) |
312 | ipoib_mcast_join_finish(mcast, mcmember); | 312 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); |
313 | else { | 313 | |
314 | if (status) { | ||
314 | if (mcast->logcount++ < 20) | 315 | if (mcast->logcount++ < 20) |
315 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " | 316 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " |
316 | IPOIB_GID_FMT ", status %d\n", | 317 | IPOIB_GID_FMT ", status %d\n", |
@@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status, | |||
325 | spin_unlock_irq(&priv->tx_lock); | 326 | spin_unlock_irq(&priv->tx_lock); |
326 | 327 | ||
327 | /* Clear the busy flag so we try again */ | 328 | /* Clear the busy flag so we try again */ |
328 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | 329 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, |
329 | mcast->query = NULL; | 330 | &mcast->flags); |
330 | } | 331 | } |
331 | 332 | return status; | |
332 | complete(&mcast->done); | ||
333 | } | 333 | } |
334 | 334 | ||
335 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | 335 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) |
@@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |||
359 | rec.port_gid = priv->local_gid; | 359 | rec.port_gid = priv->local_gid; |
360 | rec.pkey = cpu_to_be16(priv->pkey); | 360 | rec.pkey = cpu_to_be16(priv->pkey); |
361 | 361 | ||
362 | init_completion(&mcast->done); | 362 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, |
363 | 363 | priv->port, &rec, | |
364 | ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec, | 364 | IB_SA_MCMEMBER_REC_MGID | |
365 | IB_SA_MCMEMBER_REC_MGID | | 365 | IB_SA_MCMEMBER_REC_PORT_GID | |
366 | IB_SA_MCMEMBER_REC_PORT_GID | | 366 | IB_SA_MCMEMBER_REC_PKEY | |
367 | IB_SA_MCMEMBER_REC_PKEY | | 367 | IB_SA_MCMEMBER_REC_JOIN_STATE, |
368 | IB_SA_MCMEMBER_REC_JOIN_STATE, | 368 | GFP_ATOMIC, |
369 | 1000, GFP_ATOMIC, | 369 | ipoib_mcast_sendonly_join_complete, |
370 | ipoib_mcast_sendonly_join_complete, | 370 | mcast); |
371 | mcast, &mcast->query); | 371 | if (IS_ERR(mcast->mc)) { |
372 | if (ret < 0) { | 372 | ret = PTR_ERR(mcast->mc); |
373 | ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", | 373 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
374 | ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", | ||
374 | ret); | 375 | ret); |
375 | } else { | 376 | } else { |
376 | ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT | 377 | ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT |
377 | ", starting join\n", | 378 | ", starting join\n", |
378 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | 379 | IPOIB_GID_ARG(mcast->mcmember.mgid)); |
379 | |||
380 | mcast->query_id = ret; | ||
381 | } | 380 | } |
382 | 381 | ||
383 | return ret; | 382 | return ret; |
384 | } | 383 | } |
385 | 384 | ||
386 | static void ipoib_mcast_join_complete(int status, | 385 | static int ipoib_mcast_join_complete(int status, |
387 | struct ib_sa_mcmember_rec *mcmember, | 386 | struct ib_sa_multicast *multicast) |
388 | void *mcast_ptr) | ||
389 | { | 387 | { |
390 | struct ipoib_mcast *mcast = mcast_ptr; | 388 | struct ipoib_mcast *mcast = multicast->context; |
391 | struct net_device *dev = mcast->dev; | 389 | struct net_device *dev = mcast->dev; |
392 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 390 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
393 | 391 | ||
@@ -395,24 +393,25 @@ static void ipoib_mcast_join_complete(int status, | |||
395 | " (status %d)\n", | 393 | " (status %d)\n", |
396 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | 394 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); |
397 | 395 | ||
398 | if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { | 396 | /* We trap for port events ourselves. */ |
397 | if (status == -ENETRESET) | ||
398 | return 0; | ||
399 | |||
400 | if (!status) | ||
401 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); | ||
402 | |||
403 | if (!status) { | ||
399 | mcast->backoff = 1; | 404 | mcast->backoff = 1; |
400 | mutex_lock(&mcast_mutex); | 405 | mutex_lock(&mcast_mutex); |
401 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | 406 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
402 | queue_delayed_work(ipoib_workqueue, | 407 | queue_delayed_work(ipoib_workqueue, |
403 | &priv->mcast_task, 0); | 408 | &priv->mcast_task, 0); |
404 | mutex_unlock(&mcast_mutex); | 409 | mutex_unlock(&mcast_mutex); |
405 | complete(&mcast->done); | 410 | return 0; |
406 | return; | ||
407 | } | ||
408 | |||
409 | if (status == -EINTR) { | ||
410 | complete(&mcast->done); | ||
411 | return; | ||
412 | } | 411 | } |
413 | 412 | ||
414 | if (status && mcast->logcount++ < 20) { | 413 | if (mcast->logcount++ < 20) { |
415 | if (status == -ETIMEDOUT || status == -EINTR) { | 414 | if (status == -ETIMEDOUT) { |
416 | ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT | 415 | ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT |
417 | ", status %d\n", | 416 | ", status %d\n", |
418 | IPOIB_GID_ARG(mcast->mcmember.mgid), | 417 | IPOIB_GID_ARG(mcast->mcmember.mgid), |
@@ -429,24 +428,18 @@ static void ipoib_mcast_join_complete(int status, | |||
429 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | 428 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) |
430 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | 429 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; |
431 | 430 | ||
432 | mutex_lock(&mcast_mutex); | 431 | /* Clear the busy flag so we try again */ |
432 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
433 | 433 | ||
434 | mutex_lock(&mcast_mutex); | ||
434 | spin_lock_irq(&priv->lock); | 435 | spin_lock_irq(&priv->lock); |
435 | mcast->query = NULL; | 436 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
436 | 437 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | |
437 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { | 438 | mcast->backoff * HZ); |
438 | if (status == -ETIMEDOUT) | ||
439 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | ||
440 | 0); | ||
441 | else | ||
442 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | ||
443 | mcast->backoff * HZ); | ||
444 | } else | ||
445 | complete(&mcast->done); | ||
446 | spin_unlock_irq(&priv->lock); | 439 | spin_unlock_irq(&priv->lock); |
447 | mutex_unlock(&mcast_mutex); | 440 | mutex_unlock(&mcast_mutex); |
448 | 441 | ||
449 | return; | 442 | return status; |
450 | } | 443 | } |
451 | 444 | ||
452 | static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | 445 | static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, |
@@ -495,15 +488,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |||
495 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; | 488 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; |
496 | } | 489 | } |
497 | 490 | ||
498 | init_completion(&mcast->done); | 491 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
499 | 492 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, | |
500 | ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, | 493 | &rec, comp_mask, GFP_KERNEL, |
501 | &rec, comp_mask, mcast->backoff * 1000, | 494 | ipoib_mcast_join_complete, mcast); |
502 | GFP_ATOMIC, ipoib_mcast_join_complete, | 495 | if (IS_ERR(mcast->mc)) { |
503 | mcast, &mcast->query); | 496 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
504 | 497 | ret = PTR_ERR(mcast->mc); | |
505 | if (ret < 0) { | 498 | ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); |
506 | ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); | ||
507 | 499 | ||
508 | mcast->backoff *= 2; | 500 | mcast->backoff *= 2; |
509 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | 501 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) |
@@ -515,8 +507,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |||
515 | &priv->mcast_task, | 507 | &priv->mcast_task, |
516 | mcast->backoff * HZ); | 508 | mcast->backoff * HZ); |
517 | mutex_unlock(&mcast_mutex); | 509 | mutex_unlock(&mcast_mutex); |
518 | } else | 510 | } |
519 | mcast->query_id = ret; | ||
520 | } | 511 | } |
521 | 512 | ||
522 | void ipoib_mcast_join_task(struct work_struct *work) | 513 | void ipoib_mcast_join_task(struct work_struct *work) |
@@ -541,7 +532,7 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
541 | priv->local_rate = attr.active_speed * | 532 | priv->local_rate = attr.active_speed * |
542 | ib_width_enum_to_int(attr.active_width); | 533 | ib_width_enum_to_int(attr.active_width); |
543 | } else | 534 | } else |
544 | ipoib_warn(priv, "ib_query_port failed\n"); | 535 | ipoib_warn(priv, "ib_query_port failed\n"); |
545 | } | 536 | } |
546 | 537 | ||
547 | if (!priv->broadcast) { | 538 | if (!priv->broadcast) { |
@@ -568,7 +559,8 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
568 | } | 559 | } |
569 | 560 | ||
570 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | 561 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { |
571 | ipoib_mcast_join(dev, priv->broadcast, 0); | 562 | if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) |
563 | ipoib_mcast_join(dev, priv->broadcast, 0); | ||
572 | return; | 564 | return; |
573 | } | 565 | } |
574 | 566 | ||
@@ -625,26 +617,9 @@ int ipoib_mcast_start_thread(struct net_device *dev) | |||
625 | return 0; | 617 | return 0; |
626 | } | 618 | } |
627 | 619 | ||
628 | static void wait_for_mcast_join(struct ipoib_dev_priv *priv, | ||
629 | struct ipoib_mcast *mcast) | ||
630 | { | ||
631 | spin_lock_irq(&priv->lock); | ||
632 | if (mcast && mcast->query) { | ||
633 | ib_sa_cancel_query(mcast->query_id, mcast->query); | ||
634 | mcast->query = NULL; | ||
635 | spin_unlock_irq(&priv->lock); | ||
636 | ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", | ||
637 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
638 | wait_for_completion(&mcast->done); | ||
639 | } | ||
640 | else | ||
641 | spin_unlock_irq(&priv->lock); | ||
642 | } | ||
643 | |||
644 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) | 620 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) |
645 | { | 621 | { |
646 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 622 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
647 | struct ipoib_mcast *mcast; | ||
648 | 623 | ||
649 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | 624 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); |
650 | 625 | ||
@@ -660,52 +635,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) | |||
660 | if (flush) | 635 | if (flush) |
661 | flush_workqueue(ipoib_workqueue); | 636 | flush_workqueue(ipoib_workqueue); |
662 | 637 | ||
663 | wait_for_mcast_join(priv, priv->broadcast); | ||
664 | |||
665 | list_for_each_entry(mcast, &priv->multicast_list, list) | ||
666 | wait_for_mcast_join(priv, mcast); | ||
667 | |||
668 | return 0; | 638 | return 0; |
669 | } | 639 | } |
670 | 640 | ||
671 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | 641 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) |
672 | { | 642 | { |
673 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 643 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
674 | struct ib_sa_mcmember_rec rec = { | ||
675 | .join_state = 1 | ||
676 | }; | ||
677 | int ret = 0; | 644 | int ret = 0; |
678 | 645 | ||
679 | if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) | 646 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
680 | return 0; | 647 | ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", |
681 | 648 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
682 | ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", | ||
683 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
684 | |||
685 | rec.mgid = mcast->mcmember.mgid; | ||
686 | rec.port_gid = priv->local_gid; | ||
687 | rec.pkey = cpu_to_be16(priv->pkey); | ||
688 | 649 | ||
689 | /* Remove ourselves from the multicast group */ | 650 | /* Remove ourselves from the multicast group */ |
690 | ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), | 651 | ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), |
691 | &mcast->mcmember.mgid); | 652 | &mcast->mcmember.mgid); |
692 | if (ret) | 653 | if (ret) |
693 | ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); | 654 | ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); |
655 | } | ||
694 | 656 | ||
695 | /* | 657 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
696 | * Just make one shot at leaving and don't wait for a reply; | 658 | ib_sa_free_multicast(mcast->mc); |
697 | * if we fail, too bad. | ||
698 | */ | ||
699 | ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec, | ||
700 | IB_SA_MCMEMBER_REC_MGID | | ||
701 | IB_SA_MCMEMBER_REC_PORT_GID | | ||
702 | IB_SA_MCMEMBER_REC_PKEY | | ||
703 | IB_SA_MCMEMBER_REC_JOIN_STATE, | ||
704 | 0, GFP_ATOMIC, NULL, | ||
705 | mcast, &mcast->query); | ||
706 | if (ret < 0) | ||
707 | ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " | ||
708 | "for leave (result = %d)\n", ret); | ||
709 | 659 | ||
710 | return 0; | 660 | return 0; |
711 | } | 661 | } |
@@ -758,7 +708,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) | |||
758 | dev_kfree_skb_any(skb); | 708 | dev_kfree_skb_any(skb); |
759 | } | 709 | } |
760 | 710 | ||
761 | if (mcast->query) | 711 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
762 | ipoib_dbg_mcast(priv, "no address vector, " | 712 | ipoib_dbg_mcast(priv, "no address vector, " |
763 | "but multicast join already started\n"); | 713 | "but multicast join already started\n"); |
764 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | 714 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) |
@@ -916,7 +866,6 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |||
916 | 866 | ||
917 | /* We have to cancel outside of the spinlock */ | 867 | /* We have to cancel outside of the spinlock */ |
918 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | 868 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { |
919 | wait_for_mcast_join(priv, mcast); | ||
920 | ipoib_mcast_leave(mcast->dev, mcast); | 869 | ipoib_mcast_leave(mcast->dev, mcast); |
921 | ipoib_mcast_free(mcast); | 870 | ipoib_mcast_free(mcast); |
922 | } | 871 | } |