diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_multicast.c')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 991 |
1 files changed, 991 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c new file mode 100644 index 000000000000..f46932dc81c9 --- /dev/null +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -0,0 +1,991 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ | ||
33 | */ | ||
34 | |||
35 | #include <linux/skbuff.h> | ||
36 | #include <linux/rtnetlink.h> | ||
37 | #include <linux/ip.h> | ||
38 | #include <linux/in.h> | ||
39 | #include <linux/igmp.h> | ||
40 | #include <linux/inetdevice.h> | ||
41 | #include <linux/delay.h> | ||
42 | #include <linux/completion.h> | ||
43 | |||
44 | #include "ipoib.h" | ||
45 | |||
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Verbosity knob for multicast debug output, exposed as a module parameter. */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

/*
 * Held around every test or update of IPOIB_MCAST_RUN in this file that is
 * paired with queueing or cancelling priv->mcast_task.
 */
static DECLARE_MUTEX(mcast_mutex);
55 | |||
56 | /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ | ||
57 | struct ipoib_mcast { | ||
58 | struct ib_sa_mcmember_rec mcmember; | ||
59 | struct ipoib_ah *ah; | ||
60 | |||
61 | struct rb_node rb_node; | ||
62 | struct list_head list; | ||
63 | struct completion done; | ||
64 | |||
65 | int query_id; | ||
66 | struct ib_sa_query *query; | ||
67 | |||
68 | unsigned long created; | ||
69 | unsigned long backoff; | ||
70 | |||
71 | unsigned long flags; | ||
72 | unsigned char logcount; | ||
73 | |||
74 | struct list_head neigh_list; | ||
75 | |||
76 | struct sk_buff_head pkt_queue; | ||
77 | |||
78 | struct net_device *dev; | ||
79 | }; | ||
80 | |||
81 | struct ipoib_mcast_iter { | ||
82 | struct net_device *dev; | ||
83 | union ib_gid mgid; | ||
84 | unsigned long created; | ||
85 | unsigned int queuelen; | ||
86 | unsigned int complete; | ||
87 | unsigned int send_only; | ||
88 | }; | ||
89 | |||
90 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) | ||
91 | { | ||
92 | struct net_device *dev = mcast->dev; | ||
93 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
94 | struct ipoib_neigh *neigh, *tmp; | ||
95 | unsigned long flags; | ||
96 | LIST_HEAD(ah_list); | ||
97 | struct ipoib_ah *ah, *tah; | ||
98 | |||
99 | ipoib_dbg_mcast(netdev_priv(dev), | ||
100 | "deleting multicast group " IPOIB_GID_FMT "\n", | ||
101 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
102 | |||
103 | spin_lock_irqsave(&priv->lock, flags); | ||
104 | |||
105 | list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { | ||
106 | if (neigh->ah) | ||
107 | list_add_tail(&neigh->ah->list, &ah_list); | ||
108 | *to_ipoib_neigh(neigh->neighbour) = NULL; | ||
109 | neigh->neighbour->ops->destructor = NULL; | ||
110 | kfree(neigh); | ||
111 | } | ||
112 | |||
113 | spin_unlock_irqrestore(&priv->lock, flags); | ||
114 | |||
115 | list_for_each_entry_safe(ah, tah, &ah_list, list) | ||
116 | ipoib_put_ah(ah); | ||
117 | |||
118 | if (mcast->ah) | ||
119 | ipoib_put_ah(mcast->ah); | ||
120 | |||
121 | while (!skb_queue_empty(&mcast->pkt_queue)) { | ||
122 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | ||
123 | |||
124 | skb->dev = dev; | ||
125 | dev_kfree_skb_any(skb); | ||
126 | } | ||
127 | |||
128 | kfree(mcast); | ||
129 | } | ||
130 | |||
131 | static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, | ||
132 | int can_sleep) | ||
133 | { | ||
134 | struct ipoib_mcast *mcast; | ||
135 | |||
136 | mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); | ||
137 | if (!mcast) | ||
138 | return NULL; | ||
139 | |||
140 | memset(mcast, 0, sizeof (*mcast)); | ||
141 | |||
142 | init_completion(&mcast->done); | ||
143 | |||
144 | mcast->dev = dev; | ||
145 | mcast->created = jiffies; | ||
146 | mcast->backoff = HZ; | ||
147 | mcast->logcount = 0; | ||
148 | |||
149 | INIT_LIST_HEAD(&mcast->list); | ||
150 | INIT_LIST_HEAD(&mcast->neigh_list); | ||
151 | skb_queue_head_init(&mcast->pkt_queue); | ||
152 | |||
153 | mcast->ah = NULL; | ||
154 | mcast->query = NULL; | ||
155 | |||
156 | return mcast; | ||
157 | } | ||
158 | |||
159 | static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid) | ||
160 | { | ||
161 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
162 | struct rb_node *n = priv->multicast_tree.rb_node; | ||
163 | |||
164 | while (n) { | ||
165 | struct ipoib_mcast *mcast; | ||
166 | int ret; | ||
167 | |||
168 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | ||
169 | |||
170 | ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw, | ||
171 | sizeof (union ib_gid)); | ||
172 | if (ret < 0) | ||
173 | n = n->rb_left; | ||
174 | else if (ret > 0) | ||
175 | n = n->rb_right; | ||
176 | else | ||
177 | return mcast; | ||
178 | } | ||
179 | |||
180 | return NULL; | ||
181 | } | ||
182 | |||
183 | static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) | ||
184 | { | ||
185 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
186 | struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; | ||
187 | |||
188 | while (*n) { | ||
189 | struct ipoib_mcast *tmcast; | ||
190 | int ret; | ||
191 | |||
192 | pn = *n; | ||
193 | tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); | ||
194 | |||
195 | ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, | ||
196 | sizeof (union ib_gid)); | ||
197 | if (ret < 0) | ||
198 | n = &pn->rb_left; | ||
199 | else if (ret > 0) | ||
200 | n = &pn->rb_right; | ||
201 | else | ||
202 | return -EEXIST; | ||
203 | } | ||
204 | |||
205 | rb_link_node(&mcast->rb_node, pn, n); | ||
206 | rb_insert_color(&mcast->rb_node, &priv->multicast_tree); | ||
207 | |||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | ||
212 | struct ib_sa_mcmember_rec *mcmember) | ||
213 | { | ||
214 | struct net_device *dev = mcast->dev; | ||
215 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
216 | int ret; | ||
217 | |||
218 | mcast->mcmember = *mcmember; | ||
219 | |||
220 | /* Set the cached Q_Key before we attach if it's the broadcast group */ | ||
221 | if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | ||
222 | sizeof (union ib_gid))) { | ||
223 | priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); | ||
224 | priv->tx_wr.wr.ud.remote_qkey = priv->qkey; | ||
225 | } | ||
226 | |||
227 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | ||
228 | if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | ||
229 | ipoib_warn(priv, "multicast group " IPOIB_GID_FMT | ||
230 | " already attached\n", | ||
231 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
232 | |||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), | ||
237 | &mcast->mcmember.mgid); | ||
238 | if (ret < 0) { | ||
239 | ipoib_warn(priv, "couldn't attach QP to multicast group " | ||
240 | IPOIB_GID_FMT "\n", | ||
241 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
242 | |||
243 | clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); | ||
244 | return ret; | ||
245 | } | ||
246 | } | ||
247 | |||
248 | { | ||
249 | struct ib_ah_attr av = { | ||
250 | .dlid = be16_to_cpu(mcast->mcmember.mlid), | ||
251 | .port_num = priv->port, | ||
252 | .sl = mcast->mcmember.sl, | ||
253 | .ah_flags = IB_AH_GRH, | ||
254 | .grh = { | ||
255 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), | ||
256 | .hop_limit = mcast->mcmember.hop_limit, | ||
257 | .sgid_index = 0, | ||
258 | .traffic_class = mcast->mcmember.traffic_class | ||
259 | } | ||
260 | }; | ||
261 | |||
262 | av.grh.dgid = mcast->mcmember.mgid; | ||
263 | |||
264 | if (ib_sa_rate_enum_to_int(mcast->mcmember.rate) > 0) | ||
265 | av.static_rate = (2 * priv->local_rate - | ||
266 | ib_sa_rate_enum_to_int(mcast->mcmember.rate) - 1) / | ||
267 | (priv->local_rate ? priv->local_rate : 1); | ||
268 | |||
269 | ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n", | ||
270 | av.static_rate, priv->local_rate, | ||
271 | ib_sa_rate_enum_to_int(mcast->mcmember.rate)); | ||
272 | |||
273 | mcast->ah = ipoib_create_ah(dev, priv->pd, &av); | ||
274 | if (!mcast->ah) { | ||
275 | ipoib_warn(priv, "ib_address_create failed\n"); | ||
276 | } else { | ||
277 | ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT | ||
278 | " AV %p, LID 0x%04x, SL %d\n", | ||
279 | IPOIB_GID_ARG(mcast->mcmember.mgid), | ||
280 | mcast->ah->ah, | ||
281 | be16_to_cpu(mcast->mcmember.mlid), | ||
282 | mcast->mcmember.sl); | ||
283 | } | ||
284 | } | ||
285 | |||
286 | /* actually send any queued packets */ | ||
287 | while (!skb_queue_empty(&mcast->pkt_queue)) { | ||
288 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | ||
289 | |||
290 | skb->dev = dev; | ||
291 | |||
292 | if (!skb->dst || !skb->dst->neighbour) { | ||
293 | /* put pseudoheader back on for next time */ | ||
294 | skb_push(skb, sizeof (struct ipoib_pseudoheader)); | ||
295 | } | ||
296 | |||
297 | if (dev_queue_xmit(skb)) | ||
298 | ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); | ||
299 | } | ||
300 | |||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | static void | ||
305 | ipoib_mcast_sendonly_join_complete(int status, | ||
306 | struct ib_sa_mcmember_rec *mcmember, | ||
307 | void *mcast_ptr) | ||
308 | { | ||
309 | struct ipoib_mcast *mcast = mcast_ptr; | ||
310 | struct net_device *dev = mcast->dev; | ||
311 | |||
312 | if (!status) | ||
313 | ipoib_mcast_join_finish(mcast, mcmember); | ||
314 | else { | ||
315 | if (mcast->logcount++ < 20) | ||
316 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " | ||
317 | IPOIB_GID_FMT ", status %d\n", | ||
318 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | ||
319 | |||
320 | /* Flush out any queued packets */ | ||
321 | while (!skb_queue_empty(&mcast->pkt_queue)) { | ||
322 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | ||
323 | |||
324 | skb->dev = dev; | ||
325 | |||
326 | dev_kfree_skb_any(skb); | ||
327 | } | ||
328 | |||
329 | /* Clear the busy flag so we try again */ | ||
330 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | ||
331 | } | ||
332 | |||
333 | complete(&mcast->done); | ||
334 | } | ||
335 | |||
336 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | ||
337 | { | ||
338 | struct net_device *dev = mcast->dev; | ||
339 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
340 | struct ib_sa_mcmember_rec rec = { | ||
341 | #if 0 /* Some SMs don't support send-only yet */ | ||
342 | .join_state = 4 | ||
343 | #else | ||
344 | .join_state = 1 | ||
345 | #endif | ||
346 | }; | ||
347 | int ret = 0; | ||
348 | |||
349 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | ||
350 | ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); | ||
351 | return -ENODEV; | ||
352 | } | ||
353 | |||
354 | if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { | ||
355 | ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); | ||
356 | return -EBUSY; | ||
357 | } | ||
358 | |||
359 | rec.mgid = mcast->mcmember.mgid; | ||
360 | rec.port_gid = priv->local_gid; | ||
361 | rec.pkey = be16_to_cpu(priv->pkey); | ||
362 | |||
363 | ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, | ||
364 | IB_SA_MCMEMBER_REC_MGID | | ||
365 | IB_SA_MCMEMBER_REC_PORT_GID | | ||
366 | IB_SA_MCMEMBER_REC_PKEY | | ||
367 | IB_SA_MCMEMBER_REC_JOIN_STATE, | ||
368 | 1000, GFP_ATOMIC, | ||
369 | ipoib_mcast_sendonly_join_complete, | ||
370 | mcast, &mcast->query); | ||
371 | if (ret < 0) { | ||
372 | ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", | ||
373 | ret); | ||
374 | } else { | ||
375 | ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT | ||
376 | ", starting join\n", | ||
377 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
378 | |||
379 | mcast->query_id = ret; | ||
380 | } | ||
381 | |||
382 | return ret; | ||
383 | } | ||
384 | |||
385 | static void ipoib_mcast_join_complete(int status, | ||
386 | struct ib_sa_mcmember_rec *mcmember, | ||
387 | void *mcast_ptr) | ||
388 | { | ||
389 | struct ipoib_mcast *mcast = mcast_ptr; | ||
390 | struct net_device *dev = mcast->dev; | ||
391 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
392 | |||
393 | ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT | ||
394 | " (status %d)\n", | ||
395 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | ||
396 | |||
397 | if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { | ||
398 | mcast->backoff = HZ; | ||
399 | down(&mcast_mutex); | ||
400 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
401 | queue_work(ipoib_workqueue, &priv->mcast_task); | ||
402 | up(&mcast_mutex); | ||
403 | complete(&mcast->done); | ||
404 | return; | ||
405 | } | ||
406 | |||
407 | if (status == -EINTR) { | ||
408 | complete(&mcast->done); | ||
409 | return; | ||
410 | } | ||
411 | |||
412 | if (status && mcast->logcount++ < 20) { | ||
413 | if (status == -ETIMEDOUT || status == -EINTR) { | ||
414 | ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT | ||
415 | ", status %d\n", | ||
416 | IPOIB_GID_ARG(mcast->mcmember.mgid), | ||
417 | status); | ||
418 | } else { | ||
419 | ipoib_warn(priv, "multicast join failed for " | ||
420 | IPOIB_GID_FMT ", status %d\n", | ||
421 | IPOIB_GID_ARG(mcast->mcmember.mgid), | ||
422 | status); | ||
423 | } | ||
424 | } | ||
425 | |||
426 | mcast->backoff *= 2; | ||
427 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | ||
428 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | ||
429 | |||
430 | mcast->query = NULL; | ||
431 | |||
432 | down(&mcast_mutex); | ||
433 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { | ||
434 | if (status == -ETIMEDOUT) | ||
435 | queue_work(ipoib_workqueue, &priv->mcast_task); | ||
436 | else | ||
437 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | ||
438 | mcast->backoff * HZ); | ||
439 | } else | ||
440 | complete(&mcast->done); | ||
441 | up(&mcast_mutex); | ||
442 | |||
443 | return; | ||
444 | } | ||
445 | |||
446 | static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | ||
447 | int create) | ||
448 | { | ||
449 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
450 | struct ib_sa_mcmember_rec rec = { | ||
451 | .join_state = 1 | ||
452 | }; | ||
453 | ib_sa_comp_mask comp_mask; | ||
454 | int ret = 0; | ||
455 | |||
456 | ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", | ||
457 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
458 | |||
459 | rec.mgid = mcast->mcmember.mgid; | ||
460 | rec.port_gid = priv->local_gid; | ||
461 | rec.pkey = be16_to_cpu(priv->pkey); | ||
462 | |||
463 | comp_mask = | ||
464 | IB_SA_MCMEMBER_REC_MGID | | ||
465 | IB_SA_MCMEMBER_REC_PORT_GID | | ||
466 | IB_SA_MCMEMBER_REC_PKEY | | ||
467 | IB_SA_MCMEMBER_REC_JOIN_STATE; | ||
468 | |||
469 | if (create) { | ||
470 | comp_mask |= | ||
471 | IB_SA_MCMEMBER_REC_QKEY | | ||
472 | IB_SA_MCMEMBER_REC_SL | | ||
473 | IB_SA_MCMEMBER_REC_FLOW_LABEL | | ||
474 | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; | ||
475 | |||
476 | rec.qkey = priv->broadcast->mcmember.qkey; | ||
477 | rec.sl = priv->broadcast->mcmember.sl; | ||
478 | rec.flow_label = priv->broadcast->mcmember.flow_label; | ||
479 | rec.traffic_class = priv->broadcast->mcmember.traffic_class; | ||
480 | } | ||
481 | |||
482 | ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, | ||
483 | mcast->backoff * 1000, GFP_ATOMIC, | ||
484 | ipoib_mcast_join_complete, | ||
485 | mcast, &mcast->query); | ||
486 | |||
487 | if (ret < 0) { | ||
488 | ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); | ||
489 | |||
490 | mcast->backoff *= 2; | ||
491 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | ||
492 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | ||
493 | |||
494 | down(&mcast_mutex); | ||
495 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
496 | queue_delayed_work(ipoib_workqueue, | ||
497 | &priv->mcast_task, | ||
498 | mcast->backoff); | ||
499 | up(&mcast_mutex); | ||
500 | } else | ||
501 | mcast->query_id = ret; | ||
502 | } | ||
503 | |||
504 | void ipoib_mcast_join_task(void *dev_ptr) | ||
505 | { | ||
506 | struct net_device *dev = dev_ptr; | ||
507 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
508 | |||
509 | if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
510 | return; | ||
511 | |||
512 | if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) | ||
513 | ipoib_warn(priv, "ib_gid_entry_get() failed\n"); | ||
514 | else | ||
515 | memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); | ||
516 | |||
517 | { | ||
518 | struct ib_port_attr attr; | ||
519 | |||
520 | if (!ib_query_port(priv->ca, priv->port, &attr)) { | ||
521 | priv->local_lid = attr.lid; | ||
522 | priv->local_rate = attr.active_speed * | ||
523 | ib_width_enum_to_int(attr.active_width); | ||
524 | } else | ||
525 | ipoib_warn(priv, "ib_query_port failed\n"); | ||
526 | } | ||
527 | |||
528 | if (!priv->broadcast) { | ||
529 | priv->broadcast = ipoib_mcast_alloc(dev, 1); | ||
530 | if (!priv->broadcast) { | ||
531 | ipoib_warn(priv, "failed to allocate broadcast group\n"); | ||
532 | down(&mcast_mutex); | ||
533 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
534 | queue_delayed_work(ipoib_workqueue, | ||
535 | &priv->mcast_task, HZ); | ||
536 | up(&mcast_mutex); | ||
537 | return; | ||
538 | } | ||
539 | |||
540 | memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | ||
541 | sizeof (union ib_gid)); | ||
542 | |||
543 | spin_lock_irq(&priv->lock); | ||
544 | __ipoib_mcast_add(dev, priv->broadcast); | ||
545 | spin_unlock_irq(&priv->lock); | ||
546 | } | ||
547 | |||
548 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | ||
549 | ipoib_mcast_join(dev, priv->broadcast, 0); | ||
550 | return; | ||
551 | } | ||
552 | |||
553 | while (1) { | ||
554 | struct ipoib_mcast *mcast = NULL; | ||
555 | |||
556 | spin_lock_irq(&priv->lock); | ||
557 | list_for_each_entry(mcast, &priv->multicast_list, list) { | ||
558 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) | ||
559 | && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) | ||
560 | && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | ||
561 | /* Found the next unjoined group */ | ||
562 | break; | ||
563 | } | ||
564 | } | ||
565 | spin_unlock_irq(&priv->lock); | ||
566 | |||
567 | if (&mcast->list == &priv->multicast_list) { | ||
568 | /* All done */ | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | ipoib_mcast_join(dev, mcast, 1); | ||
573 | return; | ||
574 | } | ||
575 | |||
576 | priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - | ||
577 | IPOIB_ENCAP_LEN; | ||
578 | dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); | ||
579 | |||
580 | ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); | ||
581 | |||
582 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); | ||
583 | netif_carrier_on(dev); | ||
584 | } | ||
585 | |||
586 | int ipoib_mcast_start_thread(struct net_device *dev) | ||
587 | { | ||
588 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
589 | |||
590 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | ||
591 | |||
592 | down(&mcast_mutex); | ||
593 | if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) | ||
594 | queue_work(ipoib_workqueue, &priv->mcast_task); | ||
595 | up(&mcast_mutex); | ||
596 | |||
597 | return 0; | ||
598 | } | ||
599 | |||
600 | int ipoib_mcast_stop_thread(struct net_device *dev) | ||
601 | { | ||
602 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
603 | struct ipoib_mcast *mcast; | ||
604 | |||
605 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | ||
606 | |||
607 | down(&mcast_mutex); | ||
608 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); | ||
609 | cancel_delayed_work(&priv->mcast_task); | ||
610 | up(&mcast_mutex); | ||
611 | |||
612 | flush_workqueue(ipoib_workqueue); | ||
613 | |||
614 | if (priv->broadcast && priv->broadcast->query) { | ||
615 | ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); | ||
616 | priv->broadcast->query = NULL; | ||
617 | ipoib_dbg_mcast(priv, "waiting for bcast\n"); | ||
618 | wait_for_completion(&priv->broadcast->done); | ||
619 | } | ||
620 | |||
621 | list_for_each_entry(mcast, &priv->multicast_list, list) { | ||
622 | if (mcast->query) { | ||
623 | ib_sa_cancel_query(mcast->query_id, mcast->query); | ||
624 | mcast->query = NULL; | ||
625 | ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", | ||
626 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
627 | wait_for_completion(&mcast->done); | ||
628 | } | ||
629 | } | ||
630 | |||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | ||
635 | { | ||
636 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
637 | struct ib_sa_mcmember_rec rec = { | ||
638 | .join_state = 1 | ||
639 | }; | ||
640 | int ret = 0; | ||
641 | |||
642 | if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) | ||
643 | return 0; | ||
644 | |||
645 | ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", | ||
646 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
647 | |||
648 | rec.mgid = mcast->mcmember.mgid; | ||
649 | rec.port_gid = priv->local_gid; | ||
650 | rec.pkey = be16_to_cpu(priv->pkey); | ||
651 | |||
652 | /* Remove ourselves from the multicast group */ | ||
653 | ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), | ||
654 | &mcast->mcmember.mgid); | ||
655 | if (ret) | ||
656 | ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); | ||
657 | |||
658 | /* | ||
659 | * Just make one shot at leaving and don't wait for a reply; | ||
660 | * if we fail, too bad. | ||
661 | */ | ||
662 | ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, | ||
663 | IB_SA_MCMEMBER_REC_MGID | | ||
664 | IB_SA_MCMEMBER_REC_PORT_GID | | ||
665 | IB_SA_MCMEMBER_REC_PKEY | | ||
666 | IB_SA_MCMEMBER_REC_JOIN_STATE, | ||
667 | 0, GFP_ATOMIC, NULL, | ||
668 | mcast, &mcast->query); | ||
669 | if (ret < 0) | ||
670 | ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " | ||
671 | "for leave (result = %d)\n", ret); | ||
672 | |||
673 | return 0; | ||
674 | } | ||
675 | |||
676 | void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, | ||
677 | struct sk_buff *skb) | ||
678 | { | ||
679 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
680 | struct ipoib_mcast *mcast; | ||
681 | |||
682 | /* | ||
683 | * We can only be called from ipoib_start_xmit, so we're | ||
684 | * inside tx_lock -- no need to save/restore flags. | ||
685 | */ | ||
686 | spin_lock(&priv->lock); | ||
687 | |||
688 | mcast = __ipoib_mcast_find(dev, mgid); | ||
689 | if (!mcast) { | ||
690 | /* Let's create a new send only group now */ | ||
691 | ipoib_dbg_mcast(priv, "setting up send only multicast group for " | ||
692 | IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); | ||
693 | |||
694 | mcast = ipoib_mcast_alloc(dev, 0); | ||
695 | if (!mcast) { | ||
696 | ipoib_warn(priv, "unable to allocate memory for " | ||
697 | "multicast structure\n"); | ||
698 | dev_kfree_skb_any(skb); | ||
699 | goto out; | ||
700 | } | ||
701 | |||
702 | set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); | ||
703 | mcast->mcmember.mgid = *mgid; | ||
704 | __ipoib_mcast_add(dev, mcast); | ||
705 | list_add_tail(&mcast->list, &priv->multicast_list); | ||
706 | } | ||
707 | |||
708 | if (!mcast->ah) { | ||
709 | if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) | ||
710 | skb_queue_tail(&mcast->pkt_queue, skb); | ||
711 | else | ||
712 | dev_kfree_skb_any(skb); | ||
713 | |||
714 | if (mcast->query) | ||
715 | ipoib_dbg_mcast(priv, "no address vector, " | ||
716 | "but multicast join already started\n"); | ||
717 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | ||
718 | ipoib_mcast_sendonly_join(mcast); | ||
719 | |||
720 | /* | ||
721 | * If lookup completes between here and out:, don't | ||
722 | * want to send packet twice. | ||
723 | */ | ||
724 | mcast = NULL; | ||
725 | } | ||
726 | |||
727 | out: | ||
728 | if (mcast && mcast->ah) { | ||
729 | if (skb->dst && | ||
730 | skb->dst->neighbour && | ||
731 | !*to_ipoib_neigh(skb->dst->neighbour)) { | ||
732 | struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); | ||
733 | |||
734 | if (neigh) { | ||
735 | kref_get(&mcast->ah->ref); | ||
736 | neigh->ah = mcast->ah; | ||
737 | neigh->neighbour = skb->dst->neighbour; | ||
738 | *to_ipoib_neigh(skb->dst->neighbour) = neigh; | ||
739 | list_add_tail(&neigh->list, &mcast->neigh_list); | ||
740 | } | ||
741 | } | ||
742 | |||
743 | ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); | ||
744 | } | ||
745 | |||
746 | spin_unlock(&priv->lock); | ||
747 | } | ||
748 | |||
749 | void ipoib_mcast_dev_flush(struct net_device *dev) | ||
750 | { | ||
751 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
752 | LIST_HEAD(remove_list); | ||
753 | struct ipoib_mcast *mcast, *tmcast, *nmcast; | ||
754 | unsigned long flags; | ||
755 | |||
756 | ipoib_dbg_mcast(priv, "flushing multicast list\n"); | ||
757 | |||
758 | spin_lock_irqsave(&priv->lock, flags); | ||
759 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { | ||
760 | nmcast = ipoib_mcast_alloc(dev, 0); | ||
761 | if (nmcast) { | ||
762 | nmcast->flags = | ||
763 | mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY); | ||
764 | |||
765 | nmcast->mcmember.mgid = mcast->mcmember.mgid; | ||
766 | |||
767 | /* Add the new group in before the to-be-destroyed group */ | ||
768 | list_add_tail(&nmcast->list, &mcast->list); | ||
769 | list_del_init(&mcast->list); | ||
770 | |||
771 | rb_replace_node(&mcast->rb_node, &nmcast->rb_node, | ||
772 | &priv->multicast_tree); | ||
773 | |||
774 | list_add_tail(&mcast->list, &remove_list); | ||
775 | } else { | ||
776 | ipoib_warn(priv, "could not reallocate multicast group " | ||
777 | IPOIB_GID_FMT "\n", | ||
778 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
779 | } | ||
780 | } | ||
781 | |||
782 | if (priv->broadcast) { | ||
783 | nmcast = ipoib_mcast_alloc(dev, 0); | ||
784 | if (nmcast) { | ||
785 | nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid; | ||
786 | |||
787 | rb_replace_node(&priv->broadcast->rb_node, | ||
788 | &nmcast->rb_node, | ||
789 | &priv->multicast_tree); | ||
790 | |||
791 | list_add_tail(&priv->broadcast->list, &remove_list); | ||
792 | } | ||
793 | |||
794 | priv->broadcast = nmcast; | ||
795 | } | ||
796 | |||
797 | spin_unlock_irqrestore(&priv->lock, flags); | ||
798 | |||
799 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | ||
800 | ipoib_mcast_leave(dev, mcast); | ||
801 | ipoib_mcast_free(mcast); | ||
802 | } | ||
803 | } | ||
804 | |||
805 | void ipoib_mcast_dev_down(struct net_device *dev) | ||
806 | { | ||
807 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
808 | unsigned long flags; | ||
809 | |||
810 | /* Delete broadcast since it will be recreated */ | ||
811 | if (priv->broadcast) { | ||
812 | ipoib_dbg_mcast(priv, "deleting broadcast group\n"); | ||
813 | |||
814 | spin_lock_irqsave(&priv->lock, flags); | ||
815 | rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); | ||
816 | spin_unlock_irqrestore(&priv->lock, flags); | ||
817 | ipoib_mcast_leave(dev, priv->broadcast); | ||
818 | ipoib_mcast_free(priv->broadcast); | ||
819 | priv->broadcast = NULL; | ||
820 | } | ||
821 | } | ||
822 | |||
823 | void ipoib_mcast_restart_task(void *dev_ptr) | ||
824 | { | ||
825 | struct net_device *dev = dev_ptr; | ||
826 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
827 | struct dev_mc_list *mclist; | ||
828 | struct ipoib_mcast *mcast, *tmcast; | ||
829 | LIST_HEAD(remove_list); | ||
830 | unsigned long flags; | ||
831 | |||
832 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | ||
833 | |||
834 | ipoib_mcast_stop_thread(dev); | ||
835 | |||
836 | spin_lock_irqsave(&priv->lock, flags); | ||
837 | |||
838 | /* | ||
839 | * Unfortunately, the networking core only gives us a list of all of | ||
840 | * the multicast hardware addresses. We need to figure out which ones | ||
841 | * are new and which ones have been removed | ||
842 | */ | ||
843 | |||
844 | /* Clear out the found flag */ | ||
845 | list_for_each_entry(mcast, &priv->multicast_list, list) | ||
846 | clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | ||
847 | |||
848 | /* Mark all of the entries that are found or don't exist */ | ||
849 | for (mclist = dev->mc_list; mclist; mclist = mclist->next) { | ||
850 | union ib_gid mgid; | ||
851 | |||
852 | memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); | ||
853 | |||
854 | /* Add in the P_Key */ | ||
855 | mgid.raw[4] = (priv->pkey >> 8) & 0xff; | ||
856 | mgid.raw[5] = priv->pkey & 0xff; | ||
857 | |||
858 | mcast = __ipoib_mcast_find(dev, &mgid); | ||
859 | if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | ||
860 | struct ipoib_mcast *nmcast; | ||
861 | |||
862 | /* Not found or send-only group, let's add a new entry */ | ||
863 | ipoib_dbg_mcast(priv, "adding multicast entry for mgid " | ||
864 | IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); | ||
865 | |||
866 | nmcast = ipoib_mcast_alloc(dev, 0); | ||
867 | if (!nmcast) { | ||
868 | ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); | ||
869 | continue; | ||
870 | } | ||
871 | |||
872 | set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); | ||
873 | |||
874 | nmcast->mcmember.mgid = mgid; | ||
875 | |||
876 | if (mcast) { | ||
877 | /* Destroy the send only entry */ | ||
878 | list_del(&mcast->list); | ||
879 | list_add_tail(&mcast->list, &remove_list); | ||
880 | |||
881 | rb_replace_node(&mcast->rb_node, | ||
882 | &nmcast->rb_node, | ||
883 | &priv->multicast_tree); | ||
884 | } else | ||
885 | __ipoib_mcast_add(dev, nmcast); | ||
886 | |||
887 | list_add_tail(&nmcast->list, &priv->multicast_list); | ||
888 | } | ||
889 | |||
890 | if (mcast) | ||
891 | set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | ||
892 | } | ||
893 | |||
894 | /* Remove all of the entries don't exist anymore */ | ||
895 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { | ||
896 | if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && | ||
897 | !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | ||
898 | ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", | ||
899 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
900 | |||
901 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | ||
902 | |||
903 | /* Move to the remove list */ | ||
904 | list_del(&mcast->list); | ||
905 | list_add_tail(&mcast->list, &remove_list); | ||
906 | } | ||
907 | } | ||
908 | spin_unlock_irqrestore(&priv->lock, flags); | ||
909 | |||
910 | /* We have to cancel outside of the spinlock */ | ||
911 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | ||
912 | ipoib_mcast_leave(mcast->dev, mcast); | ||
913 | ipoib_mcast_free(mcast); | ||
914 | } | ||
915 | |||
916 | if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) | ||
917 | ipoib_mcast_start_thread(dev); | ||
918 | } | ||
919 | |||
920 | struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) | ||
921 | { | ||
922 | struct ipoib_mcast_iter *iter; | ||
923 | |||
924 | iter = kmalloc(sizeof *iter, GFP_KERNEL); | ||
925 | if (!iter) | ||
926 | return NULL; | ||
927 | |||
928 | iter->dev = dev; | ||
929 | memset(iter->mgid.raw, 0, sizeof iter->mgid); | ||
930 | |||
931 | if (ipoib_mcast_iter_next(iter)) { | ||
932 | ipoib_mcast_iter_free(iter); | ||
933 | return NULL; | ||
934 | } | ||
935 | |||
936 | return iter; | ||
937 | } | ||
938 | |||
/* Release an iterator obtained from ipoib_mcast_iter_init(). */
void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter)
{
	kfree(iter);
}
943 | |||
944 | int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) | ||
945 | { | ||
946 | struct ipoib_dev_priv *priv = netdev_priv(iter->dev); | ||
947 | struct rb_node *n; | ||
948 | struct ipoib_mcast *mcast; | ||
949 | int ret = 1; | ||
950 | |||
951 | spin_lock_irq(&priv->lock); | ||
952 | |||
953 | n = rb_first(&priv->multicast_tree); | ||
954 | |||
955 | while (n) { | ||
956 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | ||
957 | |||
958 | if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, | ||
959 | sizeof (union ib_gid)) < 0) { | ||
960 | iter->mgid = mcast->mcmember.mgid; | ||
961 | iter->created = mcast->created; | ||
962 | iter->queuelen = skb_queue_len(&mcast->pkt_queue); | ||
963 | iter->complete = !!mcast->ah; | ||
964 | iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); | ||
965 | |||
966 | ret = 0; | ||
967 | |||
968 | break; | ||
969 | } | ||
970 | |||
971 | n = rb_next(n); | ||
972 | } | ||
973 | |||
974 | spin_unlock_irq(&priv->lock); | ||
975 | |||
976 | return ret; | ||
977 | } | ||
978 | |||
979 | void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, | ||
980 | union ib_gid *mgid, | ||
981 | unsigned long *created, | ||
982 | unsigned int *queuelen, | ||
983 | unsigned int *complete, | ||
984 | unsigned int *send_only) | ||
985 | { | ||
986 | *mgid = iter->mgid; | ||
987 | *created = iter->created; | ||
988 | *queuelen = iter->queuelen; | ||
989 | *complete = iter->complete; | ||
990 | *send_only = iter->send_only; | ||
991 | } | ||