author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-14 00:16:39 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-14 00:16:39 -0500
commit	93bbad8fe13a25dcf7f3bc628a71d1a7642ae61b (patch)
tree	0f8fa56347979a3d2dae89f905d134d191d5c88a /drivers/infiniband/ulp
parent	9468482bd4c3b89abe04a770848d5eaa1ea830b0 (diff)
parent	b2875d4c39759a732203db32f245cc6d8bbdd7cf (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
IB/mthca: Always fill MTTs from CPU
IB/mthca: Merge MR and FMR space on 64-bit systems
IB/mthca: Fix access to MTT and MPT tables on non-cache-coherent CPUs
IB/mthca: Give reserved MTTs a separate cache line
IB/mthca: Fix reserved MTTs calculation on mem-free HCAs
RDMA/cxgb3: Add driver for Chelsio T3 RNIC
IB: Remove redundant "_wq" from workqueue names
RDMA/cma: Increment port number after close to avoid re-use
IB/ehca: Fix memleak on module unloading
IB/mthca: Work around gcc bug on sparc64
IPoIB: Connected mode experimental support
IB/core: Use ARRAY_SIZE macro for mandatory_table
IB/mthca: Use correct structure size in call to memset()
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--	drivers/infiniband/ulp/ipoib/Kconfig	16
-rw-r--r--	drivers/infiniband/ulp/ipoib/Makefile	1
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib.h	215
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_cm.c	1237
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_ib.c	29
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_main.c	63
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_multicast.c	4
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_verbs.c	40
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_vlan.c	2
9 files changed, 1575 insertions, 32 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index c75322d820d4..af78ccc4ce71 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_IPOIB
 	tristate "IP-over-InfiniBand"
-	depends on INFINIBAND && NETDEVICES && INET
+	depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
 	---help---
 	  Support for the IP-over-InfiniBand protocol (IPoIB). This
 	  transports IP packets over InfiniBand so you can use your IB
@@ -8,6 +8,20 @@ config INFINIBAND_IPOIB
 
 	  See Documentation/infiniband/ipoib.txt for more information
 
+config INFINIBAND_IPOIB_CM
+	bool "IP-over-InfiniBand Connected Mode support"
+	depends on INFINIBAND_IPOIB && EXPERIMENTAL
+	default n
+	---help---
+	  This option enables experimental support for IPoIB connected mode.
+	  After enabling this option, you need to switch to connected mode through
+	  /sys/class/net/ibXXX/mode to actually create connections, and then increase
+	  the interface MTU with e.g. ifconfig ib0 mtu 65520.
+
+	  WARNING: Enabling connected mode will trigger some
+	  packet drops for multicast and UD mode traffic from this interface,
+	  unless you limit mtu for these destinations to 2044.
+
 config INFINIBAND_IPOIB_DEBUG
 	bool "IP-over-InfiniBand debugging" if EMBEDDED
 	depends on INFINIBAND_IPOIB
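In practice the mode switch described in this help text is two commands, sketched here for an interface named ib0 (the strings "connected" and "datagram" are exactly what set_mode in ipoib_cm.c below accepts):

	echo connected > /sys/class/net/ib0/mode
	ifconfig ib0 mtu 65520

The 65520 upper bound corresponds to IPOIB_CM_MTU (0x10000 minus 16 bytes of padding) defined in ipoib.h in this patch.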
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index 8935e74ae3f8..98ee38e8c2c4 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \
 	ipoib_multicast.o \
 	ipoib_verbs.o \
 	ipoib_vlan.o
+ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 07deee8f81ce..2594db2030b3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -62,6 +62,10 @@ enum {
 
 	IPOIB_ENCAP_LEN = 4,
 
+	IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
+	IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
+	IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
+	IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
 	IPOIB_RX_RING_SIZE = 128,
 	IPOIB_TX_RING_SIZE = 64,
 	IPOIB_MAX_QUEUE_SIZE = 8192,
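Worked through with 4 KB pages (the derived values scale with PAGE_SIZE): IPOIB_CM_MTU = 0x10000 - 0x10 = 65520, so IPOIB_CM_BUF_SIZE = 65520 + 4 = 65524. IPOIB_CM_HEAD_SIZE = 65524 mod 4096 = 4084 bytes land in the linear skb head, and IPOIB_CM_RX_SG = ALIGN(65524, 4096) / 4096 = 16 scatter/gather entries: the head plus 15 full pages, since 4084 + 15 * 4096 = 65524.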
@@ -81,6 +85,8 @@ enum {
 	IPOIB_MCAST_RUN = 6,
 	IPOIB_STOP_REAPER = 7,
 	IPOIB_MCAST_STARTED = 8,
+	IPOIB_FLAG_NETIF_STOPPED = 9,
+	IPOIB_FLAG_ADMIN_CM = 10,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -90,6 +96,13 @@ enum {
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
 };
 
+#define IPOIB_OP_RECV (1ul << 31)
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+#define IPOIB_CM_OP_SRQ (1ul << 30)
+#else
+#define IPOIB_CM_OP_SRQ (0)
+#endif
+
 /* structs */
 
 struct ipoib_header {
@@ -113,6 +126,59 @@ struct ipoib_tx_buf {
 	u64 mapping;
 };
 
+struct ib_cm_id;
+
+struct ipoib_cm_data {
+	__be32 qpn; /* High byte MUST be ignored on receive */
+	__be32 mtu;
+};
+
+struct ipoib_cm_rx {
+	struct ib_cm_id *id;
+	struct ib_qp *qp;
+	struct list_head list;
+	struct net_device *dev;
+	unsigned long jiffies;
+};
+
+struct ipoib_cm_tx {
+	struct ib_cm_id *id;
+	struct ib_cq *cq;
+	struct ib_qp *qp;
+	struct list_head list;
+	struct net_device *dev;
+	struct ipoib_neigh *neigh;
+	struct ipoib_path *path;
+	struct ipoib_tx_buf *tx_ring;
+	unsigned tx_head;
+	unsigned tx_tail;
+	unsigned long flags;
+	u32 mtu;
+	struct ib_wc ibwc[IPOIB_NUM_WC];
+};
+
+struct ipoib_cm_rx_buf {
+	struct sk_buff *skb;
+	u64 mapping[IPOIB_CM_RX_SG];
+};
+
+struct ipoib_cm_dev_priv {
+	struct ib_srq *srq;
+	struct ipoib_cm_rx_buf *srq_ring;
+	struct ib_cm_id *id;
+	struct list_head passive_ids;
+	struct work_struct start_task;
+	struct work_struct reap_task;
+	struct work_struct skb_task;
+	struct delayed_work stale_task;
+	struct sk_buff_head skb_queue;
+	struct list_head start_list;
+	struct list_head reap_list;
+	struct ib_wc ibwc[IPOIB_NUM_WC];
+	struct ib_sge rx_sge[IPOIB_CM_RX_SG];
+	struct ib_recv_wr rx_wr;
+};
+
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
@@ -179,6 +245,10 @@ struct ipoib_dev_priv {
 	struct list_head child_intfs;
 	struct list_head list;
 
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+	struct ipoib_cm_dev_priv cm;
+#endif
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 	struct list_head fs_list;
 	struct dentry *mcg_dentry;
@@ -212,6 +282,9 @@ struct ipoib_path {
 
 struct ipoib_neigh {
 	struct ipoib_ah *ah;
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+	struct ipoib_cm_tx *cm;
+#endif
 	union ib_gid dgid;
 	struct sk_buff_head queue;
 
@@ -315,6 +388,146 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
 void ipoib_pkey_poll(struct work_struct *work);
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+
+#define IPOIB_FLAGS_RC 0x80
+#define IPOIB_FLAGS_UC 0x40
+
+/* We don't support UC connections at the moment */
+#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
+
+static inline int ipoib_cm_admin_enabled(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
+		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	return IPOIB_CM_SUPPORTED(n->ha) &&
+		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
+
+{
+	return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
+}
+
+static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
+{
+	return neigh->cm;
+}
+
+static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
+{
+	neigh->cm = tx;
+}
+
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
+int ipoib_cm_dev_open(struct net_device *dev);
+void ipoib_cm_dev_stop(struct net_device *dev);
+int ipoib_cm_dev_init(struct net_device *dev);
+int ipoib_cm_add_mode_attr(struct net_device *dev);
+void ipoib_cm_dev_cleanup(struct net_device *dev);
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+				       struct ipoib_neigh *neigh);
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
+void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
+			   unsigned int mtu);
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+#else
+
+struct ipoib_cm_tx;
+
+static inline int ipoib_cm_admin_enabled(struct net_device *dev)
+{
+	return 0;
+}
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+
+{
+	return 0;
+}
+
+static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
+
+{
+	return 0;
+}
+
+static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
+{
+	return NULL;
+}
+
+static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
+{
+}
+
+static inline
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
+{
+	return;
+}
+
+static inline
+int ipoib_cm_dev_open(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline
+void ipoib_cm_dev_stop(struct net_device *dev)
+{
+	return;
+}
+
+static inline
+int ipoib_cm_dev_init(struct net_device *dev)
+{
+	return -ENOSYS;
+}
+
+static inline
+void ipoib_cm_dev_cleanup(struct net_device *dev)
+{
+	return;
+}
+
+static inline
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+				       struct ipoib_neigh *neigh)
+{
+	return NULL;
+}
+
+static inline
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+{
+	return;
+}
+
+static inline
+int ipoib_cm_add_mode_attr(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
+					 unsigned int mtu)
+{
+	dev_kfree_skb_any(skb);
+}
+
+static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+}
+
+#endif
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 void ipoib_create_debug_files(struct net_device *dev);
 void ipoib_delete_debug_files(struct net_device *dev);
@@ -392,4 +605,6 @@ extern int ipoib_debug_level;
 
 #define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw)
 
+#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
+
 #endif /* _IPOIB_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
new file mode 100644
index 000000000000..2d483874a589
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -0,0 +1,1237 @@
+/*
+ * Copyright (c) 2006 Mellanox Technologies. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include <rdma/ib_cm.h>
+#include <rdma/ib_cache.h>
+#include <net/dst.h>
+#include <net/icmp.h>
+#include <linux/icmpv6.h>
+
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
+static int data_debug_level;
+
+module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
+MODULE_PARM_DESC(cm_data_debug_level,
+		 "Enable data path debug tracing for connected mode if > 0");
+#endif
+
+#include "ipoib.h"
+
+#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
+
+#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
+#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
+#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
+#define IPOIB_CM_RX_UPDATE_MASK (0x3)
+
+struct ipoib_cm_id {
+	struct ib_cm_id *id;
+	int flags;
+	u32 remote_qpn;
+	u32 remote_mtu;
+};
+
+static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
+			       struct ib_cm_event *event);
+
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
+				  u64 mapping[IPOIB_CM_RX_SG])
+{
+	int i;
+
+	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+	for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i)
+		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+}
+
+static int ipoib_cm_post_receive(struct net_device *dev, int id)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_recv_wr *bad_wr;
+	int i, ret;
+
+	priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+
+	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+	if (unlikely(ret)) {
+		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+		ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
+		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+		priv->cm.srq_ring[id].skb = NULL;
+	}
+
+	return ret;
+}
+
+static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
+				 u64 mapping[IPOIB_CM_RX_SG])
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	int i;
+
+	skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	/*
+	 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
+	 * IP header to a multiple of 16.
+	 */
+	skb_reserve(skb, 12);
+
+	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
+				       DMA_FROM_DEVICE);
+	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
+		dev_kfree_skb_any(skb);
+		return -EIO;
+	}
+
+	for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) {
+		struct page *page = alloc_page(GFP_ATOMIC);
+
+		if (!page)
+			goto partial_error;
+		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
+
+		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
+						 0, PAGE_SIZE, DMA_FROM_DEVICE);
+		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
+			goto partial_error;
+	}
+
+	priv->cm.srq_ring[id].skb = skb;
+	return 0;
+
+partial_error:
+
+	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+	for (; i > 0; --i)
+		ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
+
+	kfree_skb(skb);
+	return -ENOMEM;
+}
+
+static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
+					   struct ipoib_cm_rx *p)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_init_attr attr = {
+		.send_cq = priv->cq, /* does not matter, we never send anything */
+		.recv_cq = priv->cq,
+		.srq = priv->cm.srq,
+		.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+		.sq_sig_type = IB_SIGNAL_ALL_WR,
+		.qp_type = IB_QPT_RC,
+		.qp_context = p,
+	};
+	return ib_create_qp(priv->pd, &attr);
+}
+
+static int ipoib_cm_modify_rx_qp(struct net_device *dev,
+				 struct ib_cm_id *cm_id, struct ib_qp *qp,
+				 unsigned psn)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
+		return ret;
+	}
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
+		return ret;
+	}
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+		return ret;
+	}
+	qp_attr.rq_psn = psn;
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
+			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
+			     unsigned psn)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_data data = {};
+	struct ib_cm_rep_param rep = {};
+
+	data.qpn = cpu_to_be32(priv->qp->qp_num);
+	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
+
+	rep.private_data = &data;
+	rep.private_data_len = sizeof data;
+	rep.flow_control = 0;
+	rep.rnr_retry_count = req->rnr_retry_count;
+	rep.target_ack_delay = 20; /* FIXME */
+	rep.srq = 1;
+	rep.qp_num = qp->qp_num;
+	rep.starting_psn = psn;
+	return ib_send_cm_rep(cm_id, &rep);
+}
+
+static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct net_device *dev = cm_id->context;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_rx *p;
+	unsigned long flags;
+	unsigned psn;
+	int ret;
+
+	ipoib_dbg(priv, "REQ arrived\n");
+	p = kzalloc(sizeof *p, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+	p->dev = dev;
+	p->id = cm_id;
+	p->qp = ipoib_cm_create_rx_qp(dev, p);
+	if (IS_ERR(p->qp)) {
+		ret = PTR_ERR(p->qp);
+		goto err_qp;
+	}
+
+	psn = random32() & 0xffffff;
+	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+	if (ret)
+		goto err_modify;
+
+	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
+	if (ret) {
+		ipoib_warn(priv, "failed to send REP: %d\n", ret);
+		goto err_rep;
+	}
+
+	cm_id->context = p;
+	p->jiffies = jiffies;
+	spin_lock_irqsave(&priv->lock, flags);
+	list_add(&p->list, &priv->cm.passive_ids);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	queue_delayed_work(ipoib_workqueue,
+			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+	return 0;
+
+err_rep:
+err_modify:
+	ib_destroy_qp(p->qp);
+err_qp:
+	kfree(p);
+	return ret;
+}
+
+static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
+			       struct ib_cm_event *event)
+{
+	struct ipoib_cm_rx *p;
+	struct ipoib_dev_priv *priv;
+	unsigned long flags;
+	int ret;
+
+	switch (event->event) {
+	case IB_CM_REQ_RECEIVED:
+		return ipoib_cm_req_handler(cm_id, event);
+	case IB_CM_DREQ_RECEIVED:
+		p = cm_id->context;
+		ib_send_cm_drep(cm_id, NULL, 0);
+		/* Fall through */
+	case IB_CM_REJ_RECEIVED:
+		p = cm_id->context;
+		priv = netdev_priv(p->dev);
+		spin_lock_irqsave(&priv->lock, flags);
+		if (list_empty(&p->list))
+			ret = 0; /* Connection is going away already. */
+		else {
+			list_del_init(&p->list);
+			ret = -ECONNRESET;
+		}
+		spin_unlock_irqrestore(&priv->lock, flags);
+		if (ret) {
+			ib_destroy_qp(p->qp);
+			kfree(p);
+			return ret;
+		}
+		return 0;
+	default:
+		return 0;
+	}
+}
+/* Adjust length of skb with fragments to match received data */
+static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
+			  unsigned int length)
+{
+	int i, num_frags;
+	unsigned int size;
+
+	/* put header into skb */
+	size = min(length, hdr_space);
+	skb->tail += size;
+	skb->len += size;
+	length -= size;
+
+	num_frags = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < num_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		if (length == 0) {
+			/* don't need this page */
+			__free_page(frag->page);
+			--skb_shinfo(skb)->nr_frags;
+		} else {
+			size = min(length, (unsigned) PAGE_SIZE);
+
+			frag->size = size;
+			skb->data_len += size;
+			skb->truesize += size;
+			skb->len += size;
+			length -= size;
+		}
+	}
+}
+
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+	struct sk_buff *skb;
+	struct ipoib_cm_rx *p;
+	unsigned long flags;
+	u64 mapping[IPOIB_CM_RX_SG];
+
+	ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
+
+	if (unlikely(wr_id >= ipoib_recvq_size)) {
+		ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_recvq_size);
+		return;
+	}
+
+	skb = priv->cm.srq_ring[wr_id].skb;
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		ipoib_dbg(priv, "cm recv error "
+			  "(status=%d, wrid=%d vend_err %x)\n",
+			  wc->status, wr_id, wc->vendor_err);
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
+
+	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
+		p = wc->qp->qp_context;
+		if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
+			spin_lock_irqsave(&priv->lock, flags);
+			p->jiffies = jiffies;
+			/* Move this entry to list head, but do
+			 * not re-add it if it has been removed. */
+			if (!list_empty(&p->list))
+				list_move(&p->list, &priv->cm.passive_ids);
+			spin_unlock_irqrestore(&priv->lock, flags);
+			queue_delayed_work(ipoib_workqueue,
+					   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+		}
+	}
+
+	if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) {
+		/*
+		 * If we can't allocate a new RX buffer, dump
+		 * this packet and reuse the old buffer.
+		 */
+		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
+
+	ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping);
+	memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping);
+
+	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+		       wc->byte_len, wc->slid);
+
+	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len);
+
+	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+	skb->mac.raw = skb->data;
+	skb_pull(skb, IPOIB_ENCAP_LEN);
+
+	dev->last_rx = jiffies;
+	++priv->stats.rx_packets;
+	priv->stats.rx_bytes += skb->len;
+
+	skb->dev = dev;
+	/* XXX get correct PACKET_ type here */
+	skb->pkt_type = PACKET_HOST;
+	netif_rx_ni(skb);
+
+repost:
+	if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
+		ipoib_warn(priv, "ipoib_cm_post_receive failed "
+			   "for buf %d\n", wr_id);
+}
+
+static inline int post_send(struct ipoib_dev_priv *priv,
+			    struct ipoib_cm_tx *tx,
+			    unsigned int wr_id,
+			    u64 addr, int len)
+{
+	struct ib_send_wr *bad_wr;
+
+	priv->tx_sge.addr = addr;
+	priv->tx_sge.length = len;
+
+	priv->tx_wr.wr_id = wr_id;
+
+	return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+}
+
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_tx_buf *tx_req;
+	u64 addr;
+
+	if (unlikely(skb->len > tx->mtu)) {
+		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
+			   skb->len, tx->mtu);
+		++priv->stats.tx_dropped;
+		++priv->stats.tx_errors;
+		ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN);
+		return;
+	}
+
+	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
+		       tx->tx_head, skb->len, tx->qp->qp_num);
+
+	/*
+	 * We put the skb into the tx_ring _before_ we call post_send()
+	 * because it's entirely possible that the completion handler will
+	 * run before we execute anything after the post_send().  That
+	 * means we have to make sure everything is properly recorded and
+	 * our state is consistent before we call post_send().
+	 */
+	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
+	tx_req->skb = skb;
+	addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
+	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+		++priv->stats.tx_errors;
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	tx_req->mapping = addr;
+
+	if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
+			       addr, skb->len))) {
+		ipoib_warn(priv, "post_send failed\n");
+		++priv->stats.tx_errors;
+		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+		dev_kfree_skb_any(skb);
+	} else {
+		dev->trans_start = jiffies;
+		++tx->tx_head;
+
+		if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
+			ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
+				  tx->qp->qp_num);
+			netif_stop_queue(dev);
+			set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+		}
+	}
+}
+
+static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
+				  struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id;
+	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
+
+	ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
+
+	if (unlikely(wr_id >= ipoib_sendq_size)) {
+		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_sendq_size);
+		return;
+	}
+
+	tx_req = &tx->tx_ring[wr_id];
+
+	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+
+	/* FIXME: is this right? Shouldn't we only increment on success? */
+	++priv->stats.tx_packets;
+	priv->stats.tx_bytes += tx_req->skb->len;
+
+	dev_kfree_skb_any(tx_req->skb);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	++tx->tx_tail;
+	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
+	    tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
+		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+		netif_wake_queue(dev);
+	}
+
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR) {
+		struct ipoib_neigh *neigh;
+
+		ipoib_dbg(priv, "failed cm send event "
+			  "(status=%d, wrid=%d vend_err %x)\n",
+			  wc->status, wr_id, wc->vendor_err);
+
+		spin_lock(&priv->lock);
+		neigh = tx->neigh;
+
+		if (neigh) {
+			neigh->cm = NULL;
+			list_del(&neigh->list);
+			if (neigh->ah)
+				ipoib_put_ah(neigh->ah);
+			ipoib_neigh_free(dev, neigh);
+
+			tx->neigh = NULL;
+		}
+
+		/* queue would be re-started anyway when TX is destroyed,
+		 * but it makes sense to do it ASAP here. */
+		if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
+			netif_wake_queue(dev);
+
+		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+			list_move(&tx->list, &priv->cm.reap_list);
+			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		}
+
+		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
+
+		spin_unlock(&priv->lock);
+	}
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
+{
+	struct ipoib_cm_tx *tx = tx_ptr;
+	int n, i;
+
+	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	do {
+		n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
+		for (i = 0; i < n; ++i)
+			ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
+	} while (n == IPOIB_NUM_WC);
+}
+
+int ipoib_cm_dev_open(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int ret;
+
+	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
+		return 0;
+
+	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
+	if (IS_ERR(priv->cm.id)) {
+		printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
+		return PTR_ERR(priv->cm.id);
+	}
+
+	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
+			   0, NULL);
+	if (ret) {
+		printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
+		       IPOIB_CM_IETF_ID | priv->qp->qp_num);
+		ib_destroy_cm_id(priv->cm.id);
+		return ret;
+	}
+	return 0;
+}
+
+void ipoib_cm_dev_stop(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_rx *p;
+	unsigned long flags;
+
+	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
+		return;
+
+	ib_destroy_cm_id(priv->cm.id);
+	spin_lock_irqsave(&priv->lock, flags);
+	while (!list_empty(&priv->cm.passive_ids)) {
+		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+		list_del_init(&p->list);
+		spin_unlock_irqrestore(&priv->lock, flags);
+		ib_destroy_cm_id(p->id);
+		ib_destroy_qp(p->qp);
+		kfree(p);
+		spin_lock_irqsave(&priv->lock, flags);
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	cancel_delayed_work(&priv->cm.stale_task);
+}
+
+static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct ipoib_cm_tx *p = cm_id->context;
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_cm_data *data = event->private_data;
+	struct sk_buff_head skqueue;
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	p->mtu = be32_to_cpu(data->mtu);
+
+	if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) {
+		ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n",
+			   p->mtu, priv->dev->mtu);
+		return -EINVAL;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+		return ret;
+	}
+
+	qp_attr.rq_psn = 0 /* FIXME */;
+	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+		return ret;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+		return ret;
+	}
+	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+		return ret;
+	}
+
+	skb_queue_head_init(&skqueue);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
+	if (p->neigh)
+		while ((skb = __skb_dequeue(&p->neigh->queue)))
+			__skb_queue_tail(&skqueue, skb);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	while ((skb = __skb_dequeue(&skqueue))) {
+		skb->dev = p->dev;
+		if (dev_queue_xmit(skb))
+			ipoib_warn(priv, "dev_queue_xmit failed "
+				   "to requeue packet\n");
+	}
+
+	ret = ib_send_cm_rtu(cm_id, NULL, 0);
+	if (ret) {
+		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_init_attr attr = {};
+	attr.recv_cq = priv->cq;
+	attr.srq = priv->cm.srq;
+	attr.cap.max_send_wr = ipoib_sendq_size;
+	attr.cap.max_send_sge = 1;
+	attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	attr.qp_type = IB_QPT_RC;
+	attr.send_cq = cq;
+	return ib_create_qp(priv->pd, &attr);
+}
+
+static int ipoib_cm_send_req(struct net_device *dev,
+			     struct ib_cm_id *id, struct ib_qp *qp,
+			     u32 qpn,
+			     struct ib_sa_path_rec *pathrec)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_data data = {};
+	struct ib_cm_req_param req = {};
+
+	data.qpn = cpu_to_be32(priv->qp->qp_num);
+	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
+
+	req.primary_path = pathrec;
+	req.alternate_path = NULL;
+	req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
+	req.qp_num = qp->qp_num;
+	req.qp_type = qp->qp_type;
+	req.private_data = &data;
+	req.private_data_len = sizeof data;
+	req.flow_control = 0;
+
+	req.starting_psn = 0; /* FIXME */
+
+	/*
+	 * Pick some arbitrary defaults here; we could make these
+	 * module parameters if anyone cared about setting them.
+	 */
+	req.responder_resources = 4;
+	req.remote_cm_response_timeout = 20;
+	req.local_cm_response_timeout = 20;
+	req.retry_count = 0; /* RFC draft warns against retries */
+	req.rnr_retry_count = 0; /* RFC draft warns against retries */
+	req.max_cm_retries = 15;
+	req.srq = 1;
+	return ib_send_cm_req(id, &req);
+}
+
+static int ipoib_cm_modify_tx_init(struct net_device *dev,
+				   struct ib_cm_id *cm_id, struct ib_qp *qp)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
+	if (ret) {
+		ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret);
+		return ret;
+	}
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+	qp_attr.port_num = priv->port;
+	qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
+			    struct ib_sa_path_rec *pathrec)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	int ret;
+
+	p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+			     GFP_KERNEL);
+	if (!p->tx_ring) {
+		ipoib_warn(priv, "failed to allocate tx ring\n");
+		ret = -ENOMEM;
+		goto err_tx;
+	}
+
+	p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
+			     ipoib_sendq_size + 1);
+	if (IS_ERR(p->cq)) {
+		ret = PTR_ERR(p->cq);
+		ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
+		goto err_cq;
+	}
+
+	ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
+	if (ret) {
+		ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
+		goto err_req_notify;
+	}
+
+	p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
+	if (IS_ERR(p->qp)) {
+		ret = PTR_ERR(p->qp);
+		ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
+		goto err_qp;
+	}
+
+	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
+	if (IS_ERR(p->id)) {
+		ret = PTR_ERR(p->id);
+		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
+		goto err_id;
+	}
+
+	ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
+		goto err_modify;
+	}
+
+	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
+	if (ret) {
+		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
+		goto err_send_cm;
+	}
+
+	ipoib_dbg(priv, "Request connection 0x%x for gid " IPOIB_GID_FMT " qpn 0x%x\n",
+		  p->qp->qp_num, IPOIB_GID_ARG(pathrec->dgid), qpn);
+
+	return 0;
+
+err_send_cm:
+err_modify:
+	ib_destroy_cm_id(p->id);
+err_id:
+	p->id = NULL;
+	ib_destroy_qp(p->qp);
+err_req_notify:
+err_qp:
+	p->qp = NULL;
+	ib_destroy_cq(p->cq);
+err_cq:
+	p->cq = NULL;
+err_tx:
+	return ret;
+}
+
+static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_tx_buf *tx_req;
+
+	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
+		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
+
+	if (p->id)
+		ib_destroy_cm_id(p->id);
+
+	if (p->qp)
+		ib_destroy_qp(p->qp);
+
+	if (p->cq)
+		ib_destroy_cq(p->cq);
+
+	if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
+		netif_wake_queue(p->dev);
+
+	if (p->tx_ring) {
+		while ((int) p->tx_tail - (int) p->tx_head < 0) {
+			tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
+			ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+					    DMA_TO_DEVICE);
+			dev_kfree_skb_any(tx_req->skb);
+			++p->tx_tail;
+		}
+
+		kfree(p->tx_ring);
+	}
+
+	kfree(p);
+}
+
+static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
+			       struct ib_cm_event *event)
+{
+	struct ipoib_cm_tx *tx = cm_id->context;
+	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+	struct net_device *dev = priv->dev;
+	struct ipoib_neigh *neigh;
+	unsigned long flags;
+	int ret;
+
+	switch (event->event) {
+	case IB_CM_DREQ_RECEIVED:
+		ipoib_dbg(priv, "DREQ received.\n");
+		ib_send_cm_drep(cm_id, NULL, 0);
+		break;
+	case IB_CM_REP_RECEIVED:
+		ipoib_dbg(priv, "REP received.\n");
+		ret = ipoib_cm_rep_handler(cm_id, event);
+		if (ret)
+			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+				       NULL, 0, NULL, 0);
+		break;
+	case IB_CM_REQ_ERROR:
+	case IB_CM_REJ_RECEIVED:
+	case IB_CM_TIMEWAIT_EXIT:
+		ipoib_dbg(priv, "CM error %d.\n", event->event);
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock(&priv->lock);
+		neigh = tx->neigh;
+
+		if (neigh) {
+			neigh->cm = NULL;
+			list_del(&neigh->list);
+			if (neigh->ah)
+				ipoib_put_ah(neigh->ah);
+			ipoib_neigh_free(dev, neigh);
+
+			tx->neigh = NULL;
+		}
+
+		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+			list_move(&tx->list, &priv->cm.reap_list);
+			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		}
+
+		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+				       struct ipoib_neigh *neigh)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_tx *tx;
+
+	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
+	if (!tx)
+		return NULL;
+
+	neigh->cm = tx;
+	tx->neigh = neigh;
+	tx->path = path;
+	tx->dev = dev;
+	list_add(&tx->list, &priv->cm.start_list);
+	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
+	queue_work(ipoib_workqueue, &priv->cm.start_task);
+	return tx;
+}
+
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+		list_move(&tx->list, &priv->cm.reap_list);
+		queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n",
+			  IPOIB_GID_ARG(tx->neigh->dgid));
+		tx->neigh = NULL;
+	}
+}
+
+static void ipoib_cm_tx_start(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.start_task);
+	struct net_device *dev = priv->dev;
+	struct ipoib_neigh *neigh;
+	struct ipoib_cm_tx *p;
+	unsigned long flags;
+	int ret;
+
+	struct ib_sa_path_rec pathrec;
+	u32 qpn;
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
+	while (!list_empty(&priv->cm.start_list)) {
+		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
+		list_del_init(&p->list);
+		neigh = p->neigh;
+		qpn = IPOIB_QPN(neigh->neighbour->ha);
+		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
+		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		ret = ipoib_cm_tx_init(p, qpn, &pathrec);
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock(&priv->lock);
+		if (ret) {
+			neigh = p->neigh;
+			if (neigh) {
+				neigh->cm = NULL;
+				list_del(&neigh->list);
+				if (neigh->ah)
+					ipoib_put_ah(neigh->ah);
+				ipoib_neigh_free(dev, neigh);
+			}
+			list_del(&p->list);
+			kfree(p);
+		}
+	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_tx_reap(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.reap_task);
+	struct ipoib_cm_tx *p;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
+	while (!list_empty(&priv->cm.reap_list)) {
+		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
+		list_del(&p->list);
+		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		ipoib_cm_tx_destroy(p);
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock(&priv->lock);
+	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_skb_reap(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.skb_task);
+	struct net_device *dev = priv->dev;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	unsigned mtu = priv->mcast_mtu;
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
+	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
+		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		if (skb->protocol == htons(ETH_P_IP))
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+#endif
+		dev_kfree_skb_any(skb);
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock(&priv->lock);
+	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
+			   unsigned int mtu)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int e = skb_queue_empty(&priv->cm.skb_queue);
+
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	skb_queue_tail(&priv->cm.skb_queue, skb);
+	if (e)
+		queue_work(ipoib_workqueue, &priv->cm.skb_task);
+}
+
+static void ipoib_cm_stale_task(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.stale_task.work);
+	struct ipoib_cm_rx *p;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	while (!list_empty(&priv->cm.passive_ids)) {
+		/* List is sorted by LRU, start from tail,
+		 * stop when we see a recently used entry */
+		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
+		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
+			break;
1094 | list_del_init(&p->list); | ||
1095 | spin_unlock_irqrestore(&priv->lock, flags); | ||
1096 | ib_destroy_cm_id(p->id); | ||
1097 | ib_destroy_qp(p->qp); | ||
1098 | kfree(p); | ||
1099 | spin_lock_irqsave(&priv->lock, flags); | ||
1100 | } | ||
1101 | spin_unlock_irqrestore(&priv->lock, flags); | ||
1102 | } | ||
1103 | |||
1104 | |||
1105 | static ssize_t show_mode(struct device *d, struct device_attribute *attr, | ||
1106 | char *buf) | ||
1107 | { | ||
1108 | struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d)); | ||
1109 | |||
1110 | if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) | ||
1111 | return sprintf(buf, "connected\n"); | ||
1112 | else | ||
1113 | return sprintf(buf, "datagram\n"); | ||
1114 | } | ||
1115 | |||
1116 | static ssize_t set_mode(struct device *d, struct device_attribute *attr, | ||
1117 | const char *buf, size_t count) | ||
1118 | { | ||
1119 | struct net_device *dev = to_net_dev(d); | ||
1120 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
1121 | |||
1122 | /* flush paths if we switch modes so that connections are restarted */ | ||
1123 | if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { | ||
1124 | set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); | ||
1125 | ipoib_warn(priv, "enabling connected mode " | ||
1126 | "will cause multicast packet drops\n"); | ||
1127 | ipoib_flush_paths(dev); | ||
1128 | return count; | ||
1129 | } | ||
1130 | |||
1131 | if (!strcmp(buf, "datagram\n")) { | ||
1132 | clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); | ||
1133 | dev->mtu = min(priv->mcast_mtu, dev->mtu); | ||
1134 | ipoib_flush_paths(dev); | ||
1135 | return count; | ||
1136 | } | ||
1137 | |||
1138 | return -EINVAL; | ||
1139 | } | ||
1140 | |||
1141 | static DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode); | ||
1142 | |||
1143 | int ipoib_cm_add_mode_attr(struct net_device *dev) | ||
1144 | { | ||
1145 | return device_create_file(&dev->dev, &dev_attr_mode); | ||
1146 | } | ||
1147 | |||
1148 | int ipoib_cm_dev_init(struct net_device *dev) | ||
1149 | { | ||
1150 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
1151 | struct ib_srq_init_attr srq_init_attr = { | ||
1152 | .attr = { | ||
1153 | .max_wr = ipoib_recvq_size, | ||
1154 | .max_sge = IPOIB_CM_RX_SG | ||
1155 | } | ||
1156 | }; | ||
1157 | int ret, i; | ||
1158 | |||
1159 | INIT_LIST_HEAD(&priv->cm.passive_ids); | ||
1160 | INIT_LIST_HEAD(&priv->cm.reap_list); | ||
1161 | INIT_LIST_HEAD(&priv->cm.start_list); | ||
1162 | INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); | ||
1163 | INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); | ||
1164 | INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); | ||
1165 | INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); | ||
1166 | |||
1167 | skb_queue_head_init(&priv->cm.skb_queue); | ||
1168 | |||
1169 | priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); | ||
1170 | if (IS_ERR(priv->cm.srq)) { | ||
1171 | ret = PTR_ERR(priv->cm.srq); | ||
1172 | priv->cm.srq = NULL; | ||
1173 | return ret; | ||
1174 | } | ||
1175 | |||
1176 | priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, | ||
1177 | GFP_KERNEL); | ||
1178 | if (!priv->cm.srq_ring) { | ||
1179 | printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n", | ||
1180 | priv->ca->name, ipoib_recvq_size); | ||
1181 | ipoib_cm_dev_cleanup(dev); | ||
1182 | return -ENOMEM; | ||
1183 | } | ||
1184 | |||
1185 | for (i = 0; i < IPOIB_CM_RX_SG; ++i) | ||
1186 | priv->cm.rx_sge[i].lkey = priv->mr->lkey; | ||
1187 | |||
1188 | priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; | ||
1189 | for (i = 1; i < IPOIB_CM_RX_SG; ++i) | ||
1190 | priv->cm.rx_sge[i].length = PAGE_SIZE; | ||
1191 | priv->cm.rx_wr.next = NULL; | ||
1192 | priv->cm.rx_wr.sg_list = priv->cm.rx_sge; | ||
1193 | priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; | ||
1194 | |||
1195 | for (i = 0; i < ipoib_recvq_size; ++i) { | ||
1196 | if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) { | ||
1197 | ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); | ||
1198 | ipoib_cm_dev_cleanup(dev); | ||
1199 | return -ENOMEM; | ||
1200 | } | ||
1201 | if (ipoib_cm_post_receive(dev, i)) { | ||
1202 | ipoib_warn(priv, "ipoib_cm_post_receive failed for buf %d\n", i); | ||
1203 | ipoib_cm_dev_cleanup(dev); | ||
1204 | return -EIO; | ||
1205 | } | ||
1206 | } | ||
1207 | |||
1208 | priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; | ||
1209 | return 0; | ||
1210 | } | ||
1211 | |||
1212 | void ipoib_cm_dev_cleanup(struct net_device *dev) | ||
1213 | { | ||
1214 | struct ipoib_dev_priv *priv = netdev_priv(dev); | ||
1215 | int i, ret; | ||
1216 | |||
1217 | if (!priv->cm.srq) | ||
1218 | return; | ||
1219 | |||
1220 | ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); | ||
1221 | |||
1222 | ret = ib_destroy_srq(priv->cm.srq); | ||
1223 | if (ret) | ||
1224 | ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); | ||
1225 | |||
1226 | priv->cm.srq = NULL; | ||
1227 | if (!priv->cm.srq_ring) | ||
1228 | return; | ||
1229 | for (i = 0; i < ipoib_recvq_size; ++i) | ||
1230 | if (priv->cm.srq_ring[i].skb) { | ||
1231 | ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping); | ||
1232 | dev_kfree_skb_any(priv->cm.srq_ring[i].skb); | ||
1233 | priv->cm.srq_ring[i].skb = NULL; | ||
1234 | } | ||
1235 | kfree(priv->cm.srq_ring); | ||
1236 | priv->cm.srq_ring = NULL; | ||
1237 | } | ||
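
The show_mode()/set_mode() pair above exposes the datagram/connected
switch as a per-interface sysfs attribute. A minimal userspace sketch of
driving it follows; the interface name "ib0" and the error handling are
assumptions, not part of the driver:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char mode[32] = "";
		FILE *f = fopen("/sys/class/net/ib0/mode", "r");

		if (!f)
			return 1;
		if (!fgets(mode, sizeof mode, f))
			mode[0] = '\0';
		fclose(f);
		printf("current mode: %s", mode);	/* "datagram\n" or "connected\n" */

		if (strcmp(mode, "connected\n") != 0) {
			f = fopen("/sys/class/net/ib0/mode", "w");
			if (!f)
				return 1;
			fputs("connected\n", f);	/* set_mode() matches the newline too */
			fclose(f);
		}
		return 0;
	}

set_mode() accepts only the two exact strings, trailing newline
included, and returns -EINVAL for anything else.
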
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 59d9594ed6d9..f2aa923ddbea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c | |||
@@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level, | |||
50 | "Enable data path debug tracing if > 0"); | 50 | "Enable data path debug tracing if > 0"); |
51 | #endif | 51 | #endif |
52 | 52 | ||
53 | #define IPOIB_OP_RECV (1ul << 31) | ||
54 | |||
55 | static DEFINE_MUTEX(pkey_mutex); | 53 | static DEFINE_MUTEX(pkey_mutex); |
56 | 54 | ||
57 | struct ipoib_ah *ipoib_create_ah(struct net_device *dev, | 55 | struct ipoib_ah *ipoib_create_ah(struct net_device *dev, |
@@ -268,10 +266,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) | |||
268 | 266 | ||
269 | spin_lock_irqsave(&priv->tx_lock, flags); | 267 | spin_lock_irqsave(&priv->tx_lock, flags); |
270 | ++priv->tx_tail; | 268 | ++priv->tx_tail; |
271 | if (netif_queue_stopped(dev) && | 269 | if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) && |
272 | test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && | 270 | priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) { |
273 | priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) | 271 | clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); |
274 | netif_wake_queue(dev); | 272 | netif_wake_queue(dev); |
273 | } | ||
275 | spin_unlock_irqrestore(&priv->tx_lock, flags); | 274 | spin_unlock_irqrestore(&priv->tx_lock, flags); |
276 | 275 | ||
277 | if (wc->status != IB_WC_SUCCESS && | 276 | if (wc->status != IB_WC_SUCCESS && |
@@ -283,7 +282,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) | |||
283 | 282 | ||
284 | static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) | 283 | static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) |
285 | { | 284 | { |
286 | if (wc->wr_id & IPOIB_OP_RECV) | 285 | if (wc->wr_id & IPOIB_CM_OP_SRQ) |
286 | ipoib_cm_handle_rx_wc(dev, wc); | ||
287 | else if (wc->wr_id & IPOIB_OP_RECV) | ||
287 | ipoib_ib_handle_rx_wc(dev, wc); | 288 | ipoib_ib_handle_rx_wc(dev, wc); |
288 | else | 289 | else |
289 | ipoib_ib_handle_tx_wc(dev, wc); | 290 | ipoib_ib_handle_tx_wc(dev, wc); |
@@ -327,12 +328,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, | |||
327 | struct ipoib_tx_buf *tx_req; | 328 | struct ipoib_tx_buf *tx_req; |
328 | u64 addr; | 329 | u64 addr; |
329 | 330 | ||
330 | if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { | 331 | if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) { |
331 | ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", | 332 | ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", |
332 | skb->len, dev->mtu + INFINIBAND_ALEN); | 333 | skb->len, priv->mcast_mtu + INFINIBAND_ALEN); |
333 | ++priv->stats.tx_dropped; | 334 | ++priv->stats.tx_dropped; |
334 | ++priv->stats.tx_errors; | 335 | ++priv->stats.tx_errors; |
335 | dev_kfree_skb_any(skb); | 336 | ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu); |
336 | return; | 337 | return; |
337 | } | 338 | } |
338 | 339 | ||
@@ -372,6 +373,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, | |||
372 | if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { | 373 | if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { |
373 | ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); | 374 | ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); |
374 | netif_stop_queue(dev); | 375 | netif_stop_queue(dev); |
376 | set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); | ||
375 | } | 377 | } |
376 | } | 378 | } |
377 | } | 379 | } |
@@ -424,6 +426,13 @@ int ipoib_ib_dev_open(struct net_device *dev) | |||
424 | return -1; | 426 | return -1; |
425 | } | 427 | } |
426 | 428 | ||
429 | ret = ipoib_cm_dev_open(dev); | ||
430 | if (ret) { | ||
431 | ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret); | ||
432 | ipoib_ib_dev_stop(dev); | ||
433 | return -1; | ||
434 | } | ||
435 | |||
427 | clear_bit(IPOIB_STOP_REAPER, &priv->flags); | 436 | clear_bit(IPOIB_STOP_REAPER, &priv->flags); |
428 | queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); | 437 | queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); |
429 | 438 | ||
@@ -509,6 +518,8 @@ int ipoib_ib_dev_stop(struct net_device *dev) | |||
509 | 518 | ||
510 | clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); | 519 | clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); |
511 | 520 | ||
521 | ipoib_cm_dev_stop(dev); | ||
522 | |||
512 | /* | 523 | /* |
513 | * Move our QP to the error state and then reinitialize it | 524 | * Move our QP to the error state and then reinitialize it |
514 | * when all work requests have completed or have been flushed. | 525 | * when all work requests have completed or have been flushed. |
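
ipoib_ib_handle_wc() above demultiplexes completions by flag bits
folded into the 64-bit wr_id: high bits tag which ring the completion
belongs to, low bits carry the slot index. A standalone sketch of that
scheme, with illustrative flag values rather than the driver's actual
IPOIB_OP_RECV/IPOIB_CM_OP_SRQ definitions:

	#include <stdint.h>
	#include <stdio.h>

	#define OP_RECV    (UINT64_C(1) << 31)	/* UD receive ring */
	#define OP_CM_SRQ  (UINT64_C(1) << 30)	/* connected-mode SRQ ring */
	#define INDEX_MASK (OP_CM_SRQ - 1)	/* low bits carry the ring slot */

	static uint64_t make_wr_id(unsigned slot, uint64_t tag)
	{
		return tag | (slot & INDEX_MASK);
	}

	static const char *classify(uint64_t wr_id)
	{
		if (wr_id & OP_CM_SRQ)
			return "CM SRQ receive";	/* ipoib_cm_handle_rx_wc() */
		if (wr_id & OP_RECV)
			return "UD receive";		/* ipoib_ib_handle_rx_wc() */
		return "send";				/* ipoib_ib_handle_tx_wc() */
	}

	int main(void)
	{
		uint64_t id = make_wr_id(5, OP_CM_SRQ);	/* slot 5 on the SRQ */

		printf("%s, slot %u\n", classify(id), (unsigned)(id & INDEX_MASK));
		return 0;
	}
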
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index af5ee2ec4499..18d27fd352ad 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c | |||
@@ -49,8 +49,6 @@ | |||
49 | 49 | ||
50 | #include <net/dst.h> | 50 | #include <net/dst.h> |
51 | 51 | ||
52 | #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) | ||
53 | |||
54 | MODULE_AUTHOR("Roland Dreier"); | 52 | MODULE_AUTHOR("Roland Dreier"); |
55 | MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); | 53 | MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); |
56 | MODULE_LICENSE("Dual BSD/GPL"); | 54 | MODULE_LICENSE("Dual BSD/GPL"); |
@@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev) | |||
145 | 143 | ||
146 | netif_stop_queue(dev); | 144 | netif_stop_queue(dev); |
147 | 145 | ||
146 | clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); | ||
147 | |||
148 | /* | 148 | /* |
149 | * Now flush workqueue to make sure a scheduled task doesn't | 149 | * Now flush workqueue to make sure a scheduled task doesn't |
150 | * bring our internal state back up. | 150 | * bring our internal state back up. |
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) | |||
178 | { | 178 | { |
179 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 179 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
180 | 180 | ||
181 | if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) | 181 | /* dev->mtu > 2K ==> connected mode */ |
182 | if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { | ||
183 | if (new_mtu > priv->mcast_mtu) | ||
184 | ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", | ||
185 | priv->mcast_mtu); | ||
186 | dev->mtu = new_mtu; | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { | ||
182 | return -EINVAL; | 191 | return -EINVAL; |
192 | } | ||
183 | 193 | ||
184 | priv->admin_mtu = new_mtu; | 194 | priv->admin_mtu = new_mtu; |
185 | 195 | ||
@@ -414,6 +424,20 @@ static void path_rec_completion(int status, | |||
414 | memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, | 424 | memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, |
415 | sizeof(union ib_gid)); | 425 | sizeof(union ib_gid)); |
416 | 426 | ||
427 | if (ipoib_cm_enabled(dev, neigh->neighbour)) { | ||
428 | if (!ipoib_cm_get(neigh)) | ||
429 | ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, | ||
430 | path, | ||
431 | neigh)); | ||
432 | if (!ipoib_cm_get(neigh)) { | ||
433 | list_del(&neigh->list); | ||
434 | if (neigh->ah) | ||
435 | ipoib_put_ah(neigh->ah); | ||
436 | ipoib_neigh_free(dev, neigh); | ||
437 | continue; | ||
438 | } | ||
439 | } | ||
440 | |||
417 | while ((skb = __skb_dequeue(&neigh->queue))) | 441 | while ((skb = __skb_dequeue(&neigh->queue))) |
418 | __skb_queue_tail(&skqueue, skb); | 442 | __skb_queue_tail(&skqueue, skb); |
419 | } | 443 | } |
@@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) | |||
520 | memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, | 544 | memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, |
521 | sizeof(union ib_gid)); | 545 | sizeof(union ib_gid)); |
522 | 546 | ||
523 | ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); | 547 | if (ipoib_cm_enabled(dev, neigh->neighbour)) { |
548 | if (!ipoib_cm_get(neigh)) | ||
549 | ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); | ||
550 | if (!ipoib_cm_get(neigh)) { | ||
551 | list_del(&neigh->list); | ||
552 | if (neigh->ah) | ||
553 | ipoib_put_ah(neigh->ah); | ||
554 | ipoib_neigh_free(dev, neigh); | ||
555 | goto err_drop; | ||
556 | } | ||
557 | if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) | ||
558 | __skb_queue_tail(&neigh->queue, skb); | ||
559 | else { | ||
560 | ipoib_warn(priv, "queue length limit %d, dropping packet\n", | ||
561 | skb_queue_len(&neigh->queue)); | ||
562 | goto err_drop; | ||
563 | } | ||
564 | } else | ||
565 | ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); | ||
524 | } else { | 566 | } else { |
525 | neigh->ah = NULL; | 567 | neigh->ah = NULL; |
526 | 568 | ||
@@ -538,6 +580,7 @@ err_list: | |||
538 | 580 | ||
539 | err_path: | 581 | err_path: |
540 | ipoib_neigh_free(dev, neigh); | 582 | ipoib_neigh_free(dev, neigh); |
583 | err_drop: | ||
541 | ++priv->stats.tx_dropped; | 584 | ++priv->stats.tx_dropped; |
542 | dev_kfree_skb_any(skb); | 585 | dev_kfree_skb_any(skb); |
543 | 586 | ||
@@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
640 | 683 | ||
641 | neigh = *to_ipoib_neigh(skb->dst->neighbour); | 684 | neigh = *to_ipoib_neigh(skb->dst->neighbour); |
642 | 685 | ||
643 | if (likely(neigh->ah)) { | 686 | if (ipoib_cm_get(neigh)) { |
687 | if (ipoib_cm_up(neigh)) { | ||
688 | ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); | ||
689 | goto out; | ||
690 | } | ||
691 | } else if (neigh->ah) { | ||
644 | if (unlikely(memcmp(&neigh->dgid.raw, | 692 | if (unlikely(memcmp(&neigh->dgid.raw, |
645 | skb->dst->neighbour->ha + 4, | 693 | skb->dst->neighbour->ha + 4, |
646 | sizeof(union ib_gid)))) { | 694 | sizeof(union ib_gid)))) { |
@@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) | |||
805 | neigh->neighbour = neighbour; | 853 | neigh->neighbour = neighbour; |
806 | *to_ipoib_neigh(neighbour) = neigh; | 854 | *to_ipoib_neigh(neighbour) = neigh; |
807 | skb_queue_head_init(&neigh->queue); | 855 | skb_queue_head_init(&neigh->queue); |
856 | ipoib_cm_set(neigh, NULL); | ||
808 | 857 | ||
809 | return neigh; | 858 | return neigh; |
810 | } | 859 | } |
@@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) | |||
818 | ++priv->stats.tx_dropped; | 867 | ++priv->stats.tx_dropped; |
819 | dev_kfree_skb_any(skb); | 868 | dev_kfree_skb_any(skb); |
820 | } | 869 | } |
870 | if (ipoib_cm_get(neigh)) | ||
871 | ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); | ||
821 | kfree(neigh); | 872 | kfree(neigh); |
822 | } | 873 | } |
823 | 874 | ||
@@ -1080,6 +1131,8 @@ static struct net_device *ipoib_add_port(const char *format, | |||
1080 | 1131 | ||
1081 | ipoib_create_debug_files(priv->dev); | 1132 | ipoib_create_debug_files(priv->dev); |
1082 | 1133 | ||
1134 | if (ipoib_cm_add_mode_attr(priv->dev)) | ||
1135 | goto sysfs_failed; | ||
1083 | if (ipoib_add_pkey_attr(priv->dev)) | 1136 | if (ipoib_add_pkey_attr(priv->dev)) |
1084 | goto sysfs_failed; | 1137 | goto sysfs_failed; |
1085 | if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) | 1138 | if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) |
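
The ipoib_change_mtu() hunk above encodes the new policy: with
connected mode administratively enabled, the MTU may rise to the
connected-mode ceiling (with only a warning once it exceeds mcast_mtu,
since multicast still travels over UD); otherwise it stays bounded by
the 2K UD packet less the 4-byte encapsulation header. A sketch of the
bound check, with assumed constants standing in for the ipoib.h
definitions:

	#include <stdbool.h>

	#define ENCAP_LEN      4	/* assumed IPOIB_ENCAP_LEN */
	#define UD_PACKET_SIZE 2048	/* assumed IPOIB_PACKET_SIZE */
	#define CM_MTU         65520	/* assumed connected-mode ceiling */

	bool mtu_ok(bool cm_admin_enabled, int new_mtu)
	{
		if (cm_admin_enabled)
			return new_mtu <= CM_MTU;
		return new_mtu <= UD_PACKET_SIZE - ENCAP_LEN;	/* 2044 */
	}
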
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b04b72ca32ed..fea737f520fd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -597,7 +597,9 @@ void ipoib_mcast_join_task(struct work_struct *work) | |||
597 | 597 | ||
598 | priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - | 598 | priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - |
599 | IPOIB_ENCAP_LEN; | 599 | IPOIB_ENCAP_LEN; |
600 | dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); | 600 | |
601 | if (!ipoib_cm_admin_enabled(dev)) | ||
602 | dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); | ||
601 | 603 | ||
602 | ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); | 604 | ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); |
603 | 605 | ||
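
The hunk above stops clamping dev->mtu to the multicast MTU once
connected mode is enabled, but mcast_mtu itself is still derived from
the broadcast group: ib_mtu_enum_to_int() maps the IB MTU enum to
bytes, so a 2048-byte broadcast group yields mcast_mtu = 2048 - 4 =
2044 after subtracting the 4-byte IPoIB encapsulation header. A sketch
of that mapping:

	/* IB MTU enums 1..5 correspond to 256 << (enum - 1) bytes:
	 * 1 -> 256, 2 -> 512, 3 -> 1024, 4 -> 2048, 5 -> 4096
	 */
	int mtu_enum_to_bytes(int mtu_enum)
	{
		return (mtu_enum >= 1 && mtu_enum <= 5) ? 256 << (mtu_enum - 1) : -1;
	}
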
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 7b717c648f72..3cb551b88756 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c | |||
@@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) | |||
168 | .qp_type = IB_QPT_UD | 168 | .qp_type = IB_QPT_UD |
169 | }; | 169 | }; |
170 | 170 | ||
171 | int ret, size; | ||
172 | |||
171 | priv->pd = ib_alloc_pd(priv->ca); | 173 | priv->pd = ib_alloc_pd(priv->ca); |
172 | if (IS_ERR(priv->pd)) { | 174 | if (IS_ERR(priv->pd)) { |
173 | printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); | 175 | printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); |
174 | return -ENODEV; | 176 | return -ENODEV; |
175 | } | 177 | } |
176 | 178 | ||
177 | priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, | 179 | priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); |
178 | ipoib_sendq_size + ipoib_recvq_size + 1); | 180 | if (IS_ERR(priv->mr)) { |
181 | printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); | ||
182 | goto out_free_pd; | ||
183 | } | ||
184 | |||
185 | size = ipoib_sendq_size + ipoib_recvq_size + 1; | ||
186 | ret = ipoib_cm_dev_init(dev); | ||
187 | if (!ret) | ||
188 | size += ipoib_recvq_size; | ||
189 | |||
190 | priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size); | ||
179 | if (IS_ERR(priv->cq)) { | 191 | if (IS_ERR(priv->cq)) { |
180 | printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); | 192 | printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); |
181 | goto out_free_pd; | 193 | goto out_free_mr; |
182 | } | 194 | } |
183 | 195 | ||
184 | if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) | 196 | if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) |
185 | goto out_free_cq; | 197 | goto out_free_cq; |
186 | 198 | ||
187 | priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); | ||
188 | if (IS_ERR(priv->mr)) { | ||
189 | printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); | ||
190 | goto out_free_cq; | ||
191 | } | ||
192 | |||
193 | init_attr.send_cq = priv->cq; | 199 | init_attr.send_cq = priv->cq; |
194 | init_attr.recv_cq = priv->cq; | 200 | init_attr.recv_cq = priv->cq; |
195 | 201 | ||
196 | priv->qp = ib_create_qp(priv->pd, &init_attr); | 202 | priv->qp = ib_create_qp(priv->pd, &init_attr); |
197 | if (IS_ERR(priv->qp)) { | 203 | if (IS_ERR(priv->qp)) { |
198 | printk(KERN_WARNING "%s: failed to create QP\n", ca->name); | 204 | printk(KERN_WARNING "%s: failed to create QP\n", ca->name); |
199 | goto out_free_mr; | 205 | goto out_free_cq; |
200 | } | 206 | } |
201 | 207 | ||
202 | priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; | 208 | priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; |
@@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) | |||
212 | 218 | ||
213 | return 0; | 219 | return 0; |
214 | 220 | ||
215 | out_free_mr: | ||
216 | ib_dereg_mr(priv->mr); | ||
217 | |||
218 | out_free_cq: | 221 | out_free_cq: |
219 | ib_destroy_cq(priv->cq); | 222 | ib_destroy_cq(priv->cq); |
220 | 223 | ||
224 | out_free_mr: | ||
225 | ib_dereg_mr(priv->mr); | ||
226 | |||
221 | out_free_pd: | 227 | out_free_pd: |
222 | ib_dealloc_pd(priv->pd); | 228 | ib_dealloc_pd(priv->pd); |
223 | return -ENODEV; | 229 | return -ENODEV; |
@@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) | |||
235 | clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); | 241 | clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); |
236 | } | 242 | } |
237 | 243 | ||
238 | if (ib_dereg_mr(priv->mr)) | ||
239 | ipoib_warn(priv, "ib_dereg_mr failed\n"); | ||
240 | |||
241 | if (ib_destroy_cq(priv->cq)) | 244 | if (ib_destroy_cq(priv->cq)) |
242 | ipoib_warn(priv, "ib_destroy_cq failed\n"); | 245 | ipoib_warn(priv, "ib_destroy_cq failed\n"); |
243 | 246 | ||
247 | ipoib_cm_dev_cleanup(dev); | ||
248 | |||
249 | if (ib_dereg_mr(priv->mr)) | ||
250 | ipoib_warn(priv, "ib_dereg_mr failed\n"); | ||
251 | |||
244 | if (ib_dealloc_pd(priv->pd)) | 252 | if (ib_dealloc_pd(priv->pd)) |
245 | ipoib_warn(priv, "ib_dealloc_pd failed\n"); | 253 | ipoib_warn(priv, "ib_dealloc_pd failed\n"); |
246 | } | 254 | } |
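
ipoib_transport_dev_init() above now sizes the shared CQ before
creating it: one entry per send and per UD receive work request plus
one spare, and another ipoib_recvq_size worth when ipoib_cm_dev_init()
succeeds, since the connected-mode SRQ posts its own receives. A sketch
of the arithmetic, assuming (hypothetically) ring sizes of 64 sends and
128 receives:

	enum { SENDQ = 64, RECVQ = 128 };	/* assumed ring sizes */

	int cq_entries(int cm_initialized)
	{
		int size = SENDQ + RECVQ + 1;	/* 193 without connected mode */

		if (cm_initialized)
			size += RECVQ;		/* 321 once the CM SRQ is up */
		return size;
	}
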
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 085eafe6667c..6762988439d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c | |||
@@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) | |||
115 | 115 | ||
116 | ipoib_create_debug_files(priv->dev); | 116 | ipoib_create_debug_files(priv->dev); |
117 | 117 | ||
118 | if (ipoib_cm_add_mode_attr(priv->dev)) | ||
119 | goto sysfs_failed; | ||
118 | if (ipoib_add_pkey_attr(priv->dev)) | 120 | if (ipoib_add_pkey_attr(priv->dev)) |
119 | goto sysfs_failed; | 121 | goto sysfs_failed; |
120 | 122 | ||