author		Linus Torvalds <torvalds@g5.osdl.org>	2006-04-12 19:07:54 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-04-12 19:07:54 -0400
commit		907d91d708d9999bec0185d630062576ac4181a7 (patch)
tree		faf16e6a0ceaa70f55cbb1a8abc7814f5e106e4e
parent		646e120ffe885d70c75460f515d56a5c0d72002e (diff)
parent		59fef3b1e96217c6e736372ff8cc95cbcca1b6aa (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/mthca: Fix max_srq_sge returned by ib_query_device for Tavor devices
  IB/cache: Use correct pointer to calculate size
  IPoIB: Use spin_lock_irq() instead of spin_lock_irqsave()
  IPoIB: Close race in ipoib_flush_paths()
  IB/mthca: Disable tuning PCI read burst size
  IPoIB: Make send and receive queue sizes tunable
  IPoIB: Wait for join to finish before freeing mcast struct
  IB: simplify static rate encoding
  IPoIB: Consolidate private neighbour data handling
  IB/srp: Fix memory leak in options parsing
  IB/mthca: Always build debugging code unless CONFIG_EMBEDDED=y
  IPoIB: Always build debugging code unless CONFIG_EMBEDDED=y
  IB/mad: fix oops in cancel_mads
-rw-r--r--	drivers/infiniband/core/cache.c                |   2
-rw-r--r--	drivers/infiniband/core/mad.c                  |   2
-rw-r--r--	drivers/infiniband/core/verbs.c                |  34
-rw-r--r--	drivers/infiniband/hw/mthca/Kconfig            |  11
-rw-r--r--	drivers/infiniband/hw/mthca/Makefile           |   4
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_av.c         | 100
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_cmd.c        |   4
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_cmd.h        |   1
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_dev.h        |  23
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_mad.c        |  42
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_main.c       |  28
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_provider.c   |   2
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_provider.h   |   3
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_qp.c         |  46
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_srq.c        |  27
-rw-r--r--	drivers/infiniband/ulp/ipoib/Kconfig           |   3
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib.h           |   7
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_fs.c        |   2
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_ib.c        |  22
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_main.c      |  88
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  58
-rw-r--r--	drivers/infiniband/ulp/ipoib/ipoib_verbs.c     |   6
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c            |   1
-rw-r--r--	include/rdma/ib_sa.h                           |  28
-rw-r--r--	include/rdma/ib_verbs.h                        |  28
25 files changed, 430 insertions(+), 142 deletions(-)
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index c57a3871184c..50364c0b090c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -302,7 +302,7 @@ static void ib_cache_setup_one(struct ib_device *device)
 		kmalloc(sizeof *device->cache.pkey_cache *
 			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
 	device->cache.gid_cache =
-		kmalloc(sizeof *device->cache.pkey_cache *
+		kmalloc(sizeof *device->cache.gid_cache *
 			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
 
 	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ba54c856b0e5..3a702da83e41 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2311,6 +2311,7 @@ static void local_completions(void *data)
 		local = list_entry(mad_agent_priv->local_list.next,
 				   struct ib_mad_local_private,
 				   completion_list);
+		list_del(&local->completion_list);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 		if (local->mad_priv) {
 			recv_mad_agent = local->recv_mad_agent;
@@ -2362,7 +2363,6 @@ local_send_completion:
 						   &mad_send_wc);
 
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
-		list_del(&local->completion_list);
 		atomic_dec(&mad_agent_priv->refcount);
 		if (!recv)
 			kmem_cache_free(ib_mad_cache, local->mad_priv);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index cae0845f472a..b78e7dc69330 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -45,6 +45,40 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 
+int ib_rate_to_mult(enum ib_rate rate)
+{
+	switch (rate) {
+	case IB_RATE_2_5_GBPS: return  1;
+	case IB_RATE_5_GBPS:   return  2;
+	case IB_RATE_10_GBPS:  return  4;
+	case IB_RATE_20_GBPS:  return  8;
+	case IB_RATE_30_GBPS:  return 12;
+	case IB_RATE_40_GBPS:  return 16;
+	case IB_RATE_60_GBPS:  return 24;
+	case IB_RATE_80_GBPS:  return 32;
+	case IB_RATE_120_GBPS: return 48;
+	default:	       return -1;
+	}
+}
+EXPORT_SYMBOL(ib_rate_to_mult);
+
+enum ib_rate mult_to_ib_rate(int mult)
+{
+	switch (mult) {
+	case 1:  return IB_RATE_2_5_GBPS;
+	case 2:  return IB_RATE_5_GBPS;
+	case 4:  return IB_RATE_10_GBPS;
+	case 8:  return IB_RATE_20_GBPS;
+	case 12: return IB_RATE_30_GBPS;
+	case 16: return IB_RATE_40_GBPS;
+	case 24: return IB_RATE_60_GBPS;
+	case 32: return IB_RATE_80_GBPS;
+	case 48: return IB_RATE_120_GBPS;
+	default: return IB_RATE_PORT_CURRENT;
+	}
+}
+EXPORT_SYMBOL(mult_to_ib_rate);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
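A note on the encoding: both helpers added above express an IB static rate as a
multiple of the 2.5 Gbps (1X SDR) base rate, which is how the mthca patches
below consume it. A minimal usage sketch, illustrative only and not part of the
patch:

	#include <rdma/ib_verbs.h>

	static void rate_helpers_example(void)
	{
		/* 20 Gbps is 8 times the 2.5 Gbps base rate. */
		int mult = ib_rate_to_mult(IB_RATE_20_GBPS);	/* 8 */

		/* The mapping round-trips for every enumerated rate. */
		enum ib_rate rate = mult_to_ib_rate(mult);	/* IB_RATE_20_GBPS */

		/* Unknown inputs yield -1 and IB_RATE_PORT_CURRENT respectively. */
		(void) rate;
	}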
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index e88be85b3d5c..9aa5a4468a75 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -7,10 +7,11 @@ config INFINIBAND_MTHCA
 	  ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
 
 config INFINIBAND_MTHCA_DEBUG
-	bool "Verbose debugging output"
+	bool "Verbose debugging output" if EMBEDDED
 	depends on INFINIBAND_MTHCA
-	default n
+	default y
 	---help---
-	  This option causes the mthca driver produce a bunch of debug
-	  messages.  Select this is you are developing the driver or
-	  trying to diagnose a problem.
+	  This option causes debugging code to be compiled into the
+	  mthca driver.  The output can be turned on via the
+	  debug_level module parameter (which can also be set after
+	  the driver is loaded through sysfs).
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index 47ec5a7cba0b..e388d95d0cf1 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -1,7 +1,3 @@
-ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
-
 obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
 
 ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index bc5bdcbe51b5..b12aa03be251 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -42,6 +42,20 @@
 
 #include "mthca_dev.h"
 
+enum {
+	MTHCA_RATE_TAVOR_FULL   = 0,
+	MTHCA_RATE_TAVOR_1X     = 1,
+	MTHCA_RATE_TAVOR_4X     = 2,
+	MTHCA_RATE_TAVOR_1X_DDR = 3
+};
+
+enum {
+	MTHCA_RATE_MEMFREE_FULL    = 0,
+	MTHCA_RATE_MEMFREE_QUARTER = 1,
+	MTHCA_RATE_MEMFREE_EIGHTH  = 2,
+	MTHCA_RATE_MEMFREE_HALF    = 3
+};
+
 struct mthca_av {
 	__be32 port_pd;
 	u8     reserved1;
@@ -55,6 +69,90 @@ struct mthca_av {
 	__be32 dgid[4];
 };
 
+static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+	switch (mthca_rate) {
+	case MTHCA_RATE_MEMFREE_EIGHTH:
+		return mult_to_ib_rate(port_rate >> 3);
+	case MTHCA_RATE_MEMFREE_QUARTER:
+		return mult_to_ib_rate(port_rate >> 2);
+	case MTHCA_RATE_MEMFREE_HALF:
+		return mult_to_ib_rate(port_rate >> 1);
+	case MTHCA_RATE_MEMFREE_FULL:
+	default:
+		return mult_to_ib_rate(port_rate);
+	}
+}
+
+static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+	switch (mthca_rate) {
+	case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
+	case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
+	case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
+	default:		      return port_rate;
+	}
+}
+
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
+{
+	if (mthca_is_memfree(dev)) {
+		/* Handle old Arbel FW */
+		if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
+			return IB_RATE_2_5_GBPS;
+
+		return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+	} else
+		return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+}
+
+static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
+{
+	if (cur_rate <= req_rate)
+		return 0;
+
+	/*
+	 * Inter-packet delay (IPD) to get from rate X down to a rate
+	 * no more than Y is (X - 1) / Y.
+	 */
+	switch ((cur_rate - 1) / req_rate) {
+	case 0:	 return MTHCA_RATE_MEMFREE_FULL;
+	case 1:	 return MTHCA_RATE_MEMFREE_HALF;
+	case 2:	 /* fall through */
+	case 3:	 return MTHCA_RATE_MEMFREE_QUARTER;
+	default: return MTHCA_RATE_MEMFREE_EIGHTH;
+	}
+}
+
+static u8 ib_rate_to_tavor(u8 static_rate)
+{
+	switch (static_rate) {
+	case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X;
+	case IB_RATE_5_GBPS:   return MTHCA_RATE_TAVOR_1X_DDR;
+	case IB_RATE_10_GBPS:  return MTHCA_RATE_TAVOR_4X;
+	default:	       return MTHCA_RATE_TAVOR_FULL;
+	}
+}
+
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
+{
+	u8 rate;
+
+	if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
+		return 0;
+
+	if (mthca_is_memfree(dev))
+		rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate),
+					  dev->rate[port - 1]);
+	else
+		rate = ib_rate_to_tavor(static_rate);
+
+	if (!(dev->limits.stat_rate_support & (1 << rate)))
+		rate = 1;
+
+	return rate;
+}
+
 int mthca_create_ah(struct mthca_dev *dev,
 		    struct mthca_pd *pd,
 		    struct ib_ah_attr *ah_attr,
@@ -107,7 +205,7 @@ on_hca_fail:
 	av->g_slid  = ah_attr->src_path_bits;
 	av->dlid    = cpu_to_be16(ah_attr->dlid);
 	av->msg_sr  = (3 << 4) | /* 2K message */
-		ah_attr->static_rate;
+		mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
 	av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		av->g_slid |= 0x80;
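A worked example of the IPD formula in ib_rate_to_memfree() above (illustrative
arithmetic only, not part of the patch): throttling a 4X port (multiplier 4)
down to a 1X path (multiplier 1) gives (4 - 1) / 1 = 3, which maps to
MTHCA_RATE_MEMFREE_QUARTER -- and a quarter of 4X is exactly the requested 1X.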
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 343eca507870..1985b5dfa481 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -965,6 +965,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	u32 *outbox;
 	u8 field;
 	u16 size;
+	u16 stat_rate;
 	int err;
 
 #define QUERY_DEV_LIM_OUT_SIZE             0x100
@@ -995,6 +996,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 #define QUERY_DEV_LIM_MTU_WIDTH_OFFSET     0x36
 #define QUERY_DEV_LIM_VL_PORT_OFFSET       0x37
 #define QUERY_DEV_LIM_MAX_GID_OFFSET       0x3b
+#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET  0x3c
 #define QUERY_DEV_LIM_MAX_PKEY_OFFSET      0x3f
 #define QUERY_DEV_LIM_FLAGS_OFFSET         0x44
 #define QUERY_DEV_LIM_RSVD_UAR_OFFSET      0x48
@@ -1086,6 +1088,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	dev_lim->num_ports = field & 0xf;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
 	dev_lim->max_gids = 1 << (field & 0xf);
+	MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET);
+	dev_lim->stat_rate_support = stat_rate;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
 	dev_lim->max_pkeys = 1 << (field & 0xf);
 	MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index e4ec35c40dd3..2f976f2051d6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -146,6 +146,7 @@ struct mthca_dev_lim {
 	int max_vl;
 	int num_ports;
 	int max_gids;
+	u16 stat_rate_support;
 	int max_pkeys;
 	u32 flags;
 	int reserved_uars;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ad52edbefe98..4c1dcb4c1822 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -151,6 +151,7 @@ struct mthca_limits {
 	int  reserved_qps;
 	int  num_srqs;
 	int  max_srq_wqes;
+	int  max_srq_sge;
 	int  reserved_srqs;
 	int  num_eecs;
 	int  reserved_eecs;
@@ -172,6 +173,7 @@ struct mthca_limits {
 	int  reserved_pds;
 	u32  page_size_cap;
 	u32  flags;
+	u16  stat_rate_support;
 	u8   port_width_cap;
 };
 
@@ -353,10 +355,24 @@ struct mthca_dev {
 	struct ib_mad_agent  *send_agent[MTHCA_MAX_PORTS][2];
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
 	spinlock_t            sm_lock;
+	u8                    rate[MTHCA_MAX_PORTS];
 };
 
-#define mthca_dbg(mdev, format, arg...) \
-	dev_dbg(&mdev->pdev->dev, format, ## arg)
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+extern int mthca_debug_level;
+
+#define mthca_dbg(mdev, format, arg...)					\
+	do {								\
+		if (mthca_debug_level)					\
+			dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
+	} while (0)
+
+#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
+#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
 #define mthca_err(mdev, format, arg...) \
 	dev_err(&mdev->pdev->dev, format, ## arg)
 #define mthca_info(mdev, format, arg...) \
@@ -492,6 +508,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		     enum ib_srq_attr_mask attr_mask);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
@@ -542,6 +559,8 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
 		  struct ib_ud_header *header);
 int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
 int mthca_ah_grh_present(struct mthca_ah *ah);
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
 
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index dfb482eac9a2..f235c7ea42f0 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -49,6 +49,30 @@ enum {
 	MTHCA_VENDOR_CLASS2 = 0xa
 };
 
+int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
+{
+	struct ib_port_attr *tprops = NULL;
+	int                  ret;
+
+	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+	if (!tprops)
+		return -ENOMEM;
+
+	ret = ib_query_port(&dev->ib_dev, port_num, tprops);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
+		       ret, dev->ib_dev.name, port_num);
+		goto out;
+	}
+
+	dev->rate[port_num - 1] = tprops->active_speed *
+				  ib_width_enum_to_int(tprops->active_width);
+
+out:
+	kfree(tprops);
+	return ret;
+}
+
 static void update_sm_ah(struct mthca_dev *dev,
 			 u8 port_num, u16 lid, u8 sl)
 {
@@ -90,6 +114,7 @@ static void smp_snoop(struct ib_device *ibdev,
 	     mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
 	    mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
 		if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
 				     be16_to_cpup((__be16 *) (mad->data + 58)),
 				     (*(u8 *) (mad->data + 76)) & 0xf);
@@ -246,6 +271,7 @@ int mthca_create_agents(struct mthca_dev *dev)
 {
 	struct ib_mad_agent *agent;
 	int p, q;
+	int ret;
 
 	spin_lock_init(&dev->sm_lock);
 
@@ -255,11 +281,23 @@ int mthca_create_agents(struct mthca_dev *dev)
 						      q ? IB_QPT_GSI : IB_QPT_SMI,
 						      NULL, 0, send_handler,
 						      NULL, NULL);
-			if (IS_ERR(agent))
+			if (IS_ERR(agent)) {
+				ret = PTR_ERR(agent);
 				goto err;
+			}
 			dev->send_agent[p][q] = agent;
 		}
 
+
+	for (p = 1; p <= dev->limits.num_ports; ++p) {
+		ret = mthca_update_rate(dev, p);
+		if (ret) {
+			mthca_err(dev, "Failed to obtain port %d rate."
+				  " aborting.\n", p);
+			goto err;
+		}
+	}
+
 	return 0;
 
 err:
@@ -268,7 +306,7 @@ err:
 			if (dev->send_agent[p][q])
 				ib_unregister_mad_agent(dev->send_agent[p][q]);
 
-	return PTR_ERR(agent);
+	return ret;
 }
 
 void __devexit mthca_free_agents(struct mthca_dev *dev)
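A note on the arithmetic in mthca_update_rate() above (illustrative, not part
of the patch): the cached per-port rate is active_speed times the port width,
in units of the 2.5 Gbps base rate -- e.g. a 4X SDR link gives 1 * 4 = 4, the
same multiplier ib_rate_to_mult() returns for IB_RATE_10_GBPS.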
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 266f347c6707..9b9ff7bff357 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+
+int mthca_debug_level = 0;
+module_param_named(debug_level, mthca_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
 #ifdef CONFIG_PCI_MSI
 
 static int msi_x = 0;
@@ -69,6 +77,10 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
 
 #endif /* CONFIG_PCI_MSI */
 
+static int tune_pci = 0;
+module_param(tune_pci, int, 0444);
+MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
+
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -90,6 +102,9 @@ static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
 	int cap;
 	u16 val;
 
+	if (!tune_pci)
+		return 0;
+
 	/* First try to max out Read Byte Count */
 	cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
 	if (cap) {
@@ -176,6 +191,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
 	mdev->limits.reserved_srqs      = dev_lim->reserved_srqs;
 	mdev->limits.reserved_eecs      = dev_lim->reserved_eecs;
 	mdev->limits.max_desc_sz        = dev_lim->max_desc_sz;
+	mdev->limits.max_srq_sge        = mthca_max_srq_sge(mdev);
 	/*
 	 * Subtract 1 from the limit because we need to allocate a
 	 * spare CQE so the HCA HW can tell the difference between an
@@ -191,6 +207,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
 	mdev->limits.port_width_cap     = dev_lim->max_port_width;
 	mdev->limits.page_size_cap      = ~(u32) (dev_lim->min_page_sz - 1);
 	mdev->limits.flags              = dev_lim->flags;
+	/*
+	 * For old FW that doesn't return static rate support, use a
+	 * value of 0x3 (only static rate values of 0 or 1 are handled),
+	 * except on Sinai, where even old FW can handle static rate
+	 * values of 2 and 3.
+	 */
+	if (dev_lim->stat_rate_support)
+		mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
+	else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		mdev->limits.stat_rate_support = 0xf;
+	else
+		mdev->limits.stat_rate_support = 0x3;
 
 	/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
 	   May be doable since hardware supports it for SRQ.
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 2c250bc11c33..565a24b1756f 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -106,7 +106,7 @@ static int mthca_query_device(struct ib_device *ibdev,
 	props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
 	props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
 	props->max_srq_wr          = mdev->limits.max_srq_wqes;
-	props->max_srq_sge         = mdev->limits.max_sg;
+	props->max_srq_sge         = mdev->limits.max_srq_sge;
 	props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
 	props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
 					IB_ATOMIC_HCA : IB_ATOMIC_NONE;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 2e7f52136965..6676a786d690 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -257,6 +257,8 @@ struct mthca_qp {
 	atomic_t               refcount;
 	u32                    qpn;
 	int                    is_direct;
+	u8                     port; /* for SQP and memfree use only */
+	u8                     alt_port; /* for memfree use only */
 	u8                     transport;
 	u8                     state;
 	u8                     atomic_rd_en;
@@ -278,7 +280,6 @@ struct mthca_qp {
 
 struct mthca_sqp {
 	struct mthca_qp qp;
-	int             port;
 	int             pkey_index;
 	u32             qkey;
 	u32             send_psn;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 057c8e6af87b..f37b0e367323 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -248,6 +248,9 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		return;
 	}
 
+	if (event_type == IB_EVENT_PATH_MIG)
+		qp->port = qp->alt_port;
+
 	event.device      = &dev->ib_dev;
 	event.event       = event_type;
 	event.element.qp  = &qp->ibqp;
@@ -392,10 +395,16 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
 {
 	memset(ib_ah_attr, 0, sizeof *path);
 	ib_ah_attr->port_num      = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+
+	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
+		return;
+
 	ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
 	ib_ah_attr->sl            = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
 	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
-	ib_ah_attr->static_rate   = path->static_rate & 0x7;
+	ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
+						     path->static_rate & 0x7,
+						     ib_ah_attr->port_num);
 	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 	if (ib_ah_attr->ah_flags) {
 		ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
@@ -455,8 +464,10 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
 	qp_attr->cap.max_inline_data = qp->max_inline_data;
 
-	to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
-	to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+	if (qp->transport == RC || qp->transport == UC) {
+		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+	}
 
 	qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
 	qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
@@ -484,11 +495,11 @@ out:
 }
 
 static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
-			  struct mthca_qp_path *path)
+			  struct mthca_qp_path *path, u8 port)
 {
 	path->g_mylmc     = ah->src_path_bits & 0x7f;
 	path->rlid        = cpu_to_be16(ah->dlid);
-	path->static_rate = !!ah->static_rate;
+	path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
 
 	if (ah->ah_flags & IB_AH_GRH) {
 		if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
@@ -634,7 +645,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 
 	if (qp->transport == MLX)
 		qp_context->pri_path.port_pkey |=
-			cpu_to_be32(to_msqp(qp)->port << 24);
+			cpu_to_be32(qp->port << 24);
 	else {
 		if (attr_mask & IB_QP_PORT) {
 			qp_context->pri_path.port_pkey |=
@@ -657,7 +668,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 
 	if (attr_mask & IB_QP_AV) {
-		if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path))
+		if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
+				   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
 			return -EINVAL;
 
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
@@ -681,7 +693,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 			return -EINVAL;
 		}
 
-		if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path))
+		if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
+				   attr->alt_ah_attr.port_num))
 			return -EINVAL;
 
 		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
@@ -791,6 +804,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp->atomic_rd_en = attr->qp_access_flags;
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
 		qp->resp_depth = attr->max_dest_rd_atomic;
+	if (attr_mask & IB_QP_PORT)
+		qp->port = attr->port_num;
+	if (attr_mask & IB_QP_ALT_PATH)
+		qp->alt_port = attr->alt_port_num;
 
 	if (is_sqp(dev, qp))
 		store_attrs(to_msqp(qp), attr, attr_mask);
@@ -802,13 +819,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (is_qp0(dev, qp)) {
 		if (cur_state != IB_QPS_RTR &&
 		    new_state == IB_QPS_RTR)
-			init_port(dev, to_msqp(qp)->port);
+			init_port(dev, qp->port);
 
 		if (cur_state != IB_QPS_RESET &&
 		    cur_state != IB_QPS_ERR &&
 		    (new_state == IB_QPS_RESET ||
 		     new_state == IB_QPS_ERR))
-			mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
+			mthca_CLOSE_IB(dev, qp->port, &status);
 	}
 
 	/*
@@ -1212,6 +1229,9 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 	if (qp->qpn == -1)
 		return -ENOMEM;
 
+	/* initialize port to zero for error-catching. */
+	qp->port = 0;
+
 	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
 				    send_policy, qp);
 	if (err) {
@@ -1261,7 +1281,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
 	if (err)
 		goto err_out;
 
-	sqp->port = port;
+	sqp->qp.port = port;
 	sqp->qp.qpn  = mqpn;
 	sqp->qp.transport = MLX;
 
@@ -1404,10 +1424,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
 		sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
 	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
 	if (!sqp->qp.ibqp.qp_num)
-		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
 				   sqp->pkey_index, &pkey);
 	else
-		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+		ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
 				   wr->wr.ud.pkey_index, &pkey);
 	sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
 	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 2dd3aea05341..adcaf85355ae 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -192,7 +192,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 
 	/* Sanity check SRQ size before proceeding */
 	if (attr->max_wr  > dev->limits.max_srq_wqes ||
-	    attr->max_sge > dev->limits.max_sg)
+	    attr->max_sge > dev->limits.max_srq_sge)
 		return -EINVAL;
 
 	srq->max      = attr->max_wr;
@@ -660,6 +660,31 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 	return err;
 }
 
+int mthca_max_srq_sge(struct mthca_dev *dev)
+{
+	if (mthca_is_memfree(dev))
+		return dev->limits.max_sg;
+
+	/*
+	 * SRQ allocations are based on powers of 2 for Tavor,
+	 * (although they only need to be multiples of 16 bytes).
+	 *
+	 * Therefore, we need to base the max number of sg entries on
+	 * the largest power of 2 descriptor size that is <= to the
+	 * actual max WQE descriptor size, rather than return the
+	 * max_sg value given by the firmware (which is based on WQE
+	 * sizes as multiples of 16, not powers of 2).
+	 *
+	 * If SRQ implementation is changed for Tavor to be based on
+	 * multiples of 16, the calculation below can be deleted and
+	 * the FW max_sg value returned.
+	 */
+	return min_t(int, dev->limits.max_sg,
+		     ((1 << (fls(dev->limits.max_desc_sz) - 1)) -
+		      sizeof (struct mthca_next_seg)) /
+		     sizeof (struct mthca_data_seg));
+}
+
 int __devinit mthca_init_srq_table(struct mthca_dev *dev)
 {
 	int err;
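A worked example for mthca_max_srq_sge() on Tavor (illustrative, with assumed
segment sizes): if the firmware reports max_desc_sz = 1000, the largest power
of 2 that fits is 512; assuming the usual 16-byte struct mthca_next_seg and
16-byte struct mthca_data_seg, the result is min(max_sg, (512 - 16) / 16) =
min(max_sg, 31) scatter/gather entries.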
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 8d2e04cac68e..13d6d01c72c0 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -10,8 +10,9 @@ config INFINIBAND_IPOIB
 	  group: <http://www.ietf.org/html.charters/ipoib-charter.html>.
 
 config INFINIBAND_IPOIB_DEBUG
-	bool "IP-over-InfiniBand debugging"
+	bool "IP-over-InfiniBand debugging" if EMBEDDED
 	depends on INFINIBAND_IPOIB
+	default y
 	---help---
 	  This option causes debugging code to be compiled into the
 	  IPoIB driver. The output can be turned on via the
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index b640107fb732..12a1e0572ef2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -65,6 +65,8 @@ enum {
 
 	IPOIB_RX_RING_SIZE        = 128,
 	IPOIB_TX_RING_SIZE        = 64,
+	IPOIB_MAX_QUEUE_SIZE      = 8192,
+	IPOIB_MIN_QUEUE_SIZE      = 2,
 
 	IPOIB_NUM_WC              = 4,
 
@@ -230,6 +232,9 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
 			     INFINIBAND_ALEN, sizeof(void *));
 }
 
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
+void ipoib_neigh_free(struct ipoib_neigh *neigh);
+
 extern struct workqueue_struct *ipoib_workqueue;
 
 /* functions */
@@ -329,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { }
 #define ipoib_warn(priv, format, arg...) \
 	ipoib_printk(KERN_WARNING, priv, format , ## arg)
 
+extern int ipoib_sendq_size;
+extern int ipoib_recvq_size;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 685258e34034..5dde380e8dbe 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -213,7 +213,7 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
 			   gid_buf, path.pathrec.dlid ? "yes" : "no");
 
 	if (path.pathrec.dlid) {
-		rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25;
+		rate = ib_rate_to_mult(path.pathrec.rate) * 25;
 
 		seq_printf(file,
 			   "  DLID:     0x%04x\n"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ed65202878d8..a54da42849ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+	for (i = 0; i < ipoib_recvq_size; ++i) {
 		if (ipoib_alloc_rx_skb(dev, i)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
 	if (wr_id & IPOIB_OP_RECV) {
 		wr_id &= ~IPOIB_OP_RECV;
 
-		if (wr_id < IPOIB_RX_RING_SIZE) {
+		if (wr_id < ipoib_recvq_size) {
 			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
 			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
 
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
 		struct ipoib_tx_buf *tx_req;
 		unsigned long flags;
 
-		if (wr_id >= IPOIB_TX_RING_SIZE) {
+		if (wr_id >= ipoib_sendq_size) {
 			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, IPOIB_TX_RING_SIZE);
+				   wr_id, ipoib_sendq_size);
 			return;
 		}
 
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
 		spin_lock_irqsave(&priv->tx_lock, flags);
 		++priv->tx_tail;
 		if (netif_queue_stopped(dev) &&
-		    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
 			netif_wake_queue(dev);
 		spin_unlock_irqrestore(&priv->tx_lock, flags);
 
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	 * means we have to make sure everything is properly recorded and
 	 * our state is consistent before we call post_send().
 	 */
-	tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
+	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
 	addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
 			      DMA_TO_DEVICE);
 	pci_unmap_addr_set(tx_req, mapping, addr);
 
-	if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
+	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn, addr, skb->len))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
-		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
 		}
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev)
 	int pending = 0;
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->rx_ring[i].skb)
 			++pending;
 
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
 	 */
 	while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 		tx_req = &priv->tx_ring[priv->tx_tail &
-					(IPOIB_TX_RING_SIZE - 1)];
+					(ipoib_sendq_size - 1)];
 		dma_unmap_single(priv->ca->dma_device,
 				 pci_unmap_addr(tx_req, mapping),
 				 tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
 		++priv->tx_tail;
 	}
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->rx_ring[i].skb) {
 			dma_unmap_single(priv->ca->dma_device,
 					 pci_unmap_addr(&priv->rx_ring[i],
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9b0bd7c746ca..cb078a7d0bf5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
 
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
 
+int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
+int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
+
+module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -252,8 +261,8 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
 		 */
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
-		*to_ipoib_neigh(neigh->neighbour) = NULL;
-		kfree(neigh);
+
+		ipoib_neigh_free(neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -327,9 +336,8 @@ void ipoib_flush_paths(struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
-	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 
 	list_splice(&priv->path_list, &remove_list);
 	INIT_LIST_HEAD(&priv->path_list);
@@ -337,14 +345,15 @@ void ipoib_flush_paths(struct net_device *dev)
 	list_for_each_entry(path, &remove_list, list)
 		rb_erase(&path->rb_node, &priv->path_tree);
 
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	list_for_each_entry_safe(path, tp, &remove_list, list) {
 		if (path->query)
 			ib_sa_cancel_query(path->query_id, path->query);
+		spin_unlock_irq(&priv->lock);
 		wait_for_completion(&path->done);
 		path_free(dev, path);
+		spin_lock_irq(&priv->lock);
 	}
+	spin_unlock_irq(&priv->lock);
 }
 
 static void path_rec_completion(int status,
@@ -373,16 +382,9 @@ static void path_rec_completion(int status,
 		struct ib_ah_attr av = {
 			.dlid 	       = be16_to_cpu(pathrec->dlid),
 			.sl 	       = pathrec->sl,
-			.port_num      = priv->port
+			.port_num      = priv->port,
+			.static_rate   = pathrec->rate
 		};
-		int path_rate = ib_sa_rate_enum_to_int(pathrec->rate);
-
-		if (path_rate > 0 && priv->local_rate > path_rate)
-			av.static_rate = (priv->local_rate - 1) / path_rate;
-
-		ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
-			  av.static_rate, priv->local_rate,
-			  ib_sa_rate_enum_to_int(pathrec->rate));
 
 		ah = ipoib_create_ah(dev, priv->pd, &av);
 	}
@@ -481,7 +483,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
 
-	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+	neigh = ipoib_neigh_alloc(skb->dst->neighbour);
 	if (!neigh) {
 		++priv->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
@@ -489,8 +491,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb_queue_head_init(&neigh->queue);
-	neigh->neighbour = skb->dst->neighbour;
-	*to_ipoib_neigh(skb->dst->neighbour) = neigh;
 
 	/*
 	 * We can only be called from ipoib_start_xmit, so we're
@@ -503,7 +503,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 		path = path_rec_create(dev,
 				       (union ib_gid *) (skb->dst->neighbour->ha + 4));
 		if (!path)
-			goto err;
+			goto err_path;
 
 		__path_add(dev, path);
 	}
@@ -521,17 +521,17 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 		__skb_queue_tail(&neigh->queue, skb);
 
 		if (!path->query && path_rec_start(dev, path))
-			goto err;
+			goto err_list;
 	}
 
 	spin_unlock(&priv->lock);
 	return;
 
-err:
-	*to_ipoib_neigh(skb->dst->neighbour) = NULL;
+err_list:
 	list_del(&neigh->list);
-	kfree(neigh);
 
+err_path:
+	ipoib_neigh_free(neigh);
 	++priv->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
 
@@ -763,8 +763,7 @@ static void ipoib_neigh_destructor(struct neighbour *n)
 		if (neigh->ah)
 			ah = neigh->ah;
 		list_del(&neigh->list);
-		*to_ipoib_neigh(n) = NULL;
-		kfree(neigh);
+		ipoib_neigh_free(neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -773,6 +772,26 @@ static void ipoib_neigh_destructor(struct neighbour *n)
 		ipoib_put_ah(ah);
 }
 
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
+{
+	struct ipoib_neigh *neigh;
+
+	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+	if (!neigh)
+		return NULL;
+
+	neigh->neighbour = neighbour;
+	*to_ipoib_neigh(neighbour) = neigh;
+
+	return neigh;
+}
+
+void ipoib_neigh_free(struct ipoib_neigh *neigh)
+{
+	*to_ipoib_neigh(neigh->neighbour) = NULL;
+	kfree(neigh);
+}
+
 static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
 {
 	parms->neigh_destructor = ipoib_neigh_destructor;
@@ -785,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
-
-	priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
+	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-		       ca->name, IPOIB_RX_RING_SIZE);
+		       ca->name, ipoib_recvq_size);
 		goto out;
 	}
 
-	priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
+	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
 				GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-		       ca->name, IPOIB_TX_RING_SIZE);
+		       ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
 
@@ -866,7 +884,7 @@ static void ipoib_setup(struct net_device *dev)
 	dev->hard_header_len 	 = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
 	dev->addr_len 		 = INFINIBAND_ALEN;
 	dev->type 		 = ARPHRD_INFINIBAND;
-	dev->tx_queue_len 	 = IPOIB_TX_RING_SIZE * 2;
+	dev->tx_queue_len 	 = ipoib_sendq_size * 2;
 	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
 	/* MTU will be reset when mcast join happens */
@@ -1118,6 +1136,14 @@ static int __init ipoib_init_module(void)
 {
 	int ret;
 
+	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
+	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
+
+	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
+	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+
 	ret = ipoib_register_debugfs();
 	if (ret)
 		return ret;
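A worked example of the clamping above (illustrative only): loading with, say,
modprobe ib_ipoib recv_queue_size=200 gives roundup_pow_of_two(200) = 256,
which already lies within [IPOIB_MIN_QUEUE_SIZE, IPOIB_MAX_QUEUE_SIZE] =
[2, 8192], so the receive ring is sized at 256; a request of 100000 would round
up to 131072 and then be clamped down to 8192.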
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 93c462eaf4fd..1dae4b238252 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,8 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
114 */ 114 */
115 if (neigh->ah) 115 if (neigh->ah)
116 ipoib_put_ah(neigh->ah); 116 ipoib_put_ah(neigh->ah);
117 *to_ipoib_neigh(neigh->neighbour) = NULL; 117 ipoib_neigh_free(neigh);
118 kfree(neigh);
119 } 118 }
120 119
121 spin_unlock_irqrestore(&priv->lock, flags); 120 spin_unlock_irqrestore(&priv->lock, flags);
@@ -251,6 +250,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
251 .port_num = priv->port, 250 .port_num = priv->port,
252 .sl = mcast->mcmember.sl, 251 .sl = mcast->mcmember.sl,
253 .ah_flags = IB_AH_GRH, 252 .ah_flags = IB_AH_GRH,
253 .static_rate = mcast->mcmember.rate,
254 .grh = { 254 .grh = {
255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label), 255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
256 .hop_limit = mcast->mcmember.hop_limit, 256 .hop_limit = mcast->mcmember.hop_limit,
@@ -258,17 +258,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
258 .traffic_class = mcast->mcmember.traffic_class 258 .traffic_class = mcast->mcmember.traffic_class
259 } 259 }
260 }; 260 };
261 int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate);
262
263 av.grh.dgid = mcast->mcmember.mgid; 261 av.grh.dgid = mcast->mcmember.mgid;
264 262
265 if (path_rate > 0 && priv->local_rate > path_rate)
266 av.static_rate = (priv->local_rate - 1) / path_rate;
267
268 ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
269 av.static_rate, priv->local_rate,
270 ib_sa_rate_enum_to_int(mcast->mcmember.rate));
271
272 ah = ipoib_create_ah(dev, priv->pd, &av); 263 ah = ipoib_create_ah(dev, priv->pd, &av);
273 if (!ah) { 264 if (!ah) {
274 ipoib_warn(priv, "ib_address_create failed\n"); 265 ipoib_warn(priv, "ib_address_create failed\n");
@@ -618,6 +609,22 @@ int ipoib_mcast_start_thread(struct net_device *dev)
618 return 0; 609 return 0;
619} 610}
620 611
612static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
613 struct ipoib_mcast *mcast)
614{
615 spin_lock_irq(&priv->lock);
616 if (mcast && mcast->query) {
617 ib_sa_cancel_query(mcast->query_id, mcast->query);
618 mcast->query = NULL;
619 spin_unlock_irq(&priv->lock);
620 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
621 IPOIB_GID_ARG(mcast->mcmember.mgid));
622 wait_for_completion(&mcast->done);
623 }
624 else
625 spin_unlock_irq(&priv->lock);
626}
627
621int ipoib_mcast_stop_thread(struct net_device *dev, int flush) 628int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
622{ 629{
623 struct ipoib_dev_priv *priv = netdev_priv(dev); 630 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -637,28 +644,10 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
637 if (flush) 644 if (flush)
638 flush_workqueue(ipoib_workqueue); 645 flush_workqueue(ipoib_workqueue);
639 646
640 spin_lock_irq(&priv->lock); 647 wait_for_mcast_join(priv, priv->broadcast);
641 if (priv->broadcast && priv->broadcast->query) {
642 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
643 priv->broadcast->query = NULL;
644 spin_unlock_irq(&priv->lock);
645 ipoib_dbg_mcast(priv, "waiting for bcast\n");
646 wait_for_completion(&priv->broadcast->done);
647 } else
648 spin_unlock_irq(&priv->lock);
649 648
650 list_for_each_entry(mcast, &priv->multicast_list, list) { 649 list_for_each_entry(mcast, &priv->multicast_list, list)
651 spin_lock_irq(&priv->lock); 650 wait_for_mcast_join(priv, mcast);
652 if (mcast->query) {
653 ib_sa_cancel_query(mcast->query_id, mcast->query);
654 mcast->query = NULL;
655 spin_unlock_irq(&priv->lock);
656 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
657 IPOIB_GID_ARG(mcast->mcmember.mgid));
658 wait_for_completion(&mcast->done);
659 } else
660 spin_unlock_irq(&priv->lock);
661 }
662 651
663 return 0; 652 return 0;
664} 653}
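The factored-out wait_for_mcast_join() keeps the original locking discipline intact: mcast->query is cancelled and cleared while priv->lock is held, but the lock is dropped before wait_for_completion(), since sleeping with a spinlock held is not allowed. Replacing two nearly identical open-coded copies with one helper also lets ipoib_mcast_restart_task() below reuse it.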
@@ -772,13 +761,11 @@ out:
772 if (skb->dst && 761 if (skb->dst &&
773 skb->dst->neighbour && 762 skb->dst->neighbour &&
774 !*to_ipoib_neigh(skb->dst->neighbour)) { 763 !*to_ipoib_neigh(skb->dst->neighbour)) {
775 struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 764 struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
776 765
777 if (neigh) { 766 if (neigh) {
778 kref_get(&mcast->ah->ref); 767 kref_get(&mcast->ah->ref);
779 neigh->ah = mcast->ah; 768 neigh->ah = mcast->ah;
780 neigh->neighbour = skb->dst->neighbour;
781 *to_ipoib_neigh(skb->dst->neighbour) = neigh;
782 list_add_tail(&neigh->list, &mcast->neigh_list); 769 list_add_tail(&neigh->list, &mcast->neigh_list);
783 } 770 }
784 } 771 }
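The open-coded kmalloc() plus pointer stitching in the multicast send path gives way to ipoib_neigh_alloc()/ipoib_neigh_free(), introduced by the neighbour-consolidation patch in this series (the ipoib.h and ipoib_main.c parts are not shown here). A plausible shape for the pair, reconstructed from the lines removed above and in ipoib_mcast_free(), is:

    /* Sketch reconstructed from the removed open-coded sequences. */
    struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
    {
    	struct ipoib_neigh *neigh;

    	neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
    	if (!neigh)
    		return NULL;

    	neigh->neighbour = neighbour;
    	*to_ipoib_neigh(neighbour) = neigh;	/* back-pointer in struct neighbour */

    	return neigh;
    }

    void ipoib_neigh_free(struct ipoib_neigh *neigh)
    {
    	*to_ipoib_neigh(neigh->neighbour) = NULL;
    	kfree(neigh);
    }

Centralising the pair means every allocation site sets the back-pointer the same way and every free site clears it, which is exactly the invariant the removed lines maintained by hand.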
@@ -913,6 +900,7 @@ void ipoib_mcast_restart_task(void *dev_ptr)
913 900
914 /* We have to cancel outside of the spinlock */ 901 /* We have to cancel outside of the spinlock */
915 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 902 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
903 wait_for_mcast_join(priv, mcast);
916 ipoib_mcast_leave(mcast->dev, mcast); 904 ipoib_mcast_leave(mcast->dev, mcast);
917 ipoib_mcast_free(mcast); 905 ipoib_mcast_free(mcast);
918 } 906 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5f0388027b25..1d49d1643c59 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
159 struct ipoib_dev_priv *priv = netdev_priv(dev); 159 struct ipoib_dev_priv *priv = netdev_priv(dev);
160 struct ib_qp_init_attr init_attr = { 160 struct ib_qp_init_attr init_attr = {
161 .cap = { 161 .cap = {
162 .max_send_wr = IPOIB_TX_RING_SIZE, 162 .max_send_wr = ipoib_sendq_size,
163 .max_recv_wr = IPOIB_RX_RING_SIZE, 163 .max_recv_wr = ipoib_recvq_size,
164 .max_send_sge = 1, 164 .max_send_sge = 1,
165 .max_recv_sge = 1 165 .max_recv_sge = 1
166 }, 166 },
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
175 } 175 }
176 176
177 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, 177 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
178 IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1); 178 ipoib_sendq_size + ipoib_recvq_size + 1);
179 if (IS_ERR(priv->cq)) { 179 if (IS_ERR(priv->cq)) {
180 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); 180 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
181 goto out_free_pd; 181 goto out_free_pd;
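The single CQ is shared by the send and receive queues, so it is sized to hold a completion for every work request that can be outstanding at once: ipoib_sendq_size + ipoib_recvq_size + 1 entries, replacing the old fixed-size macros.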
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fd8a95a9c5d3..5f2b3f6e4c47 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1434,6 +1434,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
1434 p = match_strdup(args); 1434 p = match_strdup(args);
1435 if (strlen(p) != 32) { 1435 if (strlen(p) != 32) {
1436 printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); 1436 printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
1437 kfree(p);
1437 goto out; 1438 goto out;
1438 } 1439 }
1439 1440
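The one-line SRP change plugs a leak in the options parser: match_strdup() returns a kmalloc()ed copy of the matched string, and the early goto out taken for a malformed destination GID previously returned without freeing that copy.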
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index f404fe21cc21..ad63c215efe5 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -91,34 +91,6 @@ enum ib_sa_selector {
91 IB_SA_BEST = 3 91 IB_SA_BEST = 3
92}; 92};
93 93
94enum ib_sa_rate {
95 IB_SA_RATE_2_5_GBPS = 2,
96 IB_SA_RATE_5_GBPS = 5,
97 IB_SA_RATE_10_GBPS = 3,
98 IB_SA_RATE_20_GBPS = 6,
99 IB_SA_RATE_30_GBPS = 4,
100 IB_SA_RATE_40_GBPS = 7,
101 IB_SA_RATE_60_GBPS = 8,
102 IB_SA_RATE_80_GBPS = 9,
103 IB_SA_RATE_120_GBPS = 10
104};
105
106static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate)
107{
108 switch (rate) {
109 case IB_SA_RATE_2_5_GBPS: return 1;
110 case IB_SA_RATE_5_GBPS: return 2;
111 case IB_SA_RATE_10_GBPS: return 4;
112 case IB_SA_RATE_20_GBPS: return 8;
113 case IB_SA_RATE_30_GBPS: return 12;
114 case IB_SA_RATE_40_GBPS: return 16;
115 case IB_SA_RATE_60_GBPS: return 24;
116 case IB_SA_RATE_80_GBPS: return 32;
117 case IB_SA_RATE_120_GBPS: return 48;
118 default: return -1;
119 }
120}
121
122/* 94/*
123 * Structures for SA records are named "struct ib_sa_xxx_rec." No 95 * Structures for SA records are named "struct ib_sa_xxx_rec." No
124 * attempt is made to pack structures to match the physical layout of 96 * attempt is made to pack structures to match the physical layout of
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c1ad6273ac6c..6bbf1b364400 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -314,6 +314,34 @@ enum ib_ah_flags {
314 IB_AH_GRH = 1 314 IB_AH_GRH = 1
315}; 315};
316 316
317enum ib_rate {
318 IB_RATE_PORT_CURRENT = 0,
319 IB_RATE_2_5_GBPS = 2,
320 IB_RATE_5_GBPS = 5,
321 IB_RATE_10_GBPS = 3,
322 IB_RATE_20_GBPS = 6,
323 IB_RATE_30_GBPS = 4,
324 IB_RATE_40_GBPS = 7,
325 IB_RATE_60_GBPS = 8,
326 IB_RATE_80_GBPS = 9,
327 IB_RATE_120_GBPS = 10
328};
329
330/**
331 * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
332 * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be
333 * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
334 * @rate: rate to convert.
335 */
336int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
337
338/**
339 * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
340 * enum.
341 * @mult: multiple to convert.
342 */
343enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
344
317struct ib_ah_attr { 345struct ib_ah_attr {
318 struct ib_global_route grh; 346 struct ib_global_route grh;
319 u16 dlid; 347 u16 dlid;
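The new enum ib_rate and the two conversion helpers replace the SA-specific enum and ib_sa_rate_enum_to_int() deleted from ib_sa.h above. Only the declarations appear in this header; the bodies are expected to live in the IB core (drivers/infiniband/core/verbs.c), which this excerpt does not show. A sketch consistent with the documented encoding, with the fallback return values being assumptions:

    /* Sketch: map the IB rate encoding to multiples of 2.5 Gbit/sec. */
    int ib_rate_to_mult(enum ib_rate rate)
    {
    	switch (rate) {
    	case IB_RATE_2_5_GBPS: return  1;
    	case IB_RATE_5_GBPS:   return  2;
    	case IB_RATE_10_GBPS:  return  4;
    	case IB_RATE_20_GBPS:  return  8;
    	case IB_RATE_30_GBPS:  return 12;
    	case IB_RATE_40_GBPS:  return 16;
    	case IB_RATE_60_GBPS:  return 24;
    	case IB_RATE_80_GBPS:  return 32;
    	case IB_RATE_120_GBPS: return 48;
    	default:	       return -1;	/* assumed error value */
    	}
    }

    /* Sketch: the inverse mapping. */
    enum ib_rate mult_to_ib_rate(int mult)
    {
    	switch (mult) {
    	case 1:  return IB_RATE_2_5_GBPS;
    	case 2:  return IB_RATE_5_GBPS;
    	case 4:  return IB_RATE_10_GBPS;
    	case 8:  return IB_RATE_20_GBPS;
    	case 12: return IB_RATE_30_GBPS;
    	case 16: return IB_RATE_40_GBPS;
    	case 24: return IB_RATE_60_GBPS;
    	case 32: return IB_RATE_80_GBPS;
    	case 48: return IB_RATE_120_GBPS;
    	default: return IB_RATE_PORT_CURRENT;	/* assumed fallback */
    	}
    }

This is what lets the IPoIB multicast join above drop its local rate arithmetic and store mcast->mcmember.rate directly in ah_attr.static_rate, leaving per-port scaling to the core and the HCA driver.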