aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-16 13:32:31 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-16 13:32:31 -0500
commite69381b4175ba162229646f6753ff1d87c24d468 (patch)
treeac4c03f6a0a1a0426832aa4f5c3b7732080c51cc
parent238ccbb050a243e935bb3fc679c2e4bbff7004aa (diff)
parent14f369d1d61e7ac6578c54ca9ce3caaf4072412c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (45 commits) RDMA/cxgb3: Fix error paths in post_send and post_recv RDMA/nes: Fix stale ARP issue RDMA/nes: FIN during MPA startup causes timeout RDMA/nes: Free kmap() resources RDMA/nes: Check for zero STag RDMA/nes: Fix Xansation test crash on cm_node ref_count RDMA/nes: Abnormal listener exit causes loopback node crash RDMA/nes: Fix crash in nes_accept() RDMA/nes: Resource not freed for REJECTed connections RDMA/nes: MPA request/response error checking RDMA/nes: Fix query of ORD values RDMA/nes: Fix MAX_CM_BUFFER define RDMA/nes: Pass correct size to ioremap_nocache() RDMA/nes: Update copyright and branding string RDMA/nes: Add max_cqe check to nes_create_cq() RDMA/nes: Clean up struct nes_qp RDMA/nes: Implement IB_SIGNAL_ALL_WR as an iWARP extension RDMA/nes: Add additional SFP+ PHY uC status check and PHY reset RDMA/nes: Correct fast memory registration implementation IB/ehca: Fix error paths in post_send and post_recv ...
-rw-r--r--Documentation/infiniband/ipoib.txt10
-rw-r--r--drivers/infiniband/core/addr.c275
-rw-r--r--drivers/infiniband/core/cma.c133
-rw-r--r--drivers/infiniband/core/sa_query.c6
-rw-r--r--drivers/infiniband/core/ucma.c57
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2
-rw-r--r--drivers/infiniband/hw/amso1100/c2_qp.c14
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c32
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c9
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c67
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c10
-rw-r--r--drivers/infiniband/hw/mlx4/main.c2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c25
-rw-r--r--drivers/infiniband/hw/nes/Kconfig9
-rw-r--r--drivers/infiniband/hw/nes/nes.c5
-rw-r--r--drivers/infiniband/hw/nes/nes.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c201
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h7
-rw-r--r--drivers/infiniband/hw/nes/nes_context.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c40
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h29
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_user.h3
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c817
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c1
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c122
-rw-r--r--drivers/net/mlx4/fw.c3
-rw-r--r--include/linux/mlx4/device.h1
-rw-r--r--include/rdma/ib_addr.h36
-rw-r--r--include/rdma/ib_sa.h6
-rw-r--r--include/rdma/ib_user_sa.h16
-rw-r--r--include/rdma/ib_verbs.h5
-rw-r--r--include/rdma/rdma_user_cm.h6
-rw-r--r--net/rds/ib.c4
-rw-r--r--net/rds/iw.c4
39 files changed, 1084 insertions, 907 deletions
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 6d40f00b358c..64eeb55d0c09 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -36,11 +36,11 @@ Datagram vs Connected modes
36 fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes. 36 fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
37 37
38 In connected mode, the IB RC (Reliable Connected) transport is used. 38 In connected mode, the IB RC (Reliable Connected) transport is used.
39 Connected mode is to takes advantage of the connected nature of the 39 Connected mode takes advantage of the connected nature of the IB
40 IB transport and allows an MTU up to the maximal IP packet size of 40 transport and allows an MTU up to the maximal IP packet size of 64K,
41 64K, which reduces the number of IP packets needed for handling 41 which reduces the number of IP packets needed for handling large UDP
42 large UDP datagrams, TCP segments, etc and increases the performance 42 datagrams, TCP segments, etc and increases the performance for large
43 for large messages. 43 messages.
44 44
45 In connected mode, the interface's UD QP is still used for multicast 45 In connected mode, the interface's UD QP is still used for multicast
46 and communication with peers that don't support connected mode. In 46 and communication with peers that don't support connected mode. In
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index bd07803e9183..abbb06996f9e 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -36,7 +36,6 @@
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/inetdevice.h> 37#include <linux/inetdevice.h>
38#include <linux/workqueue.h> 38#include <linux/workqueue.h>
39#include <linux/if_arp.h>
40#include <net/arp.h> 39#include <net/arp.h>
41#include <net/neighbour.h> 40#include <net/neighbour.h>
42#include <net/route.h> 41#include <net/route.h>
@@ -92,22 +91,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
92int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 91int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
93 const unsigned char *dst_dev_addr) 92 const unsigned char *dst_dev_addr)
94{ 93{
95 switch (dev->type) { 94 dev_addr->dev_type = dev->type;
96 case ARPHRD_INFINIBAND:
97 dev_addr->dev_type = RDMA_NODE_IB_CA;
98 break;
99 case ARPHRD_ETHER:
100 dev_addr->dev_type = RDMA_NODE_RNIC;
101 break;
102 default:
103 return -EADDRNOTAVAIL;
104 }
105
106 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 95 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
107 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 96 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
108 if (dst_dev_addr) 97 if (dst_dev_addr)
109 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 98 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
110 dev_addr->src_dev = dev; 99 dev_addr->bound_dev_if = dev->ifindex;
111 return 0; 100 return 0;
112} 101}
113EXPORT_SYMBOL(rdma_copy_addr); 102EXPORT_SYMBOL(rdma_copy_addr);
@@ -117,6 +106,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
117 struct net_device *dev; 106 struct net_device *dev;
118 int ret = -EADDRNOTAVAIL; 107 int ret = -EADDRNOTAVAIL;
119 108
109 if (dev_addr->bound_dev_if) {
110 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
111 if (!dev)
112 return -ENODEV;
113 ret = rdma_copy_addr(dev_addr, dev, NULL);
114 dev_put(dev);
115 return ret;
116 }
117
120 switch (addr->sa_family) { 118 switch (addr->sa_family) {
121 case AF_INET: 119 case AF_INET:
122 dev = ip_dev_find(&init_net, 120 dev = ip_dev_find(&init_net,
@@ -131,6 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
131 129
132#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 130#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
133 case AF_INET6: 131 case AF_INET6:
132 read_lock(&dev_base_lock);
134 for_each_netdev(&init_net, dev) { 133 for_each_netdev(&init_net, dev) {
135 if (ipv6_chk_addr(&init_net, 134 if (ipv6_chk_addr(&init_net,
136 &((struct sockaddr_in6 *) addr)->sin6_addr, 135 &((struct sockaddr_in6 *) addr)->sin6_addr,
@@ -139,6 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
139 break; 138 break;
140 } 139 }
141 } 140 }
141 read_unlock(&dev_base_lock);
142 break; 142 break;
143#endif 143#endif
144 } 144 }
@@ -176,48 +176,9 @@ static void queue_req(struct addr_req *req)
176 mutex_unlock(&lock); 176 mutex_unlock(&lock);
177} 177}
178 178
179static void addr_send_arp(struct sockaddr *dst_in) 179static int addr4_resolve(struct sockaddr_in *src_in,
180{ 180 struct sockaddr_in *dst_in,
181 struct rtable *rt; 181 struct rdma_dev_addr *addr)
182 struct flowi fl;
183
184 memset(&fl, 0, sizeof fl);
185
186 switch (dst_in->sa_family) {
187 case AF_INET:
188 fl.nl_u.ip4_u.daddr =
189 ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
190
191 if (ip_route_output_key(&init_net, &rt, &fl))
192 return;
193
194 neigh_event_send(rt->u.dst.neighbour, NULL);
195 ip_rt_put(rt);
196 break;
197
198#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
199 case AF_INET6:
200 {
201 struct dst_entry *dst;
202
203 fl.nl_u.ip6_u.daddr =
204 ((struct sockaddr_in6 *) dst_in)->sin6_addr;
205
206 dst = ip6_route_output(&init_net, NULL, &fl);
207 if (!dst)
208 return;
209
210 neigh_event_send(dst->neighbour, NULL);
211 dst_release(dst);
212 break;
213 }
214#endif
215 }
216}
217
218static int addr4_resolve_remote(struct sockaddr_in *src_in,
219 struct sockaddr_in *dst_in,
220 struct rdma_dev_addr *addr)
221{ 182{
222 __be32 src_ip = src_in->sin_addr.s_addr; 183 __be32 src_ip = src_in->sin_addr.s_addr;
223 __be32 dst_ip = dst_in->sin_addr.s_addr; 184 __be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -229,10 +190,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
229 memset(&fl, 0, sizeof fl); 190 memset(&fl, 0, sizeof fl);
230 fl.nl_u.ip4_u.daddr = dst_ip; 191 fl.nl_u.ip4_u.daddr = dst_ip;
231 fl.nl_u.ip4_u.saddr = src_ip; 192 fl.nl_u.ip4_u.saddr = src_ip;
193 fl.oif = addr->bound_dev_if;
194
232 ret = ip_route_output_key(&init_net, &rt, &fl); 195 ret = ip_route_output_key(&init_net, &rt, &fl);
233 if (ret) 196 if (ret)
234 goto out; 197 goto out;
235 198
199 src_in->sin_family = AF_INET;
200 src_in->sin_addr.s_addr = rt->rt_src;
201
202 if (rt->idev->dev->flags & IFF_LOOPBACK) {
203 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
204 if (!ret)
205 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
206 goto put;
207 }
208
236 /* If the device does ARP internally, return 'done' */ 209 /* If the device does ARP internally, return 'done' */
237 if (rt->idev->dev->flags & IFF_NOARP) { 210 if (rt->idev->dev->flags & IFF_NOARP) {
238 rdma_copy_addr(addr, rt->idev->dev, NULL); 211 rdma_copy_addr(addr, rt->idev->dev, NULL);
@@ -240,21 +213,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
240 } 213 }
241 214
242 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 215 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
243 if (!neigh) { 216 if (!neigh || !(neigh->nud_state & NUD_VALID)) {
217 neigh_event_send(rt->u.dst.neighbour, NULL);
244 ret = -ENODATA; 218 ret = -ENODATA;
219 if (neigh)
220 goto release;
245 goto put; 221 goto put;
246 } 222 }
247 223
248 if (!(neigh->nud_state & NUD_VALID)) {
249 ret = -ENODATA;
250 goto release;
251 }
252
253 if (!src_ip) {
254 src_in->sin_family = dst_in->sin_family;
255 src_in->sin_addr.s_addr = rt->rt_src;
256 }
257
258 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 224 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
259release: 225release:
260 neigh_release(neigh); 226 neigh_release(neigh);
@@ -265,52 +231,77 @@ out:
265} 231}
266 232
267#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 233#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
268static int addr6_resolve_remote(struct sockaddr_in6 *src_in, 234static int addr6_resolve(struct sockaddr_in6 *src_in,
269 struct sockaddr_in6 *dst_in, 235 struct sockaddr_in6 *dst_in,
270 struct rdma_dev_addr *addr) 236 struct rdma_dev_addr *addr)
271{ 237{
272 struct flowi fl; 238 struct flowi fl;
273 struct neighbour *neigh; 239 struct neighbour *neigh;
274 struct dst_entry *dst; 240 struct dst_entry *dst;
275 int ret = -ENODATA; 241 int ret;
276 242
277 memset(&fl, 0, sizeof fl); 243 memset(&fl, 0, sizeof fl);
278 fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; 244 ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
279 fl.nl_u.ip6_u.saddr = src_in->sin6_addr; 245 ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
246 fl.oif = addr->bound_dev_if;
280 247
281 dst = ip6_route_output(&init_net, NULL, &fl); 248 dst = ip6_route_output(&init_net, NULL, &fl);
282 if (!dst) 249 if ((ret = dst->error))
283 return ret; 250 goto put;
251
252 if (ipv6_addr_any(&fl.fl6_src)) {
253 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
254 &fl.fl6_dst, 0, &fl.fl6_src);
255 if (ret)
256 goto put;
257
258 src_in->sin6_family = AF_INET6;
259 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
260 }
261
262 if (dst->dev->flags & IFF_LOOPBACK) {
263 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
264 if (!ret)
265 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
266 goto put;
267 }
284 268
269 /* If the device does ARP internally, return 'done' */
285 if (dst->dev->flags & IFF_NOARP) { 270 if (dst->dev->flags & IFF_NOARP) {
286 ret = rdma_copy_addr(addr, dst->dev, NULL); 271 ret = rdma_copy_addr(addr, dst->dev, NULL);
287 } else { 272 goto put;
288 neigh = dst->neighbour; 273 }
289 if (neigh && (neigh->nud_state & NUD_VALID)) 274
290 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 275 neigh = dst->neighbour;
276 if (!neigh || !(neigh->nud_state & NUD_VALID)) {
277 neigh_event_send(dst->neighbour, NULL);
278 ret = -ENODATA;
279 goto put;
291 } 280 }
292 281
282 ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
283put:
293 dst_release(dst); 284 dst_release(dst);
294 return ret; 285 return ret;
295} 286}
296#else 287#else
297static int addr6_resolve_remote(struct sockaddr_in6 *src_in, 288static int addr6_resolve(struct sockaddr_in6 *src_in,
298 struct sockaddr_in6 *dst_in, 289 struct sockaddr_in6 *dst_in,
299 struct rdma_dev_addr *addr) 290 struct rdma_dev_addr *addr)
300{ 291{
301 return -EADDRNOTAVAIL; 292 return -EADDRNOTAVAIL;
302} 293}
303#endif 294#endif
304 295
305static int addr_resolve_remote(struct sockaddr *src_in, 296static int addr_resolve(struct sockaddr *src_in,
306 struct sockaddr *dst_in, 297 struct sockaddr *dst_in,
307 struct rdma_dev_addr *addr) 298 struct rdma_dev_addr *addr)
308{ 299{
309 if (src_in->sa_family == AF_INET) { 300 if (src_in->sa_family == AF_INET) {
310 return addr4_resolve_remote((struct sockaddr_in *) src_in, 301 return addr4_resolve((struct sockaddr_in *) src_in,
311 (struct sockaddr_in *) dst_in, addr); 302 (struct sockaddr_in *) dst_in, addr);
312 } else 303 } else
313 return addr6_resolve_remote((struct sockaddr_in6 *) src_in, 304 return addr6_resolve((struct sockaddr_in6 *) src_in,
314 (struct sockaddr_in6 *) dst_in, addr); 305 (struct sockaddr_in6 *) dst_in, addr);
315} 306}
316 307
@@ -327,8 +318,7 @@ static void process_req(struct work_struct *work)
327 if (req->status == -ENODATA) { 318 if (req->status == -ENODATA) {
328 src_in = (struct sockaddr *) &req->src_addr; 319 src_in = (struct sockaddr *) &req->src_addr;
329 dst_in = (struct sockaddr *) &req->dst_addr; 320 dst_in = (struct sockaddr *) &req->dst_addr;
330 req->status = addr_resolve_remote(src_in, dst_in, 321 req->status = addr_resolve(src_in, dst_in, req->addr);
331 req->addr);
332 if (req->status && time_after_eq(jiffies, req->timeout)) 322 if (req->status && time_after_eq(jiffies, req->timeout))
333 req->status = -ETIMEDOUT; 323 req->status = -ETIMEDOUT;
334 else if (req->status == -ENODATA) 324 else if (req->status == -ENODATA)
@@ -352,82 +342,6 @@ static void process_req(struct work_struct *work)
352 } 342 }
353} 343}
354 344
355static int addr_resolve_local(struct sockaddr *src_in,
356 struct sockaddr *dst_in,
357 struct rdma_dev_addr *addr)
358{
359 struct net_device *dev;
360 int ret;
361
362 switch (dst_in->sa_family) {
363 case AF_INET:
364 {
365 __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
366 __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
367
368 dev = ip_dev_find(&init_net, dst_ip);
369 if (!dev)
370 return -EADDRNOTAVAIL;
371
372 if (ipv4_is_zeronet(src_ip)) {
373 src_in->sa_family = dst_in->sa_family;
374 ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
375 ret = rdma_copy_addr(addr, dev, dev->dev_addr);
376 } else if (ipv4_is_loopback(src_ip)) {
377 ret = rdma_translate_ip(dst_in, addr);
378 if (!ret)
379 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
380 } else {
381 ret = rdma_translate_ip(src_in, addr);
382 if (!ret)
383 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
384 }
385 dev_put(dev);
386 break;
387 }
388
389#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
390 case AF_INET6:
391 {
392 struct in6_addr *a;
393
394 for_each_netdev(&init_net, dev)
395 if (ipv6_chk_addr(&init_net,
396 &((struct sockaddr_in6 *) dst_in)->sin6_addr,
397 dev, 1))
398 break;
399
400 if (!dev)
401 return -EADDRNOTAVAIL;
402
403 a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
404
405 if (ipv6_addr_any(a)) {
406 src_in->sa_family = dst_in->sa_family;
407 ((struct sockaddr_in6 *) src_in)->sin6_addr =
408 ((struct sockaddr_in6 *) dst_in)->sin6_addr;
409 ret = rdma_copy_addr(addr, dev, dev->dev_addr);
410 } else if (ipv6_addr_loopback(a)) {
411 ret = rdma_translate_ip(dst_in, addr);
412 if (!ret)
413 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
414 } else {
415 ret = rdma_translate_ip(src_in, addr);
416 if (!ret)
417 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
418 }
419 break;
420 }
421#endif
422
423 default:
424 ret = -EADDRNOTAVAIL;
425 break;
426 }
427
428 return ret;
429}
430
431int rdma_resolve_ip(struct rdma_addr_client *client, 345int rdma_resolve_ip(struct rdma_addr_client *client,
432 struct sockaddr *src_addr, struct sockaddr *dst_addr, 346 struct sockaddr *src_addr, struct sockaddr *dst_addr,
433 struct rdma_dev_addr *addr, int timeout_ms, 347 struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +357,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
443 if (!req) 357 if (!req)
444 return -ENOMEM; 358 return -ENOMEM;
445 359
446 if (src_addr) 360 src_in = (struct sockaddr *) &req->src_addr;
447 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); 361 dst_in = (struct sockaddr *) &req->dst_addr;
448 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); 362
363 if (src_addr) {
364 if (src_addr->sa_family != dst_addr->sa_family) {
365 ret = -EINVAL;
366 goto err;
367 }
368
369 memcpy(src_in, src_addr, ip_addr_size(src_addr));
370 } else {
371 src_in->sa_family = dst_addr->sa_family;
372 }
373
374 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
449 req->addr = addr; 375 req->addr = addr;
450 req->callback = callback; 376 req->callback = callback;
451 req->context = context; 377 req->context = context;
452 req->client = client; 378 req->client = client;
453 atomic_inc(&client->refcount); 379 atomic_inc(&client->refcount);
454 380
455 src_in = (struct sockaddr *) &req->src_addr; 381 req->status = addr_resolve(src_in, dst_in, addr);
456 dst_in = (struct sockaddr *) &req->dst_addr;
457
458 req->status = addr_resolve_local(src_in, dst_in, addr);
459 if (req->status == -EADDRNOTAVAIL)
460 req->status = addr_resolve_remote(src_in, dst_in, addr);
461
462 switch (req->status) { 382 switch (req->status) {
463 case 0: 383 case 0:
464 req->timeout = jiffies; 384 req->timeout = jiffies;
@@ -467,15 +387,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
467 case -ENODATA: 387 case -ENODATA:
468 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 388 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
469 queue_req(req); 389 queue_req(req);
470 addr_send_arp(dst_in);
471 break; 390 break;
472 default: 391 default:
473 ret = req->status; 392 ret = req->status;
474 atomic_dec(&client->refcount); 393 atomic_dec(&client->refcount);
475 kfree(req); 394 goto err;
476 break;
477 } 395 }
478 return ret; 396 return ret;
397err:
398 kfree(req);
399 return ret;
479} 400}
480EXPORT_SYMBOL(rdma_resolve_ip); 401EXPORT_SYMBOL(rdma_resolve_ip);
481 402
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 075317884b53..fbdd73106000 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
330 union ib_gid gid; 330 union ib_gid gid;
331 int ret = -ENODEV; 331 int ret = -ENODEV;
332 332
333 switch (rdma_node_get_transport(dev_addr->dev_type)) { 333 rdma_addr_get_sgid(dev_addr, &gid);
334 case RDMA_TRANSPORT_IB:
335 ib_addr_get_sgid(dev_addr, &gid);
336 break;
337 case RDMA_TRANSPORT_IWARP:
338 iw_addr_get_sgid(dev_addr, &gid);
339 break;
340 default:
341 return -ENODEV;
342 }
343
344 list_for_each_entry(cma_dev, &dev_list, list) { 334 list_for_each_entry(cma_dev, &dev_list, list) {
345 ret = ib_find_cached_gid(cma_dev->device, &gid, 335 ret = ib_find_cached_gid(cma_dev->device, &gid,
346 &id_priv->id.port_num, NULL); 336 &id_priv->id.port_num, NULL);
@@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1032 if (rt->num_paths == 2) 1022 if (rt->num_paths == 2)
1033 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1023 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1034 1024
1035 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1025 if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
1036 ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, 1026 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1037 &id->route.addr.dev_addr); 1027 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1038 if (ret) 1028 ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
1039 goto destroy_id; 1029 } else {
1030 ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
1031 &rt->addr.dev_addr);
1032 if (ret)
1033 goto destroy_id;
1034 }
1035 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1040 1036
1041 id_priv = container_of(id, struct rdma_id_private, id); 1037 id_priv = container_of(id, struct rdma_id_private, id);
1042 id_priv->state = CMA_CONNECT; 1038 id_priv->state = CMA_CONNECT;
@@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1071 cma_save_net_info(&id->route.addr, &listen_id->route.addr, 1067 cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1072 ip_ver, port, src, dst); 1068 ip_ver, port, src, dst);
1073 1069
1074 ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, 1070 if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
1075 &id->route.addr.dev_addr); 1071 ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
1076 if (ret) 1072 &id->route.addr.dev_addr);
1077 goto err; 1073 if (ret)
1074 goto err;
1075 }
1078 1076
1079 id_priv = container_of(id, struct rdma_id_private, id); 1077 id_priv = container_of(id, struct rdma_id_private, id);
1080 id_priv->state = CMA_CONNECT; 1078 id_priv->state = CMA_CONNECT;
@@ -1474,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
1474 mutex_unlock(&lock); 1472 mutex_unlock(&lock);
1475} 1473}
1476 1474
1477static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1478{
1479 struct sockaddr_storage addr_in;
1480
1481 memset(&addr_in, 0, sizeof addr_in);
1482 addr_in.ss_family = af;
1483 return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1484}
1485
1486int rdma_listen(struct rdma_cm_id *id, int backlog) 1475int rdma_listen(struct rdma_cm_id *id, int backlog)
1487{ 1476{
1488 struct rdma_id_private *id_priv; 1477 struct rdma_id_private *id_priv;
@@ -1490,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
1490 1479
1491 id_priv = container_of(id, struct rdma_id_private, id); 1480 id_priv = container_of(id, struct rdma_id_private, id);
1492 if (id_priv->state == CMA_IDLE) { 1481 if (id_priv->state == CMA_IDLE) {
1493 ret = cma_bind_any(id, AF_INET); 1482 ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
1483 ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
1494 if (ret) 1484 if (ret)
1495 return ret; 1485 return ret;
1496 } 1486 }
@@ -1565,8 +1555,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1565 struct sockaddr_in6 *sin6; 1555 struct sockaddr_in6 *sin6;
1566 1556
1567 memset(&path_rec, 0, sizeof path_rec); 1557 memset(&path_rec, 0, sizeof path_rec);
1568 ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); 1558 rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1569 ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); 1559 rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1570 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); 1560 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1571 path_rec.numb_path = 1; 1561 path_rec.numb_path = 1;
1572 path_rec.reversible = 1; 1562 path_rec.reversible = 1;
@@ -1781,7 +1771,11 @@ port_found:
1781 if (ret) 1771 if (ret)
1782 goto out; 1772 goto out;
1783 1773
1784 ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 1774 id_priv->id.route.addr.dev_addr.dev_type =
1775 (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
1776 ARPHRD_INFINIBAND : ARPHRD_ETHER;
1777
1778 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1785 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 1779 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1786 id_priv->id.port_num = p; 1780 id_priv->id.port_num = p;
1787 cma_attach_to_dev(id_priv, cma_dev); 1781 cma_attach_to_dev(id_priv, cma_dev);
@@ -1839,7 +1833,7 @@ out:
1839static int cma_resolve_loopback(struct rdma_id_private *id_priv) 1833static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1840{ 1834{
1841 struct cma_work *work; 1835 struct cma_work *work;
1842 struct sockaddr_in *src_in, *dst_in; 1836 struct sockaddr *src, *dst;
1843 union ib_gid gid; 1837 union ib_gid gid;
1844 int ret; 1838 int ret;
1845 1839
@@ -1853,14 +1847,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1853 goto err; 1847 goto err;
1854 } 1848 }
1855 1849
1856 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 1850 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1857 ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 1851 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1858 1852
1859 if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) { 1853 src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1860 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; 1854 if (cma_zero_addr(src)) {
1861 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; 1855 dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
1862 src_in->sin_family = dst_in->sin_family; 1856 if ((src->sa_family = dst->sa_family) == AF_INET) {
1863 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; 1857 ((struct sockaddr_in *) src)->sin_addr.s_addr =
1858 ((struct sockaddr_in *) dst)->sin_addr.s_addr;
1859 } else {
1860 ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
1861 &((struct sockaddr_in6 *) dst)->sin6_addr);
1862 }
1864 } 1863 }
1865 1864
1866 work->id = id_priv; 1865 work->id = id_priv;
@@ -1878,10 +1877,14 @@ err:
1878static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 1877static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1879 struct sockaddr *dst_addr) 1878 struct sockaddr *dst_addr)
1880{ 1879{
1881 if (src_addr && src_addr->sa_family) 1880 if (!src_addr || !src_addr->sa_family) {
1882 return rdma_bind_addr(id, src_addr); 1881 src_addr = (struct sockaddr *) &id->route.addr.src_addr;
1883 else 1882 if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
1884 return cma_bind_any(id, dst_addr->sa_family); 1883 ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
1884 ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
1885 }
1886 }
1887 return rdma_bind_addr(id, src_addr);
1885} 1888}
1886 1889
1887int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 1890int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2077,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
2077 return ret; 2080 return ret;
2078} 2081}
2079 2082
2083static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2084 struct sockaddr *addr)
2085{
2086#if defined(CONFIG_IPv6) || defined(CONFIG_IPV6_MODULE)
2087 struct sockaddr_in6 *sin6;
2088
2089 if (addr->sa_family != AF_INET6)
2090 return 0;
2091
2092 sin6 = (struct sockaddr_in6 *) addr;
2093 if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
2094 !sin6->sin6_scope_id)
2095 return -EINVAL;
2096
2097 dev_addr->bound_dev_if = sin6->sin6_scope_id;
2098#endif
2099 return 0;
2100}
2101
2080int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 2102int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2081{ 2103{
2082 struct rdma_id_private *id_priv; 2104 struct rdma_id_private *id_priv;
@@ -2089,7 +2111,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2089 if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) 2111 if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2090 return -EINVAL; 2112 return -EINVAL;
2091 2113
2092 if (!cma_any_addr(addr)) { 2114 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
2115 if (ret)
2116 goto err1;
2117
2118 if (cma_loopback_addr(addr)) {
2119 ret = cma_bind_loopback(id_priv);
2120 } else if (!cma_zero_addr(addr)) {
2093 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); 2121 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2094 if (ret) 2122 if (ret)
2095 goto err1; 2123 goto err1;
@@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2108 2136
2109 return 0; 2137 return 0;
2110err2: 2138err2:
2111 if (!cma_any_addr(addr)) { 2139 if (id_priv->cma_dev) {
2112 mutex_lock(&lock); 2140 mutex_lock(&lock);
2113 cma_detach_from_dev(id_priv); 2141 cma_detach_from_dev(id_priv);
2114 mutex_unlock(&lock); 2142 mutex_unlock(&lock);
@@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
2687 if (cma_any_addr(addr)) { 2715 if (cma_any_addr(addr)) {
2688 memset(mgid, 0, sizeof *mgid); 2716 memset(mgid, 0, sizeof *mgid);
2689 } else if ((addr->sa_family == AF_INET6) && 2717 } else if ((addr->sa_family == AF_INET6) &&
2690 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) == 2718 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
2691 0xFF10A01B)) { 2719 0xFF10A01B)) {
2692 /* IPv6 address is an SA assigned MGID. */ 2720 /* IPv6 address is an SA assigned MGID. */
2693 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 2721 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2722 } else if ((addr->sa_family == AF_INET6)) {
2723 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
2724 if (id_priv->id.ps == RDMA_PS_UDP)
2725 mc_map[7] = 0x01; /* Use RDMA CM signature */
2726 *mgid = *(union ib_gid *) (mc_map + 4);
2694 } else { 2727 } else {
2695 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); 2728 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2696 if (id_priv->id.ps == RDMA_PS_UDP) 2729 if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2716 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 2749 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
2717 if (id_priv->id.ps == RDMA_PS_UDP) 2750 if (id_priv->id.ps == RDMA_PS_UDP)
2718 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 2751 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2719 ib_addr_get_sgid(dev_addr, &rec.port_gid); 2752 rdma_addr_get_sgid(dev_addr, &rec.port_gid);
2720 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2753 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2721 rec.join_state = 1; 2754 rec.join_state = 1;
2722 2755
@@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
2815 2848
2816 dev_addr = &id_priv->id.route.addr.dev_addr; 2849 dev_addr = &id_priv->id.route.addr.dev_addr;
2817 2850
2818 if ((dev_addr->src_dev == ndev) && 2851 if ((dev_addr->bound_dev_if == ndev->ifindex) &&
2819 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { 2852 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
2820 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", 2853 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
2821 ndev->name, &id_priv->id); 2854 ndev->name, &id_priv->id);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 82543716d59e..7e1ffd8ccd5c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -604,6 +604,12 @@ retry:
604 return ret ? ret : id; 604 return ret ? ret : id;
605} 605}
606 606
607void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
608{
609 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
610}
611EXPORT_SYMBOL(ib_sa_unpack_path);
612
607static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, 613static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
608 int status, 614 int status,
609 struct ib_sa_mad *mad) 615 struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index bb96d3c4b0f4..b2e16c332d5b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -43,6 +43,7 @@
43#include <rdma/rdma_user_cm.h> 43#include <rdma/rdma_user_cm.h>
44#include <rdma/ib_marshall.h> 44#include <rdma/ib_marshall.h>
45#include <rdma/rdma_cm.h> 45#include <rdma/rdma_cm.h>
46#include <rdma/rdma_cm_ib.h>
46 47
47MODULE_AUTHOR("Sean Hefty"); 48MODULE_AUTHOR("Sean Hefty");
48MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); 49MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -562,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
562 switch (route->num_paths) { 563 switch (route->num_paths) {
563 case 0: 564 case 0:
564 dev_addr = &route->addr.dev_addr; 565 dev_addr = &route->addr.dev_addr;
565 ib_addr_get_dgid(dev_addr, 566 rdma_addr_get_dgid(dev_addr,
566 (union ib_gid *) &resp->ib_route[0].dgid); 567 (union ib_gid *) &resp->ib_route[0].dgid);
567 ib_addr_get_sgid(dev_addr, 568 rdma_addr_get_sgid(dev_addr,
568 (union ib_gid *) &resp->ib_route[0].sgid); 569 (union ib_gid *) &resp->ib_route[0].sgid);
569 resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 570 resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
570 break; 571 break;
571 case 2: 572 case 2:
@@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
812 return ret; 813 return ret;
813} 814}
814 815
816static int ucma_set_ib_path(struct ucma_context *ctx,
817 struct ib_path_rec_data *path_data, size_t optlen)
818{
819 struct ib_sa_path_rec sa_path;
820 struct rdma_cm_event event;
821 int ret;
822
823 if (optlen % sizeof(*path_data))
824 return -EINVAL;
825
826 for (; optlen; optlen -= sizeof(*path_data), path_data++) {
827 if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
828 IB_PATH_BIDIRECTIONAL))
829 break;
830 }
831
832 if (!optlen)
833 return -EINVAL;
834
835 ib_sa_unpack_path(path_data->path_rec, &sa_path);
836 ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
837 if (ret)
838 return ret;
839
840 memset(&event, 0, sizeof event);
841 event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
842 return ucma_event_handler(ctx->cm_id, &event);
843}
844
845static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
846 void *optval, size_t optlen)
847{
848 int ret;
849
850 switch (optname) {
851 case RDMA_OPTION_IB_PATH:
852 ret = ucma_set_ib_path(ctx, optval, optlen);
853 break;
854 default:
855 ret = -ENOSYS;
856 }
857
858 return ret;
859}
860
815static int ucma_set_option_level(struct ucma_context *ctx, int level, 861static int ucma_set_option_level(struct ucma_context *ctx, int level,
816 int optname, void *optval, size_t optlen) 862 int optname, void *optval, size_t optlen)
817{ 863{
@@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
821 case RDMA_OPTION_ID: 867 case RDMA_OPTION_ID:
822 ret = ucma_set_option_id(ctx, optname, optval, optlen); 868 ret = ucma_set_option_id(ctx, optname, optval, optlen);
823 break; 869 break;
870 case RDMA_OPTION_IB:
871 ret = ucma_set_option_ib(ctx, optname, optval, optlen);
872 break;
824 default: 873 default:
825 ret = -ENOSYS; 874 ret = -ENOSYS;
826 } 875 }
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 56feab6c251e..112d3970222a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -285,7 +285,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
285 285
286 ucontext = ibdev->alloc_ucontext(ibdev, &udata); 286 ucontext = ibdev->alloc_ucontext(ibdev, &udata);
287 if (IS_ERR(ucontext)) { 287 if (IS_ERR(ucontext)) {
288 ret = PTR_ERR(file->ucontext); 288 ret = PTR_ERR(ucontext);
289 goto err; 289 goto err;
290 } 290 }
291 291
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index a6d89440ad2c..ad518868df77 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -798,8 +798,10 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
798 u8 actual_sge_count; 798 u8 actual_sge_count;
799 u32 msg_size; 799 u32 msg_size;
800 800
801 if (qp->state > IB_QPS_RTS) 801 if (qp->state > IB_QPS_RTS) {
802 return -EINVAL; 802 err = -EINVAL;
803 goto out;
804 }
803 805
804 while (ib_wr) { 806 while (ib_wr) {
805 807
@@ -930,6 +932,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
930 ib_wr = ib_wr->next; 932 ib_wr = ib_wr->next;
931 } 933 }
932 934
935out:
933 if (err) 936 if (err)
934 *bad_wr = ib_wr; 937 *bad_wr = ib_wr;
935 return err; 938 return err;
@@ -944,8 +947,10 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
944 unsigned long lock_flags; 947 unsigned long lock_flags;
945 int err = 0; 948 int err = 0;
946 949
947 if (qp->state > IB_QPS_RTS) 950 if (qp->state > IB_QPS_RTS) {
948 return -EINVAL; 951 err = -EINVAL;
952 goto out;
953 }
949 954
950 /* 955 /*
951 * Try and post each work request 956 * Try and post each work request
@@ -998,6 +1003,7 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
998 ib_wr = ib_wr->next; 1003 ib_wr = ib_wr->next;
999 } 1004 }
1000 1005
1006out:
1001 if (err) 1007 if (err)
1002 *bad_wr = ib_wr; 1008 *bad_wr = ib_wr;
1003 return err; 1009 return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 1cecf98829ac..3eb8cecf81d7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -365,18 +365,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
365 spin_lock_irqsave(&qhp->lock, flag); 365 spin_lock_irqsave(&qhp->lock, flag);
366 if (qhp->attr.state > IWCH_QP_STATE_RTS) { 366 if (qhp->attr.state > IWCH_QP_STATE_RTS) {
367 spin_unlock_irqrestore(&qhp->lock, flag); 367 spin_unlock_irqrestore(&qhp->lock, flag);
368 return -EINVAL; 368 err = -EINVAL;
369 goto out;
369 } 370 }
370 num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, 371 num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
371 qhp->wq.sq_size_log2); 372 qhp->wq.sq_size_log2);
372 if (num_wrs <= 0) { 373 if (num_wrs <= 0) {
373 spin_unlock_irqrestore(&qhp->lock, flag); 374 spin_unlock_irqrestore(&qhp->lock, flag);
374 return -ENOMEM; 375 err = -ENOMEM;
376 goto out;
375 } 377 }
376 while (wr) { 378 while (wr) {
377 if (num_wrs == 0) { 379 if (num_wrs == 0) {
378 err = -ENOMEM; 380 err = -ENOMEM;
379 *bad_wr = wr;
380 break; 381 break;
381 } 382 }
382 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); 383 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -428,10 +429,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
428 wr->opcode); 429 wr->opcode);
429 err = -EINVAL; 430 err = -EINVAL;
430 } 431 }
431 if (err) { 432 if (err)
432 *bad_wr = wr;
433 break; 433 break;
434 }
435 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; 434 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
436 sqp->wr_id = wr->wr_id; 435 sqp->wr_id = wr->wr_id;
437 sqp->opcode = wr2opcode(t3_wr_opcode); 436 sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -454,6 +453,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
454 } 453 }
455 spin_unlock_irqrestore(&qhp->lock, flag); 454 spin_unlock_irqrestore(&qhp->lock, flag);
456 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 455 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
456
457out:
458 if (err)
459 *bad_wr = wr;
457 return err; 460 return err;
458} 461}
459 462
@@ -471,18 +474,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
471 spin_lock_irqsave(&qhp->lock, flag); 474 spin_lock_irqsave(&qhp->lock, flag);
472 if (qhp->attr.state > IWCH_QP_STATE_RTS) { 475 if (qhp->attr.state > IWCH_QP_STATE_RTS) {
473 spin_unlock_irqrestore(&qhp->lock, flag); 476 spin_unlock_irqrestore(&qhp->lock, flag);
474 return -EINVAL; 477 err = -EINVAL;
478 goto out;
475 } 479 }
476 num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, 480 num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
477 qhp->wq.rq_size_log2) - 1; 481 qhp->wq.rq_size_log2) - 1;
478 if (!wr) { 482 if (!wr) {
479 spin_unlock_irqrestore(&qhp->lock, flag); 483 spin_unlock_irqrestore(&qhp->lock, flag);
480 return -EINVAL; 484 err = -ENOMEM;
485 goto out;
481 } 486 }
482 while (wr) { 487 while (wr) {
483 if (wr->num_sge > T3_MAX_SGE) { 488 if (wr->num_sge > T3_MAX_SGE) {
484 err = -EINVAL; 489 err = -EINVAL;
485 *bad_wr = wr;
486 break; 490 break;
487 } 491 }
488 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); 492 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -494,10 +498,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
494 err = build_zero_stag_recv(qhp, wqe, wr); 498 err = build_zero_stag_recv(qhp, wqe, wr);
495 else 499 else
496 err = -ENOMEM; 500 err = -ENOMEM;
497 if (err) { 501
498 *bad_wr = wr; 502 if (err)
499 break; 503 break;
500 } 504
501 build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, 505 build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
502 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 506 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
503 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); 507 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@@ -511,6 +515,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
511 } 515 }
512 spin_unlock_irqrestore(&qhp->lock, flag); 516 spin_unlock_irqrestore(&qhp->lock, flag);
513 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); 517 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
518
519out:
520 if (err)
521 *bad_wr = wr;
514 return err; 522 return err;
515} 523}
516 524
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index c825142a2fb7..0136abd50dd4 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
375extern rwlock_t ehca_cq_idr_lock; 375extern rwlock_t ehca_cq_idr_lock;
376extern struct idr ehca_qp_idr; 376extern struct idr ehca_qp_idr;
377extern struct idr ehca_cq_idr; 377extern struct idr ehca_cq_idr;
378extern spinlock_t shca_list_lock;
378 379
379extern int ehca_static_rate; 380extern int ehca_static_rate;
380extern int ehca_port_act_time; 381extern int ehca_port_act_time;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 523e733c630e..3b87589b8ea0 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
169 unsigned long flags; 169 unsigned long flags;
170 u64 h_ret; 170 u64 h_ret;
171 171
172 spin_lock_irqsave(&eq->spinlock, flags);
173 ibmebus_free_irq(eq->ist, (void *)shca); 172 ibmebus_free_irq(eq->ist, (void *)shca);
174 173
175 h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); 174 spin_lock_irqsave(&shca_list_lock, flags);
175 eq->is_initialized = 0;
176 spin_unlock_irqrestore(&shca_list_lock, flags);
176 177
177 spin_unlock_irqrestore(&eq->spinlock, flags); 178 tasklet_kill(&eq->interrupt_task);
179
180 h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
178 181
179 if (h_ret != H_SUCCESS) { 182 if (h_ret != H_SUCCESS) {
180 ehca_err(&shca->ib_device, "Can't free EQ resources."); 183 ehca_err(&shca->ib_device, "Can't free EQ resources.");
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fb2d83c5bf01..129a6bebd6e3 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
123DEFINE_IDR(ehca_cq_idr); 123DEFINE_IDR(ehca_cq_idr);
124 124
125static LIST_HEAD(shca_list); /* list of all registered ehcas */ 125static LIST_HEAD(shca_list); /* list of all registered ehcas */
126static DEFINE_SPINLOCK(shca_list_lock); 126DEFINE_SPINLOCK(shca_list_lock);
127 127
128static struct timer_list poll_eqs_timer; 128static struct timer_list poll_eqs_timer;
129 129
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 8fd88cd828fd..e3ec7fdd67bd 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -400,7 +400,6 @@ static inline void map_ib_wc_status(u32 cqe_status,
400 400
401static inline int post_one_send(struct ehca_qp *my_qp, 401static inline int post_one_send(struct ehca_qp *my_qp,
402 struct ib_send_wr *cur_send_wr, 402 struct ib_send_wr *cur_send_wr,
403 struct ib_send_wr **bad_send_wr,
404 int hidden) 403 int hidden)
405{ 404{
406 struct ehca_wqe *wqe_p; 405 struct ehca_wqe *wqe_p;
@@ -412,8 +411,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
412 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); 411 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
413 if (unlikely(!wqe_p)) { 412 if (unlikely(!wqe_p)) {
414 /* too many posted work requests: queue overflow */ 413 /* too many posted work requests: queue overflow */
415 if (bad_send_wr)
416 *bad_send_wr = cur_send_wr;
417 ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " 414 ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
418 "qp_num=%x", my_qp->ib_qp.qp_num); 415 "qp_num=%x", my_qp->ib_qp.qp_num);
419 return -ENOMEM; 416 return -ENOMEM;
@@ -433,8 +430,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
433 */ 430 */
434 if (unlikely(ret)) { 431 if (unlikely(ret)) {
435 my_qp->ipz_squeue.current_q_offset = start_offset; 432 my_qp->ipz_squeue.current_q_offset = start_offset;
436 if (bad_send_wr)
437 *bad_send_wr = cur_send_wr;
438 ehca_err(my_qp->ib_qp.device, "Could not write WQE " 433 ehca_err(my_qp->ib_qp.device, "Could not write WQE "
439 "qp_num=%x", my_qp->ib_qp.qp_num); 434 "qp_num=%x", my_qp->ib_qp.qp_num);
440 return -EINVAL; 435 return -EINVAL;
@@ -448,7 +443,6 @@ int ehca_post_send(struct ib_qp *qp,
448 struct ib_send_wr **bad_send_wr) 443 struct ib_send_wr **bad_send_wr)
449{ 444{
450 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); 445 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
451 struct ib_send_wr *cur_send_wr;
452 int wqe_cnt = 0; 446 int wqe_cnt = 0;
453 int ret = 0; 447 int ret = 0;
454 unsigned long flags; 448 unsigned long flags;
@@ -457,7 +451,8 @@ int ehca_post_send(struct ib_qp *qp,
457 if (unlikely(my_qp->state < IB_QPS_RTS)) { 451 if (unlikely(my_qp->state < IB_QPS_RTS)) {
458 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", 452 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
459 my_qp->state, qp->qp_num); 453 my_qp->state, qp->qp_num);
460 return -EINVAL; 454 ret = -EINVAL;
455 goto out;
461 } 456 }
462 457
463 /* LOCK the QUEUE */ 458 /* LOCK the QUEUE */
@@ -476,24 +471,21 @@ int ehca_post_send(struct ib_qp *qp,
476 struct ib_send_wr circ_wr; 471 struct ib_send_wr circ_wr;
477 memset(&circ_wr, 0, sizeof(circ_wr)); 472 memset(&circ_wr, 0, sizeof(circ_wr));
478 circ_wr.opcode = IB_WR_RDMA_READ; 473 circ_wr.opcode = IB_WR_RDMA_READ;
479 post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */ 474 post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
480 wqe_cnt++; 475 wqe_cnt++;
481 ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); 476 ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
482 my_qp->message_count = my_qp->packet_count = 0; 477 my_qp->message_count = my_qp->packet_count = 0;
483 } 478 }
484 479
485 /* loop processes list of send reqs */ 480 /* loop processes list of send reqs */
486 for (cur_send_wr = send_wr; cur_send_wr != NULL; 481 while (send_wr) {
487 cur_send_wr = cur_send_wr->next) { 482 ret = post_one_send(my_qp, send_wr, 0);
488 ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
489 if (unlikely(ret)) { 483 if (unlikely(ret)) {
490 /* if one or more WQEs were successful, don't fail */
491 if (wqe_cnt)
492 ret = 0;
493 goto post_send_exit0; 484 goto post_send_exit0;
494 } 485 }
495 wqe_cnt++; 486 wqe_cnt++;
496 } /* eof for cur_send_wr */ 487 send_wr = send_wr->next;
488 }
497 489
498post_send_exit0: 490post_send_exit0:
499 iosync(); /* serialize GAL register access */ 491 iosync(); /* serialize GAL register access */
@@ -503,6 +495,10 @@ post_send_exit0:
503 my_qp, qp->qp_num, wqe_cnt, ret); 495 my_qp, qp->qp_num, wqe_cnt, ret);
504 my_qp->message_count += wqe_cnt; 496 my_qp->message_count += wqe_cnt;
505 spin_unlock_irqrestore(&my_qp->spinlock_s, flags); 497 spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
498
499out:
500 if (ret)
501 *bad_send_wr = send_wr;
506 return ret; 502 return ret;
507} 503}
508 504
@@ -511,7 +507,6 @@ static int internal_post_recv(struct ehca_qp *my_qp,
511 struct ib_recv_wr *recv_wr, 507 struct ib_recv_wr *recv_wr,
512 struct ib_recv_wr **bad_recv_wr) 508 struct ib_recv_wr **bad_recv_wr)
513{ 509{
514 struct ib_recv_wr *cur_recv_wr;
515 struct ehca_wqe *wqe_p; 510 struct ehca_wqe *wqe_p;
516 int wqe_cnt = 0; 511 int wqe_cnt = 0;
517 int ret = 0; 512 int ret = 0;
@@ -522,27 +517,23 @@ static int internal_post_recv(struct ehca_qp *my_qp,
522 if (unlikely(!HAS_RQ(my_qp))) { 517 if (unlikely(!HAS_RQ(my_qp))) {
523 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", 518 ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
524 my_qp, my_qp->real_qp_num, my_qp->ext_type); 519 my_qp, my_qp->real_qp_num, my_qp->ext_type);
525 return -ENODEV; 520 ret = -ENODEV;
521 goto out;
526 } 522 }
527 523
528 /* LOCK the QUEUE */ 524 /* LOCK the QUEUE */
529 spin_lock_irqsave(&my_qp->spinlock_r, flags); 525 spin_lock_irqsave(&my_qp->spinlock_r, flags);
530 526
531 /* loop processes list of send reqs */ 527 /* loop processes list of recv reqs */
532 for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; 528 while (recv_wr) {
533 cur_recv_wr = cur_recv_wr->next) {
534 u64 start_offset = my_qp->ipz_rqueue.current_q_offset; 529 u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
535 /* get pointer next to free WQE */ 530 /* get pointer next to free WQE */
536 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); 531 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
537 if (unlikely(!wqe_p)) { 532 if (unlikely(!wqe_p)) {
538 /* too many posted work requests: queue overflow */ 533 /* too many posted work requests: queue overflow */
539 if (bad_recv_wr) 534 ret = -ENOMEM;
540 *bad_recv_wr = cur_recv_wr; 535 ehca_err(dev, "Too many posted WQEs "
541 if (wqe_cnt == 0) { 536 "qp_num=%x", my_qp->real_qp_num);
542 ret = -ENOMEM;
543 ehca_err(dev, "Too many posted WQEs "
544 "qp_num=%x", my_qp->real_qp_num);
545 }
546 goto post_recv_exit0; 537 goto post_recv_exit0;
547 } 538 }
548 /* 539 /*
@@ -552,7 +543,7 @@ static int internal_post_recv(struct ehca_qp *my_qp,
552 rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; 543 rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
553 544
554 /* write a RECV WQE into the QUEUE */ 545 /* write a RECV WQE into the QUEUE */
555 ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr, 546 ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
556 rq_map_idx); 547 rq_map_idx);
557 /* 548 /*
558 * if something failed, 549 * if something failed,
@@ -560,22 +551,20 @@ static int internal_post_recv(struct ehca_qp *my_qp,
560 */ 551 */
561 if (unlikely(ret)) { 552 if (unlikely(ret)) {
562 my_qp->ipz_rqueue.current_q_offset = start_offset; 553 my_qp->ipz_rqueue.current_q_offset = start_offset;
563 *bad_recv_wr = cur_recv_wr; 554 ret = -EINVAL;
564 if (wqe_cnt == 0) { 555 ehca_err(dev, "Could not write WQE "
565 ret = -EINVAL; 556 "qp_num=%x", my_qp->real_qp_num);
566 ehca_err(dev, "Could not write WQE "
567 "qp_num=%x", my_qp->real_qp_num);
568 }
569 goto post_recv_exit0; 557 goto post_recv_exit0;
570 } 558 }
571 559
572 qmap_entry = &my_qp->rq_map.map[rq_map_idx]; 560 qmap_entry = &my_qp->rq_map.map[rq_map_idx];
573 qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id); 561 qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
574 qmap_entry->reported = 0; 562 qmap_entry->reported = 0;
575 qmap_entry->cqe_req = 1; 563 qmap_entry->cqe_req = 1;
576 564
577 wqe_cnt++; 565 wqe_cnt++;
578 } /* eof for cur_recv_wr */ 566 recv_wr = recv_wr->next;
567 } /* eof for recv_wr */
579 568
580post_recv_exit0: 569post_recv_exit0:
581 iosync(); /* serialize GAL register access */ 570 iosync(); /* serialize GAL register access */
@@ -584,6 +573,11 @@ post_recv_exit0:
584 ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", 573 ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
585 my_qp, my_qp->real_qp_num, wqe_cnt, ret); 574 my_qp, my_qp->real_qp_num, wqe_cnt, ret);
586 spin_unlock_irqrestore(&my_qp->spinlock_r, flags); 575 spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
576
577out:
578 if (ret)
579 *bad_recv_wr = recv_wr;
580
587 return ret; 581 return ret;
588} 582}
589 583
@@ -597,6 +591,7 @@ int ehca_post_recv(struct ib_qp *qp,
597 if (unlikely(my_qp->state == IB_QPS_RESET)) { 591 if (unlikely(my_qp->state == IB_QPS_RESET)) {
598 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", 592 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
599 my_qp->state, qp->qp_num); 593 my_qp->state, qp->qp_num);
594 *bad_recv_wr = recv_wr;
600 return -EINVAL; 595 return -EINVAL;
601 } 596 }
602 597
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 013d1380e77c..d2787fe80304 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,6 +39,7 @@
39#include <linux/delay.h> 39#include <linux/delay.h>
40#include <linux/netdevice.h> 40#include <linux/netdevice.h>
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/bitmap.h>
42 43
43#include "ipath_kernel.h" 44#include "ipath_kernel.h"
44#include "ipath_verbs.h" 45#include "ipath_verbs.h"
@@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1697 unsigned len, int avail) 1698 unsigned len, int avail)
1698{ 1699{
1699 unsigned long flags; 1700 unsigned long flags;
1700 unsigned end, cnt = 0, next; 1701 unsigned end, cnt = 0;
1701 1702
1702 /* There are two bits per send buffer (busy and generation) */ 1703 /* There are two bits per send buffer (busy and generation) */
1703 start *= 2; 1704 start *= 2;
@@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1748 1749
1749 if (dd->ipath_pioupd_thresh) { 1750 if (dd->ipath_pioupd_thresh) {
1750 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); 1751 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1751 next = find_first_bit(dd->ipath_pioavailkernel, end); 1752 cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
1752 while (next < end) {
1753 cnt++;
1754 next = find_next_bit(dd->ipath_pioavailkernel, end,
1755 next + 1);
1756 }
1757 } 1753 }
1758 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1754 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1759 1755
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3cb3f47a10b8..e596537ff353 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
103 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; 103 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
104 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) 104 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
105 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 105 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
106 if (dev->dev->caps.max_gso_sz) 106 if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
107 props->device_cap_flags |= IB_DEVICE_UD_TSO; 107 props->device_cap_flags |= IB_DEVICE_UD_TSO;
108 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) 108 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
109 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; 109 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 256a00c6aeea..989555cee883 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -54,7 +54,8 @@ enum {
54 /* 54 /*
55 * Largest possible UD header: send with GRH and immediate data. 55 * Largest possible UD header: send with GRH and immediate data.
56 */ 56 */
57 MLX4_IB_UD_HEADER_SIZE = 72 57 MLX4_IB_UD_HEADER_SIZE = 72,
58 MLX4_IB_LSO_HEADER_SPARE = 128,
58}; 59};
59 60
60struct mlx4_ib_sqp { 61struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
67}; 68};
68 69
69enum { 70enum {
70 MLX4_IB_MIN_SQ_STRIDE = 6 71 MLX4_IB_MIN_SQ_STRIDE = 6,
72 MLX4_IB_CACHE_LINE_SIZE = 64,
71}; 73};
72 74
73static const __be32 mlx4_ib_opcode[] = { 75static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
261 case IB_QPT_UD: 263 case IB_QPT_UD:
262 return sizeof (struct mlx4_wqe_ctrl_seg) + 264 return sizeof (struct mlx4_wqe_ctrl_seg) +
263 sizeof (struct mlx4_wqe_datagram_seg) + 265 sizeof (struct mlx4_wqe_datagram_seg) +
264 ((flags & MLX4_IB_QP_LSO) ? 64 : 0); 266 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
265 case IB_QPT_UC: 267 case IB_QPT_UC:
266 return sizeof (struct mlx4_wqe_ctrl_seg) + 268 return sizeof (struct mlx4_wqe_ctrl_seg) +
267 sizeof (struct mlx4_wqe_raddr_seg); 269 sizeof (struct mlx4_wqe_raddr_seg);
@@ -897,7 +899,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
897 899
898 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 900 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
899 (to_mlx4_st(ibqp->qp_type) << 16)); 901 (to_mlx4_st(ibqp->qp_type) << 16));
900 context->flags |= cpu_to_be32(1 << 8); /* DE? */
901 902
902 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) 903 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
903 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); 904 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1467,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
1467 1468
1468static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, 1469static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
1469 struct mlx4_ib_qp *qp, unsigned *lso_seg_len, 1470 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
1470 __be32 *lso_hdr_sz) 1471 __be32 *lso_hdr_sz, __be32 *blh)
1471{ 1472{
1472 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); 1473 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
1473 1474
1474 /* 1475 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
1475 * This is a temporary limitation and will be removed in 1476 *blh = cpu_to_be32(1 << 6);
1476 * a forthcoming FW release:
1477 */
1478 if (unlikely(halign > 64))
1479 return -EINVAL;
1480 1477
1481 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && 1478 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
1482 wr->num_sge > qp->sq.max_gs - (halign >> 4))) 1479 wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1522,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1522 __be32 dummy; 1519 __be32 dummy;
1523 __be32 *lso_wqe; 1520 __be32 *lso_wqe;
1524 __be32 uninitialized_var(lso_hdr_sz); 1521 __be32 uninitialized_var(lso_hdr_sz);
1522 __be32 blh;
1525 int i; 1523 int i;
1526 1524
1527 spin_lock_irqsave(&qp->sq.lock, flags); 1525 spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1530,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1530 1528
1531 for (nreq = 0; wr; ++nreq, wr = wr->next) { 1529 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1532 lso_wqe = &dummy; 1530 lso_wqe = &dummy;
1531 blh = 0;
1533 1532
1534 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { 1533 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1535 err = -ENOMEM; 1534 err = -ENOMEM;
@@ -1616,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1616 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 1615 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
1617 1616
1618 if (wr->opcode == IB_WR_LSO) { 1617 if (wr->opcode == IB_WR_LSO) {
1619 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz); 1618 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
1620 if (unlikely(err)) { 1619 if (unlikely(err)) {
1621 *bad_wr = wr; 1620 *bad_wr = wr;
1622 goto out; 1621 goto out;
@@ -1687,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1687 } 1686 }
1688 1687
1689 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | 1688 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
1690 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); 1689 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
1691 1690
1692 stamp = ind + qp->sq_spare_wqes; 1691 stamp = ind + qp->sq_spare_wqes;
1693 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); 1692 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index d449eb6ec78e..846dc97cf260 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -4,14 +4,13 @@ config INFINIBAND_NES
4 select LIBCRC32C 4 select LIBCRC32C
5 select INET_LRO 5 select INET_LRO
6 ---help--- 6 ---help---
7 This is a low-level driver for NetEffect RDMA enabled 7 This is the RDMA Network Interface Card (RNIC) driver for
8 Network Interface Cards (RNIC). 8 NetEffect Ethernet Cluster Server Adapters.
9 9
10config INFINIBAND_NES_DEBUG 10config INFINIBAND_NES_DEBUG
11 bool "Verbose debugging output" 11 bool "Verbose debugging output"
12 depends on INFINIBAND_NES 12 depends on INFINIBAND_NES
13 default n 13 default n
14 ---help--- 14 ---help---
15 This option causes the NetEffect RNIC driver to produce debug 15 This option enables debug messages from the NetEffect RNIC
16 messages. Select this if you are developing the driver 16 driver. Select this if you are diagnosing a problem.
17 or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index cbde0cfe27e0..b9d09bafd6c1 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -521,7 +521,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
521 spin_lock_init(&nesdev->indexed_regs_lock); 521 spin_lock_init(&nesdev->indexed_regs_lock);
522 522
523 /* Remap the PCI registers in adapter BAR0 to kernel VA space */ 523 /* Remap the PCI registers in adapter BAR0 to kernel VA space */
524 mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs)); 524 mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
525 pci_resource_len(pcidev, BAR_0));
525 if (mmio_regs == NULL) { 526 if (mmio_regs == NULL) {
526 printk(KERN_ERR PFX "Unable to remap BAR0\n"); 527 printk(KERN_ERR PFX "Unable to remap BAR0\n");
527 ret = -EIO; 528 ret = -EIO;
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bcc6abc4faff..98840564bb2f 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 73473db19863..39468c277036 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -52,6 +52,7 @@
52#include <linux/random.h> 52#include <linux/random.h>
53#include <linux/list.h> 53#include <linux/list.h>
54#include <linux/threads.h> 54#include <linux/threads.h>
55#include <linux/highmem.h>
55#include <net/arp.h> 56#include <net/arp.h>
56#include <net/neighbour.h> 57#include <net/neighbour.h>
57#include <net/route.h> 58#include <net/route.h>
@@ -251,6 +252,33 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
251 252
252 mpa_frame = (struct ietf_mpa_frame *)buffer; 253 mpa_frame = (struct ietf_mpa_frame *)buffer;
253 cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len); 254 cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
255 /* make sure mpa private data len is less than 512 bytes */
256 if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
257 nes_debug(NES_DBG_CM, "The received Length of Private"
258 " Data field exceeds 512 octets\n");
259 return -EINVAL;
260 }
261 /*
262 * make sure MPA receiver interoperate with the
263 * received MPA version and MPA key information
264 *
265 */
266 if (mpa_frame->rev != mpa_version) {
267 nes_debug(NES_DBG_CM, "The received mpa version"
268 " can not be interoperated\n");
269 return -EINVAL;
270 }
271 if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
272 if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
273 nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
274 return -EINVAL;
275 }
276 } else {
277 if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
278 nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
279 return -EINVAL;
280 }
281 }
254 282
255 if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) { 283 if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
256 nes_debug(NES_DBG_CM, "The received ietf buffer was not right" 284 nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
@@ -486,6 +514,8 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
486 send_reset(cm_node, NULL); 514 send_reset(cm_node, NULL);
487 break; 515 break;
488 default: 516 default:
517 add_ref_cm_node(cm_node);
518 send_reset(cm_node, NULL);
489 create_event(cm_node, NES_CM_EVENT_ABORTED); 519 create_event(cm_node, NES_CM_EVENT_ABORTED);
490 } 520 }
491} 521}
@@ -949,6 +979,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
949 reset_entry); 979 reset_entry);
950 { 980 {
951 struct nes_cm_node *loopback = cm_node->loopbackpartner; 981 struct nes_cm_node *loopback = cm_node->loopbackpartner;
982 enum nes_cm_node_state old_state;
952 if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) { 983 if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
953 rem_ref_cm_node(cm_node->cm_core, cm_node); 984 rem_ref_cm_node(cm_node->cm_core, cm_node);
954 } else { 985 } else {
@@ -960,11 +991,12 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
960 NES_CM_STATE_CLOSED; 991 NES_CM_STATE_CLOSED;
961 WARN_ON(1); 992 WARN_ON(1);
962 } else { 993 } else {
963 cm_node->state = 994 old_state = cm_node->state;
964 NES_CM_STATE_CLOSED; 995 cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
965 rem_ref_cm_node( 996 if (old_state != NES_CM_STATE_MPAREQ_RCVD)
966 cm_node->cm_core, 997 rem_ref_cm_node(
967 cm_node); 998 cm_node->cm_core,
999 cm_node);
968 } 1000 }
969 } else { 1001 } else {
970 struct nes_cm_event event; 1002 struct nes_cm_event event;
@@ -980,20 +1012,9 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
980 loopback->loc_port; 1012 loopback->loc_port;
981 event.cm_info.cm_id = loopback->cm_id; 1013 event.cm_info.cm_id = loopback->cm_id;
982 cm_event_connect_error(&event); 1014 cm_event_connect_error(&event);
1015 cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
983 loopback->state = NES_CM_STATE_CLOSED; 1016 loopback->state = NES_CM_STATE_CLOSED;
984 1017
985 event.cm_node = cm_node;
986 event.cm_info.rem_addr =
987 cm_node->rem_addr;
988 event.cm_info.loc_addr =
989 cm_node->loc_addr;
990 event.cm_info.rem_port =
991 cm_node->rem_port;
992 event.cm_info.loc_port =
993 cm_node->loc_port;
994 event.cm_info.cm_id = cm_node->cm_id;
995 cm_event_reset(&event);
996
997 rem_ref_cm_node(cm_node->cm_core, 1018 rem_ref_cm_node(cm_node->cm_core,
998 cm_node); 1019 cm_node);
999 1020
@@ -1077,12 +1098,13 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
1077/** 1098/**
1078 * nes_addr_resolve_neigh 1099 * nes_addr_resolve_neigh
1079 */ 1100 */
1080static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip) 1101static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
1081{ 1102{
1082 struct rtable *rt; 1103 struct rtable *rt;
1083 struct flowi fl; 1104 struct flowi fl;
1084 struct neighbour *neigh; 1105 struct neighbour *neigh;
1085 int rc = -1; 1106 int rc = arpindex;
1107 struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
1086 1108
1087 memset(&fl, 0, sizeof fl); 1109 memset(&fl, 0, sizeof fl);
1088 fl.nl_u.ip4_u.daddr = htonl(dst_ip); 1110 fl.nl_u.ip4_u.daddr = htonl(dst_ip);
@@ -1098,6 +1120,21 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
1098 nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" 1120 nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
1099 " is %pM, Gateway is 0x%08X \n", dst_ip, 1121 " is %pM, Gateway is 0x%08X \n", dst_ip,
1100 neigh->ha, ntohl(rt->rt_gateway)); 1122 neigh->ha, ntohl(rt->rt_gateway));
1123
1124 if (arpindex >= 0) {
1125 if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
1126 neigh->ha, ETH_ALEN)){
1127 /* Mac address same as in nes_arp_table */
1128 neigh_release(neigh);
1129 ip_rt_put(rt);
1130 return rc;
1131 }
1132
1133 nes_manage_arp_cache(nesvnic->netdev,
1134 nesadapter->arp_table[arpindex].mac_addr,
1135 dst_ip, NES_ARP_DELETE);
1136 }
1137
1101 nes_manage_arp_cache(nesvnic->netdev, neigh->ha, 1138 nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
1102 dst_ip, NES_ARP_ADD); 1139 dst_ip, NES_ARP_ADD);
1103 rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, 1140 rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
@@ -1113,7 +1150,6 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
1113 return rc; 1150 return rc;
1114} 1151}
1115 1152
1116
1117/** 1153/**
1118 * make_cm_node - create a new instance of a cm node 1154 * make_cm_node - create a new instance of a cm node
1119 */ 1155 */
@@ -1123,6 +1159,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1123{ 1159{
1124 struct nes_cm_node *cm_node; 1160 struct nes_cm_node *cm_node;
1125 struct timespec ts; 1161 struct timespec ts;
1162 int oldarpindex = 0;
1126 int arpindex = 0; 1163 int arpindex = 0;
1127 struct nes_device *nesdev; 1164 struct nes_device *nesdev;
1128 struct nes_adapter *nesadapter; 1165 struct nes_adapter *nesadapter;
@@ -1176,17 +1213,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1176 nesadapter = nesdev->nesadapter; 1213 nesadapter = nesdev->nesadapter;
1177 1214
1178 cm_node->loopbackpartner = NULL; 1215 cm_node->loopbackpartner = NULL;
1216
1179 /* get the mac addr for the remote node */ 1217 /* get the mac addr for the remote node */
1180 if (ipv4_is_loopback(htonl(cm_node->rem_addr))) 1218 if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
1181 arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE); 1219 arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
1182 else 1220 else {
1183 arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); 1221 oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
1222 arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
1223
1224 }
1184 if (arpindex < 0) { 1225 if (arpindex < 0) {
1185 arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr); 1226 kfree(cm_node);
1186 if (arpindex < 0) { 1227 return NULL;
1187 kfree(cm_node);
1188 return NULL;
1189 }
1190 } 1228 }
1191 1229
1192 /* copy the mac addr to node context */ 1230 /* copy the mac addr to node context */
@@ -1333,13 +1371,20 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
1333 case NES_CM_STATE_SYN_RCVD: 1371 case NES_CM_STATE_SYN_RCVD:
1334 case NES_CM_STATE_SYN_SENT: 1372 case NES_CM_STATE_SYN_SENT:
1335 case NES_CM_STATE_ESTABLISHED: 1373 case NES_CM_STATE_ESTABLISHED:
1336 case NES_CM_STATE_MPAREQ_SENT:
1337 case NES_CM_STATE_MPAREJ_RCVD: 1374 case NES_CM_STATE_MPAREJ_RCVD:
1338 cm_node->tcp_cntxt.rcv_nxt++; 1375 cm_node->tcp_cntxt.rcv_nxt++;
1339 cleanup_retrans_entry(cm_node); 1376 cleanup_retrans_entry(cm_node);
1340 cm_node->state = NES_CM_STATE_LAST_ACK; 1377 cm_node->state = NES_CM_STATE_LAST_ACK;
1341 send_fin(cm_node, NULL); 1378 send_fin(cm_node, NULL);
1342 break; 1379 break;
1380 case NES_CM_STATE_MPAREQ_SENT:
1381 create_event(cm_node, NES_CM_EVENT_ABORTED);
1382 cm_node->tcp_cntxt.rcv_nxt++;
1383 cleanup_retrans_entry(cm_node);
1384 cm_node->state = NES_CM_STATE_CLOSED;
1385 add_ref_cm_node(cm_node);
1386 send_reset(cm_node, NULL);
1387 break;
1343 case NES_CM_STATE_FIN_WAIT1: 1388 case NES_CM_STATE_FIN_WAIT1:
1344 cm_node->tcp_cntxt.rcv_nxt++; 1389 cm_node->tcp_cntxt.rcv_nxt++;
1345 cleanup_retrans_entry(cm_node); 1390 cleanup_retrans_entry(cm_node);
@@ -1590,6 +1635,7 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1590 break; 1635 break;
1591 case NES_CM_STATE_CLOSED: 1636 case NES_CM_STATE_CLOSED:
1592 cleanup_retrans_entry(cm_node); 1637 cleanup_retrans_entry(cm_node);
1638 add_ref_cm_node(cm_node);
1593 send_reset(cm_node, skb); 1639 send_reset(cm_node, skb);
1594 break; 1640 break;
1595 case NES_CM_STATE_TSA: 1641 case NES_CM_STATE_TSA:
@@ -1641,9 +1687,15 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1641 passive_open_err(cm_node, skb, 1); 1687 passive_open_err(cm_node, skb, 1);
1642 break; 1688 break;
1643 case NES_CM_STATE_LISTENING: 1689 case NES_CM_STATE_LISTENING:
1690 cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
1691 cleanup_retrans_entry(cm_node);
1692 cm_node->state = NES_CM_STATE_CLOSED;
1693 send_reset(cm_node, skb);
1694 break;
1644 case NES_CM_STATE_CLOSED: 1695 case NES_CM_STATE_CLOSED:
1645 cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); 1696 cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
1646 cleanup_retrans_entry(cm_node); 1697 cleanup_retrans_entry(cm_node);
1698 add_ref_cm_node(cm_node);
1647 send_reset(cm_node, skb); 1699 send_reset(cm_node, skb);
1648 break; 1700 break;
1649 case NES_CM_STATE_ESTABLISHED: 1701 case NES_CM_STATE_ESTABLISHED:
@@ -1712,8 +1764,13 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1712 dev_kfree_skb_any(skb); 1764 dev_kfree_skb_any(skb);
1713 break; 1765 break;
1714 case NES_CM_STATE_LISTENING: 1766 case NES_CM_STATE_LISTENING:
1767 cleanup_retrans_entry(cm_node);
1768 cm_node->state = NES_CM_STATE_CLOSED;
1769 send_reset(cm_node, skb);
1770 break;
1715 case NES_CM_STATE_CLOSED: 1771 case NES_CM_STATE_CLOSED:
1716 cleanup_retrans_entry(cm_node); 1772 cleanup_retrans_entry(cm_node);
1773 add_ref_cm_node(cm_node);
1717 send_reset(cm_node, skb); 1774 send_reset(cm_node, skb);
1718 break; 1775 break;
1719 case NES_CM_STATE_LAST_ACK: 1776 case NES_CM_STATE_LAST_ACK:
@@ -1974,7 +2031,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
1974 if (!cm_node) 2031 if (!cm_node)
1975 return NULL; 2032 return NULL;
1976 mpa_frame = &cm_node->mpa_frame; 2033 mpa_frame = &cm_node->mpa_frame;
1977 strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ); 2034 memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
1978 mpa_frame->flags = IETF_MPA_FLAGS_CRC; 2035 mpa_frame->flags = IETF_MPA_FLAGS_CRC;
1979 mpa_frame->rev = IETF_MPA_VERSION; 2036 mpa_frame->rev = IETF_MPA_VERSION;
1980 mpa_frame->priv_data_len = htons(private_data_len); 2037 mpa_frame->priv_data_len = htons(private_data_len);
@@ -2102,30 +2159,39 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
2102 cm_node->state = NES_CM_STATE_CLOSED; 2159 cm_node->state = NES_CM_STATE_CLOSED;
2103 rem_ref_cm_node(cm_core, cm_node); 2160 rem_ref_cm_node(cm_core, cm_node);
2104 } else { 2161 } else {
2105 ret = send_mpa_reject(cm_node); 2162 if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
2106 if (ret) { 2163 rem_ref_cm_node(cm_core, cm_node);
2107 cm_node->state = NES_CM_STATE_CLOSED; 2164 } else {
2108 err = send_reset(cm_node, NULL); 2165 ret = send_mpa_reject(cm_node);
2109 if (err) 2166 if (ret) {
2110 WARN_ON(1); 2167 cm_node->state = NES_CM_STATE_CLOSED;
2111 } else 2168 err = send_reset(cm_node, NULL);
2112 cm_id->add_ref(cm_id); 2169 if (err)
2170 WARN_ON(1);
2171 } else
2172 cm_id->add_ref(cm_id);
2173 }
2113 } 2174 }
2114 } else { 2175 } else {
2115 cm_node->cm_id = NULL; 2176 cm_node->cm_id = NULL;
2116 event.cm_node = loopback; 2177 if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
2117 event.cm_info.rem_addr = loopback->rem_addr; 2178 rem_ref_cm_node(cm_core, cm_node);
2118 event.cm_info.loc_addr = loopback->loc_addr; 2179 rem_ref_cm_node(cm_core, loopback);
2119 event.cm_info.rem_port = loopback->rem_port; 2180 } else {
2120 event.cm_info.loc_port = loopback->loc_port; 2181 event.cm_node = loopback;
2121 event.cm_info.cm_id = loopback->cm_id; 2182 event.cm_info.rem_addr = loopback->rem_addr;
2122 cm_event_mpa_reject(&event); 2183 event.cm_info.loc_addr = loopback->loc_addr;
2123 rem_ref_cm_node(cm_core, cm_node); 2184 event.cm_info.rem_port = loopback->rem_port;
2124 loopback->state = NES_CM_STATE_CLOSING; 2185 event.cm_info.loc_port = loopback->loc_port;
2186 event.cm_info.cm_id = loopback->cm_id;
2187 cm_event_mpa_reject(&event);
2188 rem_ref_cm_node(cm_core, cm_node);
2189 loopback->state = NES_CM_STATE_CLOSING;
2125 2190
2126 cm_id = loopback->cm_id; 2191 cm_id = loopback->cm_id;
2127 rem_ref_cm_node(cm_core, loopback); 2192 rem_ref_cm_node(cm_core, loopback);
2128 cm_id->rem_ref(cm_id); 2193 cm_id->rem_ref(cm_id);
2194 }
2129 } 2195 }
2130 2196
2131 return ret; 2197 return ret;
@@ -2164,11 +2230,15 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
2164 case NES_CM_STATE_CLOSING: 2230 case NES_CM_STATE_CLOSING:
2165 ret = -1; 2231 ret = -1;
2166 break; 2232 break;
2167 case NES_CM_STATE_MPAREJ_RCVD:
2168 case NES_CM_STATE_LISTENING: 2233 case NES_CM_STATE_LISTENING:
2234 cleanup_retrans_entry(cm_node);
2235 send_reset(cm_node, NULL);
2236 break;
2237 case NES_CM_STATE_MPAREJ_RCVD:
2169 case NES_CM_STATE_UNKNOWN: 2238 case NES_CM_STATE_UNKNOWN:
2170 case NES_CM_STATE_INITED: 2239 case NES_CM_STATE_INITED:
2171 case NES_CM_STATE_CLOSED: 2240 case NES_CM_STATE_CLOSED:
2241 case NES_CM_STATE_LISTENER_DESTROYED:
2172 ret = rem_ref_cm_node(cm_core, cm_node); 2242 ret = rem_ref_cm_node(cm_core, cm_node);
2173 break; 2243 break;
2174 case NES_CM_STATE_TSA: 2244 case NES_CM_STATE_TSA:
@@ -2687,8 +2757,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2687 struct nes_pd *nespd; 2757 struct nes_pd *nespd;
2688 u64 tagged_offset; 2758 u64 tagged_offset;
2689 2759
2690
2691
2692 ibqp = nes_get_qp(cm_id->device, conn_param->qpn); 2760 ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
2693 if (!ibqp) 2761 if (!ibqp)
2694 return -EINVAL; 2762 return -EINVAL;
@@ -2704,6 +2772,13 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2704 "%s\n", cm_node, nesvnic, nesvnic->netdev, 2772 "%s\n", cm_node, nesvnic, nesvnic->netdev,
2705 nesvnic->netdev->name); 2773 nesvnic->netdev->name);
2706 2774
2775 if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
2776 if (cm_node->loopbackpartner)
2777 rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
2778 rem_ref_cm_node(cm_node->cm_core, cm_node);
2779 return -EINVAL;
2780 }
2781
2707 /* associate the node with the QP */ 2782 /* associate the node with the QP */
2708 nesqp->cm_node = (void *)cm_node; 2783 nesqp->cm_node = (void *)cm_node;
2709 cm_node->nesqp = nesqp; 2784 cm_node->nesqp = nesqp;
@@ -2786,6 +2861,10 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2786 cpu_to_le32(conn_param->private_data_len + 2861 cpu_to_le32(conn_param->private_data_len +
2787 sizeof(struct ietf_mpa_frame)); 2862 sizeof(struct ietf_mpa_frame));
2788 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey; 2863 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
2864 if (nesqp->sq_kmapped) {
2865 nesqp->sq_kmapped = 0;
2866 kunmap(nesqp->page);
2867 }
2789 2868
2790 nesqp->nesqp_context->ird_ord_sizes |= 2869 nesqp->nesqp_context->ird_ord_sizes |=
2791 cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | 2870 cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -2929,7 +3008,7 @@ int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2929 if (cm_node->mpa_frame_size > MAX_CM_BUFFER) 3008 if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
2930 return -EINVAL; 3009 return -EINVAL;
2931 3010
2932 strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP); 3011 memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
2933 if (loopback) { 3012 if (loopback) {
2934 memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); 3013 memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
2935 loopback->mpa_frame.priv_data_len = pdata_len; 3014 loopback->mpa_frame.priv_data_len = pdata_len;
@@ -2974,6 +3053,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2974 if (!nesdev) 3053 if (!nesdev)
2975 return -EINVAL; 3054 return -EINVAL;
2976 3055
3056 if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
3057 return -EINVAL;
3058
2977 nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " 3059 nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
2978 "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, 3060 "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
2979 ntohl(nesvnic->local_ipaddr), 3061 ntohl(nesvnic->local_ipaddr),
@@ -3251,6 +3333,11 @@ static void cm_event_connected(struct nes_cm_event *event)
3251 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; 3333 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
3252 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; 3334 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
3253 3335
3336 if (nesqp->sq_kmapped) {
3337 nesqp->sq_kmapped = 0;
3338 kunmap(nesqp->page);
3339 }
3340
3254 /* use the reserved spot on the WQ for the extra first WQE */ 3341 /* use the reserved spot on the WQ for the extra first WQE */
3255 nesqp->nesqp_context->ird_ord_sizes &= 3342 nesqp->nesqp_context->ird_ord_sizes &=
3256 cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | 3343 cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -3346,7 +3433,7 @@ static void cm_event_connect_error(struct nes_cm_event *event)
3346 nesqp->cm_id = NULL; 3433 nesqp->cm_id = NULL;
3347 cm_id->provider_data = NULL; 3434 cm_id->provider_data = NULL;
3348 cm_event.event = IW_CM_EVENT_CONNECT_REPLY; 3435 cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
3349 cm_event.status = IW_CM_EVENT_STATUS_REJECTED; 3436 cm_event.status = -ECONNRESET;
3350 cm_event.provider_data = cm_id->provider_data; 3437 cm_event.provider_data = cm_id->provider_data;
3351 cm_event.local_addr = cm_id->local_addr; 3438 cm_event.local_addr = cm_id->local_addr;
3352 cm_event.remote_addr = cm_id->remote_addr; 3439 cm_event.remote_addr = cm_id->remote_addr;
@@ -3390,6 +3477,8 @@ static void cm_event_reset(struct nes_cm_event *event)
3390 3477
3391 nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id); 3478 nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
3392 nesqp = cm_id->provider_data; 3479 nesqp = cm_id->provider_data;
3480 if (!nesqp)
3481 return;
3393 3482
3394 nesqp->cm_id = NULL; 3483 nesqp->cm_id = NULL;
3395 /* cm_id->provider_data = NULL; */ 3484 /* cm_id->provider_data = NULL; */
@@ -3401,8 +3490,8 @@ static void cm_event_reset(struct nes_cm_event *event)
3401 cm_event.private_data = NULL; 3490 cm_event.private_data = NULL;
3402 cm_event.private_data_len = 0; 3491 cm_event.private_data_len = 0;
3403 3492
3404 ret = cm_id->event_handler(cm_id, &cm_event);
3405 cm_id->add_ref(cm_id); 3493 cm_id->add_ref(cm_id);
3494 ret = cm_id->event_handler(cm_id, &cm_event);
3406 atomic_inc(&cm_closes); 3495 atomic_inc(&cm_closes);
3407 cm_event.event = IW_CM_EVENT_CLOSE; 3496 cm_event.event = IW_CM_EVENT_CLOSE;
3408 cm_event.status = IW_CM_EVENT_STATUS_OK; 3497 cm_event.status = IW_CM_EVENT_STATUS_OK;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 90e8e4d8a5ce..d9825fda70a1 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
47#define IEFT_MPA_KEY_REP "MPA ID Rep Frame" 47#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
48#define IETF_MPA_KEY_SIZE 16 48#define IETF_MPA_KEY_SIZE 16
49#define IETF_MPA_VERSION 1 49#define IETF_MPA_VERSION 1
50#define IETF_MAX_PRIV_DATA_LEN 512
51#define IETF_MPA_FRAME_SIZE 20
50 52
51enum ietf_mpa_flags { 53enum ietf_mpa_flags {
52 IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ 54 IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@@ -169,7 +171,7 @@ struct nes_timer_entry {
169 171
170#define NES_CM_DEF_SEQ2 0x18ed5740 172#define NES_CM_DEF_SEQ2 0x18ed5740
171#define NES_CM_DEF_LOCAL_ID2 0xb807 173#define NES_CM_DEF_LOCAL_ID2 0xb807
172#define MAX_CM_BUFFER 512 174#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
173 175
174 176
175typedef u32 nes_addr_t; 177typedef u32 nes_addr_t;
@@ -198,6 +200,7 @@ enum nes_cm_node_state {
198 NES_CM_STATE_TIME_WAIT, 200 NES_CM_STATE_TIME_WAIT,
199 NES_CM_STATE_LAST_ACK, 201 NES_CM_STATE_LAST_ACK,
200 NES_CM_STATE_CLOSING, 202 NES_CM_STATE_CLOSING,
203 NES_CM_STATE_LISTENER_DESTROYED,
201 NES_CM_STATE_CLOSED 204 NES_CM_STATE_CLOSED
202}; 205};
203 206
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
index 0fb8d81d9a62..b4393a16099d 100644
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 3512d6de3019..b1c2cbb88f09 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -424,8 +424,9 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
424 424
425 nesadapter->base_pd = 1; 425 nesadapter->base_pd = 1;
426 426
427 nesadapter->device_cap_flags = 427 nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
428 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; 428 IB_DEVICE_MEM_WINDOW |
429 IB_DEVICE_MEM_MGT_EXTENSIONS;
429 430
430 nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) 431 nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
431 [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); 432 [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -436,11 +437,12 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
436 nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]); 437 nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
437 438
438 439
439 /* mark the usual suspect QPs and CQs as in use */ 440 /* mark the usual suspect QPs, MR and CQs as in use */
440 for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) { 441 for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
441 set_bit(u32temp, nesadapter->allocated_qps); 442 set_bit(u32temp, nesadapter->allocated_qps);
442 set_bit(u32temp, nesadapter->allocated_cqs); 443 set_bit(u32temp, nesadapter->allocated_cqs);
443 } 444 }
445 set_bit(0, nesadapter->allocated_mrs);
444 446
445 for (u32temp = 0; u32temp < 20; u32temp++) 447 for (u32temp = 0; u32temp < 20; u32temp++)
446 set_bit(u32temp, nesadapter->allocated_pds); 448 set_bit(u32temp, nesadapter->allocated_pds);
@@ -481,7 +483,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
481 nesadapter->max_irrq_wr = (u32temp >> 16) & 3; 483 nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
482 484
483 nesadapter->max_sge = 4; 485 nesadapter->max_sge = 4;
484 nesadapter->max_cqe = 32767; 486 nesadapter->max_cqe = 32766;
485 487
486 if (nes_read_eeprom_values(nesdev, nesadapter)) { 488 if (nes_read_eeprom_values(nesdev, nesadapter)) {
487 printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); 489 printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
@@ -1355,6 +1357,8 @@ int nes_init_phy(struct nes_device *nesdev)
1355 } 1357 }
1356 if ((phy_type == NES_PHY_TYPE_ARGUS) || 1358 if ((phy_type == NES_PHY_TYPE_ARGUS) ||
1357 (phy_type == NES_PHY_TYPE_SFP_D)) { 1359 (phy_type == NES_PHY_TYPE_SFP_D)) {
1360 u32 first_time = 1;
1361
1358 /* Check firmware heartbeat */ 1362 /* Check firmware heartbeat */
1359 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1363 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1360 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1364 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
@@ -1362,8 +1366,13 @@ int nes_init_phy(struct nes_device *nesdev)
1362 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1366 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1363 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1367 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1364 1368
1365 if (temp_phy_data != temp_phy_data2) 1369 if (temp_phy_data != temp_phy_data2) {
1366 return 0; 1370 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
1371 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1372 if ((temp_phy_data & 0xff) > 0x20)
1373 return 0;
1374 printk(PFX "Reinitializing PHY\n");
1375 }
1367 1376
1368 /* no heartbeat, configure the PHY */ 1377 /* no heartbeat, configure the PHY */
1369 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); 1378 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
@@ -1399,7 +1408,7 @@ int nes_init_phy(struct nes_device *nesdev)
1399 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1408 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1400 do { 1409 do {
1401 if (counter++ > 150) { 1410 if (counter++ > 150) {
1402 nes_debug(NES_DBG_PHY, "No PHY heartbeat\n"); 1411 printk(PFX "No PHY heartbeat\n");
1403 break; 1412 break;
1404 } 1413 }
1405 mdelay(1); 1414 mdelay(1);
@@ -1413,11 +1422,20 @@ int nes_init_phy(struct nes_device *nesdev)
1413 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); 1422 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
1414 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1423 temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1415 if (counter++ > 300) { 1424 if (counter++ > 300) {
1416 nes_debug(NES_DBG_PHY, "PHY did not track\n"); 1425 if (((temp_phy_data & 0xff) == 0x0) && first_time) {
1417 break; 1426 first_time = 0;
1427 counter = 0;
1428 /* reset AMCC PHY and try again */
1429 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
1430 nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
1431 continue;
1432 } else {
1433 printk(PFX "PHY did not track\n");
1434 break;
1435 }
1418 } 1436 }
1419 mdelay(10); 1437 mdelay(10);
1420 } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70)); 1438 } while ((temp_phy_data & 0xff) < 0x30);
1421 1439
1422 /* setup signal integrity */ 1440 /* setup signal integrity */
1423 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); 1441 nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index f28a41ba9fa1..084be0ee689b 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1,5 +1,5 @@
1/* 1/*
2* Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3* 3*
4* This software is available to you under a choice of one of two 4* This software is available to you under a choice of one of two
5* licenses. You may choose to be licensed under the terms of the GNU 5* licenses. You may choose to be licensed under the terms of the GNU
@@ -546,11 +546,23 @@ enum nes_iwarp_sq_fmr_wqe_word_idx {
546 NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14, 546 NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
547}; 547};
548 548
549enum nes_iwarp_sq_fmr_opcodes {
550 NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6),
551 NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7),
552 NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7),
553 NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16),
554 NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17),
555 NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18),
556 NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
557 NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20),
558};
559
560#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF;
561
549enum nes_iwarp_sq_locinv_wqe_word_idx { 562enum nes_iwarp_sq_locinv_wqe_word_idx {
550 NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6, 563 NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
551}; 564};
552 565
553
554enum nes_iwarp_rq_wqe_word_idx { 566enum nes_iwarp_rq_wqe_word_idx {
555 NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1, 567 NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
556 NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2, 568 NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
@@ -1153,6 +1165,19 @@ struct nes_pbl {
1153 /* TODO: need to add list for two level tables */ 1165 /* TODO: need to add list for two level tables */
1154}; 1166};
1155 1167
1168#define NES_4K_PBL_CHUNK_SIZE 4096
1169
1170struct nes_fast_mr_wqe_pbl {
1171 u64 *kva;
1172 dma_addr_t paddr;
1173};
1174
1175struct nes_ib_fast_reg_page_list {
1176 struct ib_fast_reg_page_list ibfrpl;
1177 struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
1178 u64 pbl;
1179};
1180
1156struct nes_listener { 1181struct nes_listener {
1157 struct work_struct work; 1182 struct work_struct work;
1158 struct workqueue_struct *wq; 1183 struct workqueue_struct *wq;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index de18fdfdadf2..ab1102780186 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index cc90c14b49eb..71e133ab209b 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Topspin Communications. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
5 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 5 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
@@ -86,6 +86,7 @@ enum iwnes_memreg_type {
86 IWNES_MEMREG_TYPE_CQ = 0x0002, 86 IWNES_MEMREG_TYPE_CQ = 0x0002,
87 IWNES_MEMREG_TYPE_MW = 0x0003, 87 IWNES_MEMREG_TYPE_MW = 0x0003,
88 IWNES_MEMREG_TYPE_FMR = 0x0004, 88 IWNES_MEMREG_TYPE_FMR = 0x0004,
89 IWNES_MEMREG_TYPE_FMEM = 0x0005,
89}; 90};
90 91
91struct nes_mem_reg_req { 92struct nes_mem_reg_req {
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 9687c397ce1a..729d525c5b70 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index a680c42d6e8c..64d3136e3747 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -275,342 +275,236 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
275} 275}
276 276
277 277
278/** 278/*
279 * nes_alloc_fmr 279 * nes_alloc_fast_mr
280 */ 280 */
281static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, 281static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
282 int ibmr_access_flags, 282 u32 stag, u32 page_count)
283 struct ib_fmr_attr *ibfmr_attr)
284{ 283{
285 unsigned long flags;
286 struct nes_pd *nespd = to_nespd(ibpd);
287 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
288 struct nes_device *nesdev = nesvnic->nesdev;
289 struct nes_adapter *nesadapter = nesdev->nesadapter;
290 struct nes_fmr *nesfmr;
291 struct nes_cqp_request *cqp_request;
292 struct nes_hw_cqp_wqe *cqp_wqe; 284 struct nes_hw_cqp_wqe *cqp_wqe;
285 struct nes_cqp_request *cqp_request;
286 unsigned long flags;
293 int ret; 287 int ret;
294 u32 stag; 288 struct nes_adapter *nesadapter = nesdev->nesadapter;
295 u32 stag_index = 0;
296 u32 next_stag_index = 0;
297 u32 driver_key = 0;
298 u32 opcode = 0; 289 u32 opcode = 0;
299 u8 stag_key = 0; 290 u16 major_code;
300 int i=0; 291 u64 region_length = page_count * PAGE_SIZE;
301 struct nes_vpbl vpbl;
302
303 get_random_bytes(&next_stag_index, sizeof(next_stag_index));
304 stag_key = (u8)next_stag_index;
305
306 driver_key = 0;
307
308 next_stag_index >>= 8;
309 next_stag_index %= nesadapter->max_mr;
310
311 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
312 nesadapter->max_mr, &stag_index, &next_stag_index);
313 if (ret) {
314 goto failed_resource_alloc;
315 }
316
317 nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
318 if (!nesfmr) {
319 ret = -ENOMEM;
320 goto failed_fmr_alloc;
321 }
322
323 nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
324 if (ibfmr_attr->max_pages == 1) {
325 /* use zero length PBL */
326 nesfmr->nesmr.pbl_4k = 0;
327 nesfmr->nesmr.pbls_used = 0;
328 } else if (ibfmr_attr->max_pages <= 32) {
329 /* use PBL 256 */
330 nesfmr->nesmr.pbl_4k = 0;
331 nesfmr->nesmr.pbls_used = 1;
332 } else if (ibfmr_attr->max_pages <= 512) {
333 /* use 4K PBLs */
334 nesfmr->nesmr.pbl_4k = 1;
335 nesfmr->nesmr.pbls_used = 1;
336 } else {
337 /* use two level 4K PBLs */
338 /* add support for two level 256B PBLs */
339 nesfmr->nesmr.pbl_4k = 1;
340 nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
341 ((ibfmr_attr->max_pages & 511) ? 1 : 0);
342 }
343 /* Register the region with the adapter */
344 spin_lock_irqsave(&nesadapter->pbl_lock, flags);
345
346 /* track PBL resources */
347 if (nesfmr->nesmr.pbls_used != 0) {
348 if (nesfmr->nesmr.pbl_4k) {
349 if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
350 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
351 ret = -ENOMEM;
352 goto failed_vpbl_avail;
353 } else {
354 nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
355 }
356 } else {
357 if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
358 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
359 ret = -ENOMEM;
360 goto failed_vpbl_avail;
361 } else {
362 nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
363 }
364 }
365 }
366
367 /* one level pbl */
368 if (nesfmr->nesmr.pbls_used == 0) {
369 nesfmr->root_vpbl.pbl_vbase = NULL;
370 nes_debug(NES_DBG_MR, "zero level pbl \n");
371 } else if (nesfmr->nesmr.pbls_used == 1) {
372 /* can change it to kmalloc & dma_map_single */
373 nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
374 &nesfmr->root_vpbl.pbl_pbase);
375 if (!nesfmr->root_vpbl.pbl_vbase) {
376 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
377 ret = -ENOMEM;
378 goto failed_vpbl_alloc;
379 }
380 nesfmr->leaf_pbl_cnt = 0;
381 nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
382 nesfmr->root_vpbl.pbl_vbase);
383 }
384 /* two level pbl */
385 else {
386 nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
387 &nesfmr->root_vpbl.pbl_pbase);
388 if (!nesfmr->root_vpbl.pbl_vbase) {
389 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
390 ret = -ENOMEM;
391 goto failed_vpbl_alloc;
392 }
393
394 nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;
395 nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_ATOMIC);
396 if (!nesfmr->root_vpbl.leaf_vpbl) {
397 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
398 ret = -ENOMEM;
399 goto failed_leaf_vpbl_alloc;
400 }
401
402 nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
403 " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
404 nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
405
406 for (i=0; i<nesfmr->leaf_pbl_cnt; i++)
407 nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL;
408
409 for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
410 vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
411 &vpbl.pbl_pbase);
412
413 if (!vpbl.pbl_vbase) {
414 ret = -ENOMEM;
415 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
416 goto failed_leaf_vpbl_pages_alloc;
417 }
418
419 nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
420 nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
421 nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
422
423 nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
424 nesfmr->root_vpbl.pbl_vbase[i].pa_low,
425 nesfmr->root_vpbl.pbl_vbase[i].pa_high,
426 &nesfmr->root_vpbl.leaf_vpbl[i]);
427 }
428 }
429 nesfmr->ib_qp = NULL;
430 nesfmr->access_rights =0;
431 292
432 stag = stag_index << 8;
433 stag |= driver_key;
434 stag += (u32)stag_key;
435 293
436 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
437 cqp_request = nes_get_cqp_request(nesdev); 294 cqp_request = nes_get_cqp_request(nesdev);
438 if (cqp_request == NULL) { 295 if (cqp_request == NULL) {
439 nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); 296 nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
440 ret = -ENOMEM; 297 return -ENOMEM;
441 goto failed_leaf_vpbl_pages_alloc;
442 } 298 }
299 nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
300 "region_length = %llu\n",
301 page_count, region_length);
443 cqp_request->waiting = 1; 302 cqp_request->waiting = 1;
444 cqp_wqe = &cqp_request->cqp_wqe; 303 cqp_wqe = &cqp_request->cqp_wqe;
445 304
446 nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n", 305 spin_lock_irqsave(&nesadapter->pbl_lock, flags);
447 stag, stag_index); 306 if (nesadapter->free_4kpbl > 0) {
448 307 nesadapter->free_4kpbl--;
449 opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; 308 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
450 309 } else {
451 if (nesfmr->nesmr.pbl_4k == 1) 310 /* No 4kpbl's available: */
452 opcode |= NES_CQP_STAG_PBL_BLK_SIZE; 311 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
453 312 nes_debug(NES_DBG_MR, "Out of Pbls\n");
454 if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) { 313 nes_free_cqp_request(nesdev, cqp_request);
455 opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | 314 return -ENOMEM;
456 NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
457 nesfmr->access_rights |=
458 NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
459 NES_CQP_STAG_REM_ACC_EN;
460 } 315 }
461 316
462 if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) { 317 opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
463 opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | 318 NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
464 NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN; 319 NES_CQP_STAG_REM_ACC_EN;
465 nesfmr->access_rights |= 320 /*
466 NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ | 321 * The current OFED API does not support the zero based TO option.
467 NES_CQP_STAG_REM_ACC_EN; 322 * If added then need to changed the NES_CQP_STAG_VA* option. Also,
468 } 323 * the API does not support that ability to have the MR set for local
324 * access only when created and not allow the SQ op to override. Given
325 * this the remote enable must be set here.
326 */
469 327
470 nes_fill_init_cqp_wqe(cqp_wqe, nesdev); 328 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
471 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); 329 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
472 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff)); 330 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
473 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
474 331
475 cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 332 cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
476 cpu_to_le32((nesfmr->nesmr.pbls_used>1) ? 333 cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
477 (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); 334 cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
335 cpu_to_le32(nespd->pd_id & 0x00007fff);
336
337 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
338 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
339 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
340 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
341 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
342 cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
343 barrier();
478 344
479 atomic_set(&cqp_request->refcount, 2); 345 atomic_set(&cqp_request->refcount, 2);
480 nes_post_cqp_request(nesdev, cqp_request); 346 nes_post_cqp_request(nesdev, cqp_request);
481 347
482 /* Wait for CQP */ 348 /* Wait for CQP */
483 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), 349 ret = wait_event_timeout(cqp_request->waitq,
484 NES_EVENT_TIMEOUT); 350 (0 != cqp_request->request_done),
485 nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," 351 NES_EVENT_TIMEOUT);
486 " CQP Major:Minor codes = 0x%04X:0x%04X.\n", 352
487 stag, ret, cqp_request->major_code, cqp_request->minor_code); 353 nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
488 354 "wait_event_timeout ret = %u, CQP Major:Minor codes = "
489 if ((!ret) || (cqp_request->major_code)) { 355 "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
490 nes_put_cqp_request(nesdev, cqp_request); 356 cqp_request->minor_code);
491 ret = (!ret) ? -ETIME : -EIO; 357 major_code = cqp_request->major_code;
492 goto failed_leaf_vpbl_pages_alloc;
493 }
494 nes_put_cqp_request(nesdev, cqp_request); 358 nes_put_cqp_request(nesdev, cqp_request);
495 nesfmr->nesmr.ibfmr.lkey = stag;
496 nesfmr->nesmr.ibfmr.rkey = stag;
497 nesfmr->attr = *ibfmr_attr;
498
499 return &nesfmr->nesmr.ibfmr;
500
501 failed_leaf_vpbl_pages_alloc:
502 /* unroll all allocated pages */
503 for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
504 if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
505 pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
506 nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
507 }
508 }
509 if (nesfmr->root_vpbl.leaf_vpbl)
510 kfree(nesfmr->root_vpbl.leaf_vpbl);
511 359
512 failed_leaf_vpbl_alloc: 360 if (!ret || major_code) {
513 if (nesfmr->leaf_pbl_cnt == 0) {
514 if (nesfmr->root_vpbl.pbl_vbase)
515 pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
516 nesfmr->root_vpbl.pbl_pbase);
517 } else
518 pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
519 nesfmr->root_vpbl.pbl_pbase);
520
521 failed_vpbl_alloc:
522 if (nesfmr->nesmr.pbls_used != 0) {
523 spin_lock_irqsave(&nesadapter->pbl_lock, flags); 361 spin_lock_irqsave(&nesadapter->pbl_lock, flags);
524 if (nesfmr->nesmr.pbl_4k) 362 nesadapter->free_4kpbl++;
525 nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
526 else
527 nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
528 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); 363 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
529 } 364 }
530 365
531failed_vpbl_avail: 366 if (!ret)
532 kfree(nesfmr); 367 return -ETIME;
533 368 else if (major_code)
534 failed_fmr_alloc: 369 return -EIO;
535 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); 370 return 0;
536
537 failed_resource_alloc:
538 return ERR_PTR(ret);
539} 371}
540 372
541 373/*
542/** 374 * nes_alloc_fast_reg_mr
543 * nes_dealloc_fmr
544 */ 375 */
545static int nes_dealloc_fmr(struct ib_fmr *ibfmr) 376struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len)
546{ 377{
547 unsigned long flags; 378 struct nes_pd *nespd = to_nespd(ibpd);
548 struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr); 379 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
549 struct nes_fmr *nesfmr = to_nesfmr(nesmr);
550 struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
551 struct nes_device *nesdev = nesvnic->nesdev; 380 struct nes_device *nesdev = nesvnic->nesdev;
552 struct nes_adapter *nesadapter = nesdev->nesadapter; 381 struct nes_adapter *nesadapter = nesdev->nesadapter;
553 int i = 0;
554 int rc;
555 382
556 /* free the resources */ 383 u32 next_stag_index;
557 if (nesfmr->leaf_pbl_cnt == 0) { 384 u8 stag_key = 0;
558 /* single PBL case */ 385 u32 driver_key = 0;
559 if (nesfmr->root_vpbl.pbl_vbase) 386 int err = 0;
560 pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase, 387 u32 stag_index = 0;
561 nesfmr->root_vpbl.pbl_pbase); 388 struct nes_mr *nesmr;
562 } else { 389 u32 stag;
563 for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) { 390 int ret;
564 pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase, 391 struct ib_mr *ibmr;
565 nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase); 392/*
566 } 393 * Note: Set to always use a fixed length single page entry PBL. This is to allow
567 kfree(nesfmr->root_vpbl.leaf_vpbl); 394 * for the fast_reg_mr operation to always know the size of the PBL.
568 pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase, 395 */
569 nesfmr->root_vpbl.pbl_pbase); 396 if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
570 } 397 return ERR_PTR(-E2BIG);
571 nesmr->ibmw.device = ibfmr->device;
572 nesmr->ibmw.pd = ibfmr->pd;
573 nesmr->ibmw.rkey = ibfmr->rkey;
574 nesmr->ibmw.uobject = NULL;
575 398
576 rc = nes_dealloc_mw(&nesmr->ibmw); 399 get_random_bytes(&next_stag_index, sizeof(next_stag_index));
400 stag_key = (u8)next_stag_index;
401 next_stag_index >>= 8;
402 next_stag_index %= nesadapter->max_mr;
577 403
578 if ((rc == 0) && (nesfmr->nesmr.pbls_used != 0)) { 404 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
579 spin_lock_irqsave(&nesadapter->pbl_lock, flags); 405 nesadapter->max_mr, &stag_index,
580 if (nesfmr->nesmr.pbl_4k) { 406 &next_stag_index);
581 nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used; 407 if (err)
582 WARN_ON(nesadapter->free_4kpbl > nesadapter->max_4kpbl); 408 return ERR_PTR(err);
583 } else { 409
584 nesadapter->free_256pbl += nesfmr->nesmr.pbls_used; 410 nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
585 WARN_ON(nesadapter->free_256pbl > nesadapter->max_256pbl); 411 if (!nesmr) {
586 } 412 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
587 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); 413 return ERR_PTR(-ENOMEM);
588 } 414 }
589 415
590 return rc; 416 stag = stag_index << 8;
591} 417 stag |= driver_key;
418 stag += (u32)stag_key;
592 419
420 nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
421 stag, stag_index);
593 422
594/** 423 ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
595 * nes_map_phys_fmr 424
425 if (ret == 0) {
426 nesmr->ibmr.rkey = stag;
427 nesmr->ibmr.lkey = stag;
428 nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
429 ibmr = &nesmr->ibmr;
430 } else {
431 kfree(nesmr);
432 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
433 ibmr = ERR_PTR(-ENOMEM);
434 }
435 return ibmr;
436}
437
438/*
439 * nes_alloc_fast_reg_page_list
596 */ 440 */
597static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, 441static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
598 int list_len, u64 iova) 442 struct ib_device *ibdev,
443 int page_list_len)
599{ 444{
600 return 0; 445 struct nes_vnic *nesvnic = to_nesvnic(ibdev);
601} 446 struct nes_device *nesdev = nesvnic->nesdev;
447 struct ib_fast_reg_page_list *pifrpl;
448 struct nes_ib_fast_reg_page_list *pnesfrpl;
602 449
450 if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
451 return ERR_PTR(-E2BIG);
452 /*
453 * Allocate the ib_fast_reg_page_list structure, the
454 * nes_fast_bpl structure, and the PLB table.
455 */
456 pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
457 page_list_len * sizeof(u64), GFP_KERNEL);
458
459 if (!pnesfrpl)
460 return ERR_PTR(-ENOMEM);
603 461
604/** 462 pifrpl = &pnesfrpl->ibfrpl;
605 * nes_unmap_frm 463 pifrpl->page_list = &pnesfrpl->pbl;
464 pifrpl->max_page_list_len = page_list_len;
465 /*
466 * Allocate the WQE PBL
467 */
468 pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
469 page_list_len * sizeof(u64),
470 &pnesfrpl->nes_wqe_pbl.paddr);
471
472 if (!pnesfrpl->nes_wqe_pbl.kva) {
473 kfree(pnesfrpl);
474 return ERR_PTR(-ENOMEM);
475 }
476 nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
477 "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
478 "pbl.paddr= %p\n", pnesfrpl, &pnesfrpl->ibfrpl,
479 pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
480 (void *)pnesfrpl->nes_wqe_pbl.paddr);
481
482 return pifrpl;
483}
484
485/*
486 * nes_free_fast_reg_page_list
606 */ 487 */
607static int nes_unmap_fmr(struct list_head *ibfmr_list) 488static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
608{ 489{
609 return 0; 490 struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
491 struct nes_device *nesdev = nesvnic->nesdev;
492 struct nes_ib_fast_reg_page_list *pnesfrpl;
493
494 pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
495 /*
496 * Free the WQE PBL.
497 */
498 pci_free_consistent(nesdev->pcidev,
499 pifrpl->max_page_list_len * sizeof(u64),
500 pnesfrpl->nes_wqe_pbl.kva,
501 pnesfrpl->nes_wqe_pbl.paddr);
502 /*
503 * Free the PBL structure
504 */
505 kfree(pnesfrpl);
610} 506}
611 507
612
613
614/** 508/**
615 * nes_query_device 509 * nes_query_device
616 */ 510 */
@@ -633,23 +527,23 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
633 props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; 527 props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
634 props->max_sge = nesdev->nesadapter->max_sge; 528 props->max_sge = nesdev->nesadapter->max_sge;
635 props->max_cq = nesibdev->max_cq; 529 props->max_cq = nesibdev->max_cq;
636 props->max_cqe = nesdev->nesadapter->max_cqe - 1; 530 props->max_cqe = nesdev->nesadapter->max_cqe;
637 props->max_mr = nesibdev->max_mr; 531 props->max_mr = nesibdev->max_mr;
638 props->max_mw = nesibdev->max_mr; 532 props->max_mw = nesibdev->max_mr;
639 props->max_pd = nesibdev->max_pd; 533 props->max_pd = nesibdev->max_pd;
640 props->max_sge_rd = 1; 534 props->max_sge_rd = 1;
641 switch (nesdev->nesadapter->max_irrq_wr) { 535 switch (nesdev->nesadapter->max_irrq_wr) {
642 case 0: 536 case 0:
643 props->max_qp_rd_atom = 1; 537 props->max_qp_rd_atom = 2;
644 break; 538 break;
645 case 1: 539 case 1:
646 props->max_qp_rd_atom = 4; 540 props->max_qp_rd_atom = 8;
647 break; 541 break;
648 case 2: 542 case 2:
649 props->max_qp_rd_atom = 16; 543 props->max_qp_rd_atom = 32;
650 break; 544 break;
651 case 3: 545 case 3:
652 props->max_qp_rd_atom = 32; 546 props->max_qp_rd_atom = 64;
653 break; 547 break;
654 default: 548 default:
655 props->max_qp_rd_atom = 0; 549 props->max_qp_rd_atom = 0;
@@ -1121,6 +1015,7 @@ static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
1121 kunmap(nesqp->page); 1015 kunmap(nesqp->page);
1122 return -ENOMEM; 1016 return -ENOMEM;
1123 } 1017 }
1018 nesqp->sq_kmapped = 1;
1124 nesqp->hwqp.q2_vbase = mem; 1019 nesqp->hwqp.q2_vbase = mem;
1125 mem += 256; 1020 mem += 256;
1126 memset(nesqp->hwqp.q2_vbase, 0, 256); 1021 memset(nesqp->hwqp.q2_vbase, 0, 256);
@@ -1198,7 +1093,10 @@ static inline void nes_free_qp_mem(struct nes_device *nesdev,
1198 pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); 1093 pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
1199 pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase ); 1094 pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
1200 nesqp->pbl_vbase = NULL; 1095 nesqp->pbl_vbase = NULL;
1201 kunmap(nesqp->page); 1096 if (nesqp->sq_kmapped) {
1097 nesqp->sq_kmapped = 0;
1098 kunmap(nesqp->page);
1099 }
1202 } 1100 }
1203} 1101}
1204 1102
@@ -1504,8 +1402,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1504 nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n", 1402 nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
1505 nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp)); 1403 nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
1506 spin_lock_init(&nesqp->lock); 1404 spin_lock_init(&nesqp->lock);
1507 init_waitqueue_head(&nesqp->state_waitq);
1508 init_waitqueue_head(&nesqp->kick_waitq);
1509 nes_add_ref(&nesqp->ibqp); 1405 nes_add_ref(&nesqp->ibqp);
1510 break; 1406 break;
1511 default: 1407 default:
@@ -1513,6 +1409,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1513 return ERR_PTR(-EINVAL); 1409 return ERR_PTR(-EINVAL);
1514 } 1410 }
1515 1411
1412 nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
1413
1516 /* update the QP table */ 1414 /* update the QP table */
1517 nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; 1415 nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
1518 nes_debug(NES_DBG_QP, "netdev refcnt=%u\n", 1416 nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
@@ -1607,8 +1505,10 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
1607 nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index; 1505 nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
1608 } 1506 }
1609 } 1507 }
1610 if (nesqp->pbl_pbase) 1508 if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
1509 nesqp->sq_kmapped = 0;
1611 kunmap(nesqp->page); 1510 kunmap(nesqp->page);
1511 }
1612 } else { 1512 } else {
1613 /* Clean any pending completions from the cq(s) */ 1513 /* Clean any pending completions from the cq(s) */
1614 if (nesqp->nesscq) 1514 if (nesqp->nesscq)
@@ -1649,6 +1549,9 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1649 unsigned long flags; 1549 unsigned long flags;
1650 int ret; 1550 int ret;
1651 1551
1552 if (entries > nesadapter->max_cqe)
1553 return ERR_PTR(-EINVAL);
1554
1652 err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, 1555 err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
1653 nesadapter->max_cq, &cq_num, &nesadapter->next_cq); 1556 nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
1654 if (err) { 1557 if (err) {
@@ -2606,9 +2509,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2606 stag = stag_index << 8; 2509 stag = stag_index << 8;
2607 stag |= driver_key; 2510 stag |= driver_key;
2608 stag += (u32)stag_key; 2511 stag += (u32)stag_key;
2609 if (stag == 0) {
2610 stag = 1;
2611 }
2612 2512
2613 iova_start = virt; 2513 iova_start = virt;
2614 /* Make the leaf PBL the root if only one PBL */ 2514 /* Make the leaf PBL the root if only one PBL */
@@ -3109,7 +3009,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3109 " already done based on hw state.\n", 3009 " already done based on hw state.\n",
3110 nesqp->hwqp.qp_id); 3010 nesqp->hwqp.qp_id);
3111 issue_modify_qp = 0; 3011 issue_modify_qp = 0;
3112 nesqp->in_disconnect = 0;
3113 } 3012 }
3114 switch (nesqp->hw_iwarp_state) { 3013 switch (nesqp->hw_iwarp_state) {
3115 case NES_AEQE_IWARP_STATE_CLOSING: 3014 case NES_AEQE_IWARP_STATE_CLOSING:
@@ -3122,7 +3021,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3122 break; 3021 break;
3123 default: 3022 default:
3124 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; 3023 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
3125 nesqp->in_disconnect = 1;
3126 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; 3024 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3127 break; 3025 break;
3128 } 3026 }
@@ -3139,7 +3037,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3139 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; 3037 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
3140 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; 3038 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
3141 issue_modify_qp = 1; 3039 issue_modify_qp = 1;
3142 nesqp->in_disconnect = 1;
3143 break; 3040 break;
3144 case IB_QPS_ERR: 3041 case IB_QPS_ERR:
3145 case IB_QPS_RESET: 3042 case IB_QPS_RESET:
@@ -3162,7 +3059,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3162 if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && 3059 if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
3163 (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { 3060 (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
3164 next_iwarp_state |= NES_CQP_QP_RESET; 3061 next_iwarp_state |= NES_CQP_QP_RESET;
3165 nesqp->in_disconnect = 1;
3166 } else { 3062 } else {
3167 nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n", 3063 nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
3168 nesqp->hwqp.qp_id, nesqp->hw_tcp_state); 3064 nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
@@ -3373,21 +3269,17 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3373 struct nes_device *nesdev = nesvnic->nesdev; 3269 struct nes_device *nesdev = nesvnic->nesdev;
3374 struct nes_qp *nesqp = to_nesqp(ibqp); 3270 struct nes_qp *nesqp = to_nesqp(ibqp);
3375 struct nes_hw_qp_wqe *wqe; 3271 struct nes_hw_qp_wqe *wqe;
3376 int err; 3272 int err = 0;
3377 u32 qsize = nesqp->hwqp.sq_size; 3273 u32 qsize = nesqp->hwqp.sq_size;
3378 u32 head; 3274 u32 head;
3379 u32 wqe_misc; 3275 u32 wqe_misc = 0;
3380 u32 wqe_count; 3276 u32 wqe_count = 0;
3381 u32 counter; 3277 u32 counter;
3382 u32 total_payload_length;
3383
3384 err = 0;
3385 wqe_misc = 0;
3386 wqe_count = 0;
3387 total_payload_length = 0;
3388 3278
3389 if (nesqp->ibqp_state > IB_QPS_RTS) 3279 if (nesqp->ibqp_state > IB_QPS_RTS) {
3390 return -EINVAL; 3280 err = -EINVAL;
3281 goto out;
3282 }
3391 3283
3392 spin_lock_irqsave(&nesqp->lock, flags); 3284 spin_lock_irqsave(&nesqp->lock, flags);
3393 3285
@@ -3413,94 +3305,208 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3413 u64temp = (u64)(ib_wr->wr_id); 3305 u64temp = (u64)(ib_wr->wr_id);
3414 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, 3306 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
3415 u64temp); 3307 u64temp);
3416 switch (ib_wr->opcode) { 3308 switch (ib_wr->opcode) {
3417 case IB_WR_SEND: 3309 case IB_WR_SEND:
3418 if (ib_wr->send_flags & IB_SEND_SOLICITED) { 3310 case IB_WR_SEND_WITH_INV:
3419 wqe_misc = NES_IWARP_SQ_OP_SENDSE; 3311 if (IB_WR_SEND == ib_wr->opcode) {
3420 } else { 3312 if (ib_wr->send_flags & IB_SEND_SOLICITED)
3421 wqe_misc = NES_IWARP_SQ_OP_SEND; 3313 wqe_misc = NES_IWARP_SQ_OP_SENDSE;
3422 } 3314 else
3423 if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { 3315 wqe_misc = NES_IWARP_SQ_OP_SEND;
3424 err = -EINVAL; 3316 } else {
3425 break; 3317 if (ib_wr->send_flags & IB_SEND_SOLICITED)
3426 } 3318 wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
3427 if (ib_wr->send_flags & IB_SEND_FENCE) { 3319 else
3428 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; 3320 wqe_misc = NES_IWARP_SQ_OP_SENDINV;
3429 }
3430 if ((ib_wr->send_flags & IB_SEND_INLINE) &&
3431 ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
3432 (ib_wr->sg_list[0].length <= 64)) {
3433 memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
3434 (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
3435 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
3436 ib_wr->sg_list[0].length);
3437 wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
3438 } else {
3439 fill_wqe_sg_send(wqe, ib_wr, 1);
3440 }
3441 3321
3442 break; 3322 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
3443 case IB_WR_RDMA_WRITE: 3323 ib_wr->ex.invalidate_rkey);
3444 wqe_misc = NES_IWARP_SQ_OP_RDMAW; 3324 }
3445 if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
3446 nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
3447 ib_wr->num_sge,
3448 nesdev->nesadapter->max_sge);
3449 err = -EINVAL;
3450 break;
3451 }
3452 if (ib_wr->send_flags & IB_SEND_FENCE) {
3453 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
3454 }
3455 3325
3456 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, 3326 if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
3457 ib_wr->wr.rdma.rkey); 3327 err = -EINVAL;
3458 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, 3328 break;
3459 ib_wr->wr.rdma.remote_addr);
3460
3461 if ((ib_wr->send_flags & IB_SEND_INLINE) &&
3462 ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
3463 (ib_wr->sg_list[0].length <= 64)) {
3464 memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
3465 (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
3466 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
3467 ib_wr->sg_list[0].length);
3468 wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
3469 } else {
3470 fill_wqe_sg_send(wqe, ib_wr, 1);
3471 }
3472 wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
3473 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
3474 break;
3475 case IB_WR_RDMA_READ:
3476 /* iWARP only supports 1 sge for RDMA reads */
3477 if (ib_wr->num_sge > 1) {
3478 nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
3479 ib_wr->num_sge);
3480 err = -EINVAL;
3481 break;
3482 }
3483 wqe_misc = NES_IWARP_SQ_OP_RDMAR;
3484 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
3485 ib_wr->wr.rdma.remote_addr);
3486 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
3487 ib_wr->wr.rdma.rkey);
3488 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
3489 ib_wr->sg_list->length);
3490 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
3491 ib_wr->sg_list->addr);
3492 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
3493 ib_wr->sg_list->lkey);
3494 break;
3495 default:
3496 /* error */
3497 err = -EINVAL;
3498 break;
3499 } 3329 }
3500 3330
3501 if (ib_wr->send_flags & IB_SEND_SIGNALED) { 3331 if (ib_wr->send_flags & IB_SEND_FENCE)
3502 wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; 3332 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
3333
3334 if ((ib_wr->send_flags & IB_SEND_INLINE) &&
3335 ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
3336 (ib_wr->sg_list[0].length <= 64)) {
3337 memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
3338 (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
3339 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
3340 ib_wr->sg_list[0].length);
3341 wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
3342 } else {
3343 fill_wqe_sg_send(wqe, ib_wr, 1);
3344 }
3345
3346 break;
3347 case IB_WR_RDMA_WRITE:
3348 wqe_misc = NES_IWARP_SQ_OP_RDMAW;
3349 if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
3350 nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
3351 ib_wr->num_sge, nesdev->nesadapter->max_sge);
3352 err = -EINVAL;
3353 break;
3354 }
3355
3356 if (ib_wr->send_flags & IB_SEND_FENCE)
3357 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
3358
3359 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
3360 ib_wr->wr.rdma.rkey);
3361 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
3362 ib_wr->wr.rdma.remote_addr);
3363
3364 if ((ib_wr->send_flags & IB_SEND_INLINE) &&
3365 ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
3366 (ib_wr->sg_list[0].length <= 64)) {
3367 memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
3368 (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
3369 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
3370 ib_wr->sg_list[0].length);
3371 wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
3372 } else {
3373 fill_wqe_sg_send(wqe, ib_wr, 1);
3374 }
3375
3376 wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
3377 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
3378 break;
3379 case IB_WR_RDMA_READ:
3380 case IB_WR_RDMA_READ_WITH_INV:
3381 /* iWARP only supports 1 sge for RDMA reads */
3382 if (ib_wr->num_sge > 1) {
3383 nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
3384 ib_wr->num_sge);
3385 err = -EINVAL;
3386 break;
3387 }
3388 if (ib_wr->opcode == IB_WR_RDMA_READ) {
3389 wqe_misc = NES_IWARP_SQ_OP_RDMAR;
3390 } else {
3391 wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
3392 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
3393 ib_wr->ex.invalidate_rkey);
3394 }
3395
3396 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
3397 ib_wr->wr.rdma.remote_addr);
3398 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
3399 ib_wr->wr.rdma.rkey);
3400 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
3401 ib_wr->sg_list->length);
3402 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
3403 ib_wr->sg_list->addr);
3404 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
3405 ib_wr->sg_list->lkey);
3406 break;
3407 case IB_WR_LOCAL_INV:
3408 wqe_misc = NES_IWARP_SQ_OP_LOCINV;
3409 set_wqe_32bit_value(wqe->wqe_words,
3410 NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
3411 ib_wr->ex.invalidate_rkey);
3412 break;
3413 case IB_WR_FAST_REG_MR:
3414 {
3415 int i;
3416 int flags = ib_wr->wr.fast_reg.access_flags;
3417 struct nes_ib_fast_reg_page_list *pnesfrpl =
3418 container_of(ib_wr->wr.fast_reg.page_list,
3419 struct nes_ib_fast_reg_page_list,
3420 ibfrpl);
3421 u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
3422 u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
3423
3424 if (ib_wr->wr.fast_reg.page_list_len >
3425 (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
3426 nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
3427 err = -EINVAL;
3428 break;
3429 }
3430 wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
3431 set_wqe_64bit_value(wqe->wqe_words,
3432 NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
3433 ib_wr->wr.fast_reg.iova_start);
3434 set_wqe_32bit_value(wqe->wqe_words,
3435 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
3436 ib_wr->wr.fast_reg.length);
3437 set_wqe_32bit_value(wqe->wqe_words,
3438 NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
3439 ib_wr->wr.fast_reg.rkey);
3440 /* Set page size: */
3441 if (ib_wr->wr.fast_reg.page_shift == 12) {
3442 wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
3443 } else if (ib_wr->wr.fast_reg.page_shift == 21) {
3444 wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
3445 } else {
3446 nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
3447 " ib_wr=%u, max=1\n", ib_wr->num_sge);
3448 err = -EINVAL;
3449 break;
3450 }
3451 /* Set access_flags */
3452 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
3453 if (flags & IB_ACCESS_LOCAL_WRITE)
3454 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
3455
3456 if (flags & IB_ACCESS_REMOTE_WRITE)
3457 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
3458
3459 if (flags & IB_ACCESS_REMOTE_READ)
3460 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
3461
3462 if (flags & IB_ACCESS_MW_BIND)
3463 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
3464
3465 /* Fill in PBL info: */
3466 if (ib_wr->wr.fast_reg.page_list_len >
3467 pnesfrpl->ibfrpl.max_page_list_len) {
3468 nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
3469 " ib_wr=%p, value=%u, max=%u\n",
3470 ib_wr, ib_wr->wr.fast_reg.page_list_len,
3471 pnesfrpl->ibfrpl.max_page_list_len);
3472 err = -EINVAL;
3473 break;
3474 }
3475
3476 set_wqe_64bit_value(wqe->wqe_words,
3477 NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
3478 pnesfrpl->nes_wqe_pbl.paddr);
3479
3480 set_wqe_32bit_value(wqe->wqe_words,
3481 NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
3482 ib_wr->wr.fast_reg.page_list_len * 8);
3483
3484 for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
3485 dst_page_list[i] = cpu_to_le64(src_page_list[i]);
3486
3487 nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %p, "
3488 "length: %d, rkey: %0x, pgl_paddr: %p, "
3489 "page_list_len: %u, wqe_misc: %x\n",
3490 (void *)ib_wr->wr.fast_reg.iova_start,
3491 ib_wr->wr.fast_reg.length,
3492 ib_wr->wr.fast_reg.rkey,
3493 (void *)pnesfrpl->nes_wqe_pbl.paddr,
3494 ib_wr->wr.fast_reg.page_list_len,
3495 wqe_misc);
3496 break;
3497 }
3498 default:
3499 /* error */
3500 err = -EINVAL;
3501 break;
3503 } 3502 }
3503
3504 if (err)
3505 break;
3506
3507 if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
3508 wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
3509
3504 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc); 3510 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
3505 3511
3506 ib_wr = ib_wr->next; 3512 ib_wr = ib_wr->next;
@@ -3522,6 +3528,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3522 3528
3523 spin_unlock_irqrestore(&nesqp->lock, flags); 3529 spin_unlock_irqrestore(&nesqp->lock, flags);
3524 3530
3531out:
3525 if (err) 3532 if (err)
3526 *bad_wr = ib_wr; 3533 *bad_wr = ib_wr;
3527 return err; 3534 return err;
@@ -3548,8 +3555,10 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
3548 u32 counter; 3555 u32 counter;
3549 u32 total_payload_length; 3556 u32 total_payload_length;
3550 3557
3551 if (nesqp->ibqp_state > IB_QPS_RTS) 3558 if (nesqp->ibqp_state > IB_QPS_RTS) {
3552 return -EINVAL; 3559 err = -EINVAL;
3560 goto out;
3561 }
3553 3562
3554 spin_lock_irqsave(&nesqp->lock, flags); 3563 spin_lock_irqsave(&nesqp->lock, flags);
3555 3564
@@ -3612,6 +3621,7 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
3612 3621
3613 spin_unlock_irqrestore(&nesqp->lock, flags); 3622 spin_unlock_irqrestore(&nesqp->lock, flags);
3614 3623
3624out:
3615 if (err) 3625 if (err)
3616 *bad_wr = ib_wr; 3626 *bad_wr = ib_wr;
3617 return err; 3627 return err;
@@ -3720,6 +3730,12 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3720 nes_debug(NES_DBG_CQ, "Operation = Send.\n"); 3730 nes_debug(NES_DBG_CQ, "Operation = Send.\n");
3721 entry->opcode = IB_WC_SEND; 3731 entry->opcode = IB_WC_SEND;
3722 break; 3732 break;
3733 case NES_IWARP_SQ_OP_LOCINV:
3734 entry->opcode = IB_WR_LOCAL_INV;
3735 break;
3736 case NES_IWARP_SQ_OP_FAST_REG:
3737 entry->opcode = IB_WC_FAST_REG_MR;
3738 break;
3723 } 3739 }
3724 3740
3725 nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); 3741 nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
@@ -3890,10 +3906,9 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3890 nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; 3906 nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
3891 nesibdev->ibdev.bind_mw = nes_bind_mw; 3907 nesibdev->ibdev.bind_mw = nes_bind_mw;
3892 3908
3893 nesibdev->ibdev.alloc_fmr = nes_alloc_fmr; 3909 nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
3894 nesibdev->ibdev.unmap_fmr = nes_unmap_fmr; 3910 nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
3895 nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr; 3911 nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
3896 nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr;
3897 3912
3898 nesibdev->ibdev.attach_mcast = nes_multicast_attach; 3913 nesibdev->ibdev.attach_mcast = nes_multicast_attach;
3899 nesibdev->ibdev.detach_mcast = nes_multicast_detach; 3914 nesibdev->ibdev.detach_mcast = nes_multicast_detach;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 89822d75f82e..2df9993e0cac 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -135,19 +135,15 @@ struct nes_qp {
135 struct ib_qp ibqp; 135 struct ib_qp ibqp;
136 void *allocated_buffer; 136 void *allocated_buffer;
137 struct iw_cm_id *cm_id; 137 struct iw_cm_id *cm_id;
138 struct workqueue_struct *wq;
139 struct nes_cq *nesscq; 138 struct nes_cq *nesscq;
140 struct nes_cq *nesrcq; 139 struct nes_cq *nesrcq;
141 struct nes_pd *nespd; 140 struct nes_pd *nespd;
142 void *cm_node; /* handle of the node this QP is associated with */ 141 void *cm_node; /* handle of the node this QP is associated with */
143 struct ietf_mpa_frame *ietf_frame; 142 struct ietf_mpa_frame *ietf_frame;
144 dma_addr_t ietf_frame_pbase; 143 dma_addr_t ietf_frame_pbase;
145 wait_queue_head_t state_waitq;
146 struct ib_mr *lsmm_mr; 144 struct ib_mr *lsmm_mr;
147 unsigned long socket;
148 struct nes_hw_qp hwqp; 145 struct nes_hw_qp hwqp;
149 struct work_struct work; 146 struct work_struct work;
150 struct work_struct ae_work;
151 enum ib_qp_state ibqp_state; 147 enum ib_qp_state ibqp_state;
152 u32 iwarp_state; 148 u32 iwarp_state;
153 u32 hte_index; 149 u32 hte_index;
@@ -165,19 +161,20 @@ struct nes_qp {
165 struct page *page; 161 struct page *page;
166 struct timer_list terminate_timer; 162 struct timer_list terminate_timer;
167 enum ib_event_type terminate_eventtype; 163 enum ib_event_type terminate_eventtype;
168 wait_queue_head_t kick_waitq; 164 u16 active_conn:1;
169 u16 in_disconnect; 165 u16 skip_lsmm:1;
166 u16 user_mode:1;
167 u16 hte_added:1;
168 u16 flush_issued:1;
169 u16 destroyed:1;
170 u16 sig_all:1;
171 u16 rsvd:9;
170 u16 private_data_len; 172 u16 private_data_len;
171 u16 term_sq_flush_code; 173 u16 term_sq_flush_code;
172 u16 term_rq_flush_code; 174 u16 term_rq_flush_code;
173 u8 active_conn;
174 u8 skip_lsmm;
175 u8 user_mode;
176 u8 hte_added;
177 u8 hw_iwarp_state; 175 u8 hw_iwarp_state;
178 u8 flush_issued;
179 u8 hw_tcp_state; 176 u8 hw_tcp_state;
180 u8 term_flags; 177 u8 term_flags;
181 u8 destroyed; 178 u8 sq_kmapped;
182}; 179};
183#endif /* NES_VERBS_H */ 180#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2bf5116deec4..df3eb8c9fd96 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -884,6 +884,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
884 884
885 neigh->neighbour = neighbour; 885 neigh->neighbour = neighbour;
886 neigh->dev = dev; 886 neigh->dev = dev;
887 memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
887 *to_ipoib_neigh(neighbour) = neigh; 888 *to_ipoib_neigh(neighbour) = neigh;
888 skb_queue_head_init(&neigh->queue); 889 skb_queue_head_init(&neigh->queue);
889 ipoib_cm_set(neigh, NULL); 890 ipoib_cm_set(neigh, NULL);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index b9453d068e9d..274c883ef3ea 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -209,6 +209,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
209 mem_copy->copy_buf = NULL; 209 mem_copy->copy_buf = NULL;
210} 210}
211 211
212#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
213
212/** 214/**
213 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses 215 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
214 * and returns the length of resulting physical address array (may be less than 216 * and returns the length of resulting physical address array (may be less than
@@ -221,62 +223,52 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
221 * where --few fragments of the same page-- are present in the SG as 223 * where --few fragments of the same page-- are present in the SG as
222 * consecutive elements. Also, it handles one entry SG. 224 * consecutive elements. Also, it handles one entry SG.
223 */ 225 */
226
224static int iser_sg_to_page_vec(struct iser_data_buf *data, 227static int iser_sg_to_page_vec(struct iser_data_buf *data,
225 struct iser_page_vec *page_vec, 228 struct iser_page_vec *page_vec,
226 struct ib_device *ibdev) 229 struct ib_device *ibdev)
227{ 230{
228 struct scatterlist *sgl = (struct scatterlist *)data->buf; 231 struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
229 struct scatterlist *sg; 232 u64 start_addr, end_addr, page, chunk_start = 0;
230 u64 first_addr, last_addr, page;
231 int end_aligned;
232 unsigned int cur_page = 0;
233 unsigned long total_sz = 0; 233 unsigned long total_sz = 0;
234 int i; 234 unsigned int dma_len;
235 int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
235 236
236 /* compute the offset of first element */ 237 /* compute the offset of first element */
237 page_vec->offset = (u64) sgl[0].offset & ~MASK_4K; 238 page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
238 239
240 new_chunk = 1;
241 cur_page = 0;
239 for_each_sg(sgl, sg, data->dma_nents, i) { 242 for_each_sg(sgl, sg, data->dma_nents, i) {
240 unsigned int dma_len = ib_sg_dma_len(ibdev, sg); 243 start_addr = ib_sg_dma_address(ibdev, sg);
241 244 if (new_chunk)
245 chunk_start = start_addr;
246 dma_len = ib_sg_dma_len(ibdev, sg);
247 end_addr = start_addr + dma_len;
242 total_sz += dma_len; 248 total_sz += dma_len;
243 249
244 first_addr = ib_sg_dma_address(ibdev, sg); 250 /* collect page fragments until aligned or end of SG list */
245 last_addr = first_addr + dma_len; 251 if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
246 252 new_chunk = 0;
247 end_aligned = !(last_addr & ~MASK_4K); 253 continue;
248
249 /* continue to collect page fragments till aligned or SG ends */
250 while (!end_aligned && (i + 1 < data->dma_nents)) {
251 sg = sg_next(sg);
252 i++;
253 dma_len = ib_sg_dma_len(ibdev, sg);
254 total_sz += dma_len;
255 last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
256 end_aligned = !(last_addr & ~MASK_4K);
257 } 254 }
258 255 new_chunk = 1;
259 /* handle the 1st page in the 1st DMA element */ 256
260 if (cur_page == 0) { 257 /* address of the first page in the contiguous chunk;
261 page = first_addr & MASK_4K; 258 masking relevant for the very first SG entry,
262 page_vec->pages[cur_page] = page; 259 which might be unaligned */
263 cur_page++; 260 page = chunk_start & MASK_4K;
261 do {
262 page_vec->pages[cur_page++] = page;
264 page += SIZE_4K; 263 page += SIZE_4K;
265 } else 264 } while (page < end_addr);
266 page = first_addr;
267
268 for (; page < last_addr; page += SIZE_4K) {
269 page_vec->pages[cur_page] = page;
270 cur_page++;
271 }
272
273 } 265 }
266
274 page_vec->data_size = total_sz; 267 page_vec->data_size = total_sz;
275 iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page); 268 iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
276 return cur_page; 269 return cur_page;
277} 270}
278 271
279#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
280 272
281/** 273/**
282 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned 274 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -284,42 +276,40 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
284 * the number of entries which are aligned correctly. Supports the case where 276 * the number of entries which are aligned correctly. Supports the case where
285 * consecutive SG elements are actually fragments of the same physcial page. 277 * consecutive SG elements are actually fragments of the same physcial page.
286 */ 278 */
287static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, 279static int iser_data_buf_aligned_len(struct iser_data_buf *data,
288 struct ib_device *ibdev) 280 struct ib_device *ibdev)
289{ 281{
290 struct scatterlist *sgl, *sg; 282 struct scatterlist *sgl, *sg, *next_sg = NULL;
291 u64 end_addr, next_addr; 283 u64 start_addr, end_addr;
292 int i, cnt; 284 int i, ret_len, start_check = 0;
293 unsigned int ret_len = 0; 285
286 if (data->dma_nents == 1)
287 return 1;
294 288
295 sgl = (struct scatterlist *)data->buf; 289 sgl = (struct scatterlist *)data->buf;
290 start_addr = ib_sg_dma_address(ibdev, sgl);
296 291
297 cnt = 0;
298 for_each_sg(sgl, sg, data->dma_nents, i) { 292 for_each_sg(sgl, sg, data->dma_nents, i) {
299 /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " 293 if (start_check && !IS_4K_ALIGNED(start_addr))
300 "offset: %ld sz: %ld\n", i, 294 break;
301 (unsigned long)sg_phys(sg), 295
302 (unsigned long)sg->offset, 296 next_sg = sg_next(sg);
303 (unsigned long)sg->length); */ 297 if (!next_sg)
304 end_addr = ib_sg_dma_address(ibdev, sg) + 298 break;
305 ib_sg_dma_len(ibdev, sg); 299
306 /* iser_dbg("Checking sg iobuf end address " 300 end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
307 "0x%08lX\n", end_addr); */ 301 start_addr = ib_sg_dma_address(ibdev, next_sg);
308 if (i + 1 < data->dma_nents) { 302
309 next_addr = ib_sg_dma_address(ibdev, sg_next(sg)); 303 if (end_addr == start_addr) {
310 /* are i, i+1 fragments of the same page? */ 304 start_check = 0;
311 if (end_addr == next_addr) { 305 continue;
312 cnt++; 306 } else
313 continue; 307 start_check = 1;
314 } else if (!IS_4K_ALIGNED(end_addr)) { 308
315 ret_len = cnt + 1; 309 if (!IS_4K_ALIGNED(end_addr))
316 break; 310 break;
317 }
318 }
319 cnt++;
320 } 311 }
321 if (i == data->dma_nents) 312 ret_len = (next_sg) ? i : i+1;
322 ret_len = cnt; /* loop ended */
323 iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n", 313 iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
324 ret_len, data->dma_nents, data); 314 ret_len, data->dma_nents, data);
325 return ret_len; 315 return ret_len;
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 3c16602172fc..04f42ae1eda0 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
90 [ 9] = "Q_Key violation counter", 90 [ 9] = "Q_Key violation counter",
91 [10] = "VMM", 91 [10] = "VMM",
92 [12] = "DPDP", 92 [12] = "DPDP",
93 [15] = "Big LSO headers",
93 [16] = "MW support", 94 [16] = "MW support",
94 [17] = "APM support", 95 [17] = "APM support",
95 [18] = "Atomic ops support", 96 [18] = "Atomic ops support",
@@ -235,7 +236,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
235 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); 236 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
236 dev_cap->max_mpts = 1 << (field & 0x3f); 237 dev_cap->max_mpts = 1 << (field & 0x3f);
237 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); 238 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
238 dev_cap->reserved_eqs = 1 << (field & 0xf); 239 dev_cap->reserved_eqs = field & 0xf;
239 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); 240 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
240 dev_cap->max_eqs = 1 << (field & 0xf); 241 dev_cap->max_eqs = 1 << (field & 0xf);
241 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); 242 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ce7cc6c7bcbb..e92d1bfdb330 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -61,6 +61,7 @@ enum {
61 MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, 61 MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
62 MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, 62 MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
63 MLX4_DEV_CAP_FLAG_DPDP = 1 << 12, 63 MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
64 MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
64 MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, 65 MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
65 MLX4_DEV_CAP_FLAG_APM = 1 << 17, 66 MLX4_DEV_CAP_FLAG_APM = 1 << 17,
66 MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, 67 MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 483057b2f4b4..fa0d52b8e622 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -36,6 +36,7 @@
36 36
37#include <linux/in.h> 37#include <linux/in.h>
38#include <linux/in6.h> 38#include <linux/in6.h>
39#include <linux/if_arp.h>
39#include <linux/netdevice.h> 40#include <linux/netdevice.h>
40#include <linux/socket.h> 41#include <linux/socket.h>
41#include <rdma/ib_verbs.h> 42#include <rdma/ib_verbs.h>
@@ -60,8 +61,8 @@ struct rdma_dev_addr {
60 unsigned char src_dev_addr[MAX_ADDR_LEN]; 61 unsigned char src_dev_addr[MAX_ADDR_LEN];
61 unsigned char dst_dev_addr[MAX_ADDR_LEN]; 62 unsigned char dst_dev_addr[MAX_ADDR_LEN];
62 unsigned char broadcast[MAX_ADDR_LEN]; 63 unsigned char broadcast[MAX_ADDR_LEN];
63 enum rdma_node_type dev_type; 64 unsigned short dev_type;
64 struct net_device *src_dev; 65 int bound_dev_if;
65}; 66};
66 67
67/** 68/**
@@ -121,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
121 memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); 122 memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
122} 123}
123 124
124static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, 125static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
125 union ib_gid *gid)
126{ 126{
127 memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); 127 return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
128} 128}
129 129
130static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, 130static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
131 union ib_gid *gid)
132{ 131{
133 memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); 132 memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
134} 133}
135 134
136static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, 135static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
137 union ib_gid *gid)
138{ 136{
139 memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); 137 memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
140} 138}
141 139
142static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, 140static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
143 union ib_gid *gid)
144{ 141{
145 memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); 142 memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
146} 143}
147 144
148static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, 145static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
149 union ib_gid *gid)
150{
151 memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
152}
153
154static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
155 union ib_gid *gid)
156{ 146{
157 memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); 147 memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
158} 148}
159 149
160#endif /* IB_ADDR_H */ 150#endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 3841c1aff692..1082afaed158 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
379 struct ib_sa_path_rec *rec, 379 struct ib_sa_path_rec *rec,
380 struct ib_ah_attr *ah_attr); 380 struct ib_ah_attr *ah_attr);
381 381
382/**
383 * ib_sa_unpack_path - Convert a path record from MAD format to struct
384 * ib_sa_path_rec.
385 */
386void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
387
382#endif /* IB_SA_H */ 388#endif /* IB_SA_H */
diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h
index 659120157e14..cfc7c9ba781e 100644
--- a/include/rdma/ib_user_sa.h
+++ b/include/rdma/ib_user_sa.h
@@ -35,6 +35,22 @@
35 35
36#include <linux/types.h> 36#include <linux/types.h>
37 37
38enum {
39 IB_PATH_GMP = 1,
40 IB_PATH_PRIMARY = (1<<1),
41 IB_PATH_ALTERNATE = (1<<2),
42 IB_PATH_OUTBOUND = (1<<3),
43 IB_PATH_INBOUND = (1<<4),
44 IB_PATH_INBOUND_REVERSE = (1<<5),
45 IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE
46};
47
48struct ib_path_rec_data {
49 __u32 flags;
50 __u32 reserved;
51 __u32 path_rec[16];
52};
53
38struct ib_user_path_rec { 54struct ib_user_path_rec {
39 __u8 dgid[16]; 55 __u8 dgid[16];
40 __u8 sgid[16]; 56 __u8 sgid[16];
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c179318edd92..09509edb1c5f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp);
1425 * @send_wr: A list of work requests to post on the send queue. 1425 * @send_wr: A list of work requests to post on the send queue.
1426 * @bad_send_wr: On an immediate failure, this parameter will reference 1426 * @bad_send_wr: On an immediate failure, this parameter will reference
1427 * the work request that failed to be posted on the QP. 1427 * the work request that failed to be posted on the QP.
1428 *
1429 * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
1430 * error is returned, the QP state shall not be affected,
1431 * ib_post_send() will return an immediate error after queueing any
1432 * earlier work requests in the list.
1428 */ 1433 */
1429static inline int ib_post_send(struct ib_qp *qp, 1434static inline int ib_post_send(struct ib_qp *qp,
1430 struct ib_send_wr *send_wr, 1435 struct ib_send_wr *send_wr,
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index c55705460b87..1d165022c02d 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -215,12 +215,14 @@ struct rdma_ucm_event_resp {
215 215
216/* Option levels */ 216/* Option levels */
217enum { 217enum {
218 RDMA_OPTION_ID = 0 218 RDMA_OPTION_ID = 0,
219 RDMA_OPTION_IB = 1
219}; 220};
220 221
221/* Option details */ 222/* Option details */
222enum { 223enum {
223 RDMA_OPTION_ID_TOS = 0 224 RDMA_OPTION_ID_TOS = 0,
225 RDMA_OPTION_IB_PATH = 1
224}; 226};
225 227
226struct rdma_ucm_set_option { 228struct rdma_ucm_set_option {
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 536ebe5d3f6b..3b8992361042 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
182 ic = conn->c_transport_data; 182 ic = conn->c_transport_data;
183 dev_addr = &ic->i_cm_id->route.addr.dev_addr; 183 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
184 184
185 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 185 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
186 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 186 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
187 187
188 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 188 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
189 iinfo->max_send_wr = ic->i_send_ring.w_nr; 189 iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index db224f7c2937..b28fa8525b24 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn,
184 ic = conn->c_transport_data; 184 ic = conn->c_transport_data;
185 dev_addr = &ic->i_cm_id->route.addr.dev_addr; 185 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
186 186
187 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 187 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
188 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 188 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
189 189
190 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 190 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
191 iinfo->max_send_wr = ic->i_send_ring.w_nr; 191 iinfo->max_send_wr = ic->i_send_ring.w_nr;