author    Linus Torvalds <torvalds@linux-foundation.org>	2008-12-28 15:33:59 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>	2008-12-28 15:33:59 -0500
commit    1d248b2593e92db6c51ca07235985a95c625a93f
tree      4eceeb4eadb8a6339e0f83d0cad166f88d888557
parent    1db2a5c11e495366bff35cf7445d494703f7febe
parent    2a0d8366dde9c66d8f481bee56828b661e5c8bf1
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (26 commits)
  IB/mlx4: Set ownership bit correctly when copying CQEs during CQ resize
  RDMA/nes: Remove tx_free_list
  RDMA/cma: Add IPv6 support
  RDMA/addr: Add support for translating IPv6 addresses
  mlx4_core: Delete incorrect comment
  mlx4_core: Add support for multiple completion event vectors
  IB/iser: Avoid recv buffer exhaustion caused by unexpected PDUs
  IB/ehca: Remove redundant test of vpage
  IB/ehca: Replace modulus operations in flush error completion path
  IB/ipath: Add locking for interrupt use of ipath_pd contexts vs free
  IB/ipath: Fix spi_pioindex value
  IB/ipath: Only do 1X workaround on rev1 chips
  IB/ipath: Don't count IB symbol and link errors unless link is UP
  IB/ipath: Check return value of dma_map_single()
  IB/ipath: Fix PSN of send WQEs after an RDMA read resend
  RDMA/nes: Cleanup warnings
  RDMA/nes: Add loopback check to make_cm_node()
  RDMA/nes: Check cqp_avail_reqs is empty after locking the list
  RDMA/nes: Fix TCP compliance test failures
  RDMA/nes: Forward packets for a new connection with stale APBVT entry
  ...
-rw-r--r--  drivers/infiniband/core/addr.c                |  196
-rw-r--r--  drivers/infiniband/core/cma.c                 |   86
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h     |    7
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_eq.c          |    2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c        |   17
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c          |   12
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_reqs.c        |   13
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c    |   49
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c  |   30
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c        |    2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c   |   61
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba7220.c   |   83
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c |    1
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h    |   15
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_keys.c      |    2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c       |    2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c        |   32
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c        |    5
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sdma.c      |   21
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_stats.c     |    8
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c        |   19
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c     |    3
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h     |    1
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c               |   12
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c             |    2
-rw-r--r--  drivers/infiniband/hw/nes/nes.h               |   18
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c            |  279
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.h            |   14
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c            |   42
-rw-r--r--  drivers/infiniband/hw/nes/nes_utils.c         |    9
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c         |   45
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h      |    3
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c  |  132
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c      |    1
-rw-r--r--  drivers/net/mlx4/cq.c                         |   11
-rw-r--r--  drivers/net/mlx4/en_cq.c                      |    9
-rw-r--r--  drivers/net/mlx4/en_main.c                    |    4
-rw-r--r--  drivers/net/mlx4/eq.c                         |  121
-rw-r--r--  drivers/net/mlx4/main.c                       |   53
-rw-r--r--  drivers/net/mlx4/mlx4.h                       |   14
-rw-r--r--  drivers/net/mlx4/profile.c                    |    4
-rw-r--r--  include/linux/mlx4/device.h                   |    4
42 files changed, 965 insertions, 479 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 09a2bec7fd32..d98b05b28262 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -41,6 +41,8 @@
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/netevent.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
 
 MODULE_AUTHOR("Sean Hefty");
@@ -49,8 +51,8 @@ MODULE_LICENSE("Dual BSD/GPL");
 
 struct addr_req {
 	struct list_head list;
-	struct sockaddr src_addr;
-	struct sockaddr dst_addr;
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
 	struct rdma_dev_addr *addr;
 	struct rdma_addr_client *client;
 	void *context;
@@ -113,15 +115,32 @@ EXPORT_SYMBOL(rdma_copy_addr);
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
 	struct net_device *dev;
-	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
-	int ret;
+	int ret = -EADDRNOTAVAIL;
 
-	dev = ip_dev_find(&init_net, ip);
-	if (!dev)
-		return -EADDRNOTAVAIL;
+	switch (addr->sa_family) {
+	case AF_INET:
+		dev = ip_dev_find(&init_net,
+			((struct sockaddr_in *) addr)->sin_addr.s_addr);
+
+		if (!dev)
+			return ret;
 
-	ret = rdma_copy_addr(dev_addr, dev, NULL);
-	dev_put(dev);
+		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		dev_put(dev);
+		break;
+	case AF_INET6:
+		for_each_netdev(&init_net, dev) {
+			if (ipv6_chk_addr(&init_net,
+					  &((struct sockaddr_in6 *) addr)->sin6_addr,
+					  dev, 1)) {
+				ret = rdma_copy_addr(dev_addr, dev, NULL);
+				break;
+			}
+		}
+		break;
+	default:
+		break;
+	}
 	return ret;
 }
 EXPORT_SYMBOL(rdma_translate_ip);
@@ -156,22 +175,37 @@ static void queue_req(struct addr_req *req)
 	mutex_unlock(&lock);
 }
 
-static void addr_send_arp(struct sockaddr_in *dst_in)
+static void addr_send_arp(struct sockaddr *dst_in)
 {
 	struct rtable *rt;
 	struct flowi fl;
-	__be32 dst_ip = dst_in->sin_addr.s_addr;
+	struct dst_entry *dst;
 
 	memset(&fl, 0, sizeof fl);
-	fl.nl_u.ip4_u.daddr = dst_ip;
-	if (ip_route_output_key(&init_net, &rt, &fl))
-		return;
+	if (dst_in->sa_family == AF_INET) {
+		fl.nl_u.ip4_u.daddr =
+			((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
 
-	neigh_event_send(rt->u.dst.neighbour, NULL);
-	ip_rt_put(rt);
+		if (ip_route_output_key(&init_net, &rt, &fl))
+			return;
+
+		neigh_event_send(rt->u.dst.neighbour, NULL);
+		ip_rt_put(rt);
+
+	} else {
+		fl.nl_u.ip6_u.daddr =
+			((struct sockaddr_in6 *) dst_in)->sin6_addr;
+
+		dst = ip6_route_output(&init_net, NULL, &fl);
+		if (!dst)
+			return;
+
+		neigh_event_send(dst->neighbour, NULL);
+		dst_release(dst);
+	}
 }
 
-static int addr_resolve_remote(struct sockaddr_in *src_in,
+static int addr4_resolve_remote(struct sockaddr_in *src_in,
 				struct sockaddr_in *dst_in,
 				struct rdma_dev_addr *addr)
 {
@@ -220,10 +254,51 @@ out:
 	return ret;
 }
 
+static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
+				struct sockaddr_in6 *dst_in,
+				struct rdma_dev_addr *addr)
+{
+	struct flowi fl;
+	struct neighbour *neigh;
+	struct dst_entry *dst;
+	int ret = -ENODATA;
+
+	memset(&fl, 0, sizeof fl);
+	fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
+	fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+
+	dst = ip6_route_output(&init_net, NULL, &fl);
+	if (!dst)
+		return ret;
+
+	if (dst->dev->flags & IFF_NOARP) {
+		ret = rdma_copy_addr(addr, dst->dev, NULL);
+	} else {
+		neigh = dst->neighbour;
+		if (neigh && (neigh->nud_state & NUD_VALID))
+			ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+	}
+
+	dst_release(dst);
+	return ret;
+}
+
+static int addr_resolve_remote(struct sockaddr *src_in,
+			       struct sockaddr *dst_in,
+			       struct rdma_dev_addr *addr)
+{
+	if (src_in->sa_family == AF_INET) {
+		return addr4_resolve_remote((struct sockaddr_in *) src_in,
+			(struct sockaddr_in *) dst_in, addr);
+	} else
+		return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+			(struct sockaddr_in6 *) dst_in, addr);
+}
+
 static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
-	struct sockaddr_in *src_in, *dst_in;
+	struct sockaddr *src_in, *dst_in;
 	struct list_head done_list;
 
 	INIT_LIST_HEAD(&done_list);
@@ -231,8 +306,8 @@ static void process_req(struct work_struct *work)
 	mutex_lock(&lock);
 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
 		if (req->status == -ENODATA) {
-			src_in = (struct sockaddr_in *) &req->src_addr;
-			dst_in = (struct sockaddr_in *) &req->dst_addr;
+			src_in = (struct sockaddr *) &req->src_addr;
+			dst_in = (struct sockaddr *) &req->dst_addr;
 			req->status = addr_resolve_remote(src_in, dst_in,
 							  req->addr);
 			if (req->status && time_after_eq(jiffies, req->timeout))
@@ -251,41 +326,72 @@ static void process_req(struct work_struct *work)
 
 	list_for_each_entry_safe(req, temp_req, &done_list, list) {
 		list_del(&req->list);
-		req->callback(req->status, &req->src_addr, req->addr,
-			      req->context);
+		req->callback(req->status, (struct sockaddr *) &req->src_addr,
+			      req->addr, req->context);
 		put_client(req->client);
 		kfree(req);
 	}
 }
 
-static int addr_resolve_local(struct sockaddr_in *src_in,
-			      struct sockaddr_in *dst_in,
+static int addr_resolve_local(struct sockaddr *src_in,
+			      struct sockaddr *dst_in,
 			      struct rdma_dev_addr *addr)
 {
 	struct net_device *dev;
-	__be32 src_ip = src_in->sin_addr.s_addr;
-	__be32 dst_ip = dst_in->sin_addr.s_addr;
 	int ret;
 
-	dev = ip_dev_find(&init_net, dst_ip);
-	if (!dev)
-		return -EADDRNOTAVAIL;
+	if (dst_in->sa_family == AF_INET) {
+		__be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
+		__be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
 
-	if (ipv4_is_zeronet(src_ip)) {
-		src_in->sin_family = dst_in->sin_family;
-		src_in->sin_addr.s_addr = dst_ip;
-		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
-	} else if (ipv4_is_loopback(src_ip)) {
-		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
-		if (!ret)
-			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+		dev = ip_dev_find(&init_net, dst_ip);
+		if (!dev)
+			return -EADDRNOTAVAIL;
+
+		if (ipv4_is_zeronet(src_ip)) {
+			src_in->sa_family = dst_in->sa_family;
+			((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
+			ret = rdma_copy_addr(addr, dev, dev->dev_addr);
+		} else if (ipv4_is_loopback(src_ip)) {
+			ret = rdma_translate_ip(dst_in, addr);
+			if (!ret)
+				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+		} else {
+			ret = rdma_translate_ip(src_in, addr);
+			if (!ret)
+				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+		}
+		dev_put(dev);
 	} else {
-		ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
-		if (!ret)
-			memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+		struct in6_addr *a;
+
+		for_each_netdev(&init_net, dev)
+			if (ipv6_chk_addr(&init_net,
+					  &((struct sockaddr_in6 *) addr)->sin6_addr,
+					  dev, 1))
+				break;
+
+		if (!dev)
+			return -EADDRNOTAVAIL;
+
+		a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
+
+		if (ipv6_addr_any(a)) {
+			src_in->sa_family = dst_in->sa_family;
+			((struct sockaddr_in6 *) src_in)->sin6_addr =
+				((struct sockaddr_in6 *) dst_in)->sin6_addr;
+			ret = rdma_copy_addr(addr, dev, dev->dev_addr);
+		} else if (ipv6_addr_loopback(a)) {
+			ret = rdma_translate_ip(dst_in, addr);
+			if (!ret)
+				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+		} else {
+			ret = rdma_translate_ip(src_in, addr);
+			if (!ret)
+				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
+		}
 	}
 
-	dev_put(dev);
 	return ret;
 }
 
@@ -296,7 +402,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 				     struct rdma_dev_addr *addr, void *context),
 		    void *context)
 {
-	struct sockaddr_in *src_in, *dst_in;
+	struct sockaddr *src_in, *dst_in;
 	struct addr_req *req;
 	int ret = 0;
 
@@ -313,8 +419,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 	req->client = client;
 	atomic_inc(&client->refcount);
 
-	src_in = (struct sockaddr_in *) &req->src_addr;
-	dst_in = (struct sockaddr_in *) &req->dst_addr;
+	src_in = (struct sockaddr *) &req->src_addr;
+	dst_in = (struct sockaddr *) &req->dst_addr;
 
 	req->status = addr_resolve_local(src_in, dst_in, addr);
 	if (req->status == -EADDRNOTAVAIL)
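
The pattern running through the addr.c changes above is dual-stack widening: requests now carry struct sockaddr_storage (large and aligned enough for either address family) and each consumer dispatches on sa_family. A minimal user-space sketch of the same pattern, using only standard socket headers; format_addr() is a hypothetical helper, not part of the patch:

/* Dispatch on ss_family, exactly as the kernel code above does. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static int format_addr(const struct sockaddr_storage *ss,
		       char *buf, size_t len)
{
	switch (ss->ss_family) {
	case AF_INET:
		return inet_ntop(AF_INET,
				 &((const struct sockaddr_in *) ss)->sin_addr,
				 buf, len) ? 0 : -1;
	case AF_INET6:
		return inet_ntop(AF_INET6,
				 &((const struct sockaddr_in6 *) ss)->sin6_addr,
				 buf, len) ? 0 : -1;
	default:
		return -1;	/* same shape as the -EADDRNOTAVAIL default */
	}
}

int main(void)
{
	struct sockaddr_storage ss;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &ss;
	char buf[INET6_ADDRSTRLEN];

	memset(&ss, 0, sizeof(ss));
	sin6->sin6_family = AF_INET6;
	inet_pton(AF_INET6, "::1", &sin6->sin6_addr);

	if (!format_addr(&ss, buf, sizeof(buf)))
		printf("%s\n", buf);	/* prints ::1 */
	return 0;
}
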
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d951896ff7fc..2a2e50871b40 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -42,6 +42,7 @@
 #include <linux/inetdevice.h>
 
 #include <net/tcp.h>
+#include <net/ipv6.h>
 
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
@@ -636,7 +637,12 @@ static inline int cma_zero_addr(struct sockaddr *addr)
 
 static inline int cma_loopback_addr(struct sockaddr *addr)
 {
-	return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+	if (addr->sa_family == AF_INET)
+		return ipv4_is_loopback(
+			((struct sockaddr_in *) addr)->sin_addr.s_addr);
+	else
+		return ipv6_addr_loopback(
+			&((struct sockaddr_in6 *) addr)->sin6_addr);
 }
 
 static inline int cma_any_addr(struct sockaddr *addr)
@@ -1467,10 +1473,10 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
 
 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
 {
-	struct sockaddr_in addr_in;
+	struct sockaddr_storage addr_in;
 
 	memset(&addr_in, 0, sizeof addr_in);
-	addr_in.sin_family = af;
+	addr_in.ss_family = af;
 	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
 }
 
1476 1482
@@ -2073,7 +2079,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2073 struct rdma_id_private *id_priv; 2079 struct rdma_id_private *id_priv;
2074 int ret; 2080 int ret;
2075 2081
2076 if (addr->sa_family != AF_INET) 2082 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
2077 return -EAFNOSUPPORT; 2083 return -EAFNOSUPPORT;
2078 2084
2079 id_priv = container_of(id, struct rdma_id_private, id); 2085 id_priv = container_of(id, struct rdma_id_private, id);
@@ -2113,31 +2119,59 @@ EXPORT_SYMBOL(rdma_bind_addr);
 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
 			  struct rdma_route *route)
 {
-	struct sockaddr_in *src4, *dst4;
 	struct cma_hdr *cma_hdr;
 	struct sdp_hh *sdp_hdr;
 
-	src4 = (struct sockaddr_in *) &route->addr.src_addr;
-	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
-
-	switch (ps) {
-	case RDMA_PS_SDP:
-		sdp_hdr = hdr;
-		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
-			return -EINVAL;
-		sdp_set_ip_ver(sdp_hdr, 4);
-		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-		sdp_hdr->port = src4->sin_port;
-		break;
-	default:
-		cma_hdr = hdr;
-		cma_hdr->cma_version = CMA_VERSION;
-		cma_set_ip_ver(cma_hdr, 4);
-		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-		cma_hdr->port = src4->sin_port;
-		break;
+	if (route->addr.src_addr.ss_family == AF_INET) {
+		struct sockaddr_in *src4, *dst4;
+
+		src4 = (struct sockaddr_in *) &route->addr.src_addr;
+		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+
+		switch (ps) {
+		case RDMA_PS_SDP:
+			sdp_hdr = hdr;
+			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+				return -EINVAL;
+			sdp_set_ip_ver(sdp_hdr, 4);
+			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+			sdp_hdr->port = src4->sin_port;
+			break;
+		default:
+			cma_hdr = hdr;
+			cma_hdr->cma_version = CMA_VERSION;
+			cma_set_ip_ver(cma_hdr, 4);
+			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+			cma_hdr->port = src4->sin_port;
+			break;
+		}
+	} else {
+		struct sockaddr_in6 *src6, *dst6;
+
+		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
+		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+
+		switch (ps) {
+		case RDMA_PS_SDP:
+			sdp_hdr = hdr;
+			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+				return -EINVAL;
+			sdp_set_ip_ver(sdp_hdr, 6);
+			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
+			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
+			sdp_hdr->port = src6->sin6_port;
+			break;
+		default:
+			cma_hdr = hdr;
+			cma_hdr->cma_version = CMA_VERSION;
+			cma_set_ip_ver(cma_hdr, 6);
+			cma_hdr->src_addr.ip6 = src6->sin6_addr;
+			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+			cma_hdr->port = src6->sin6_port;
+			break;
+		}
 	}
 	return 0;
 }
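
With rdma_bind_addr() now accepting AF_INET6 and cma_format_hdr() emitting ip_ver 6 headers, an IPv6 listener becomes possible end to end. A hedged user-space sketch of what this enables through librdmacm, the user-space mirror of these entry points (error handling trimmed; bind_v6_any() is illustrative, not part of the patch):

#include <netinet/in.h>
#include <rdma/rdma_cma.h>
#include <string.h>

int bind_v6_any(struct rdma_cm_id **id)
{
	struct rdma_event_channel *ch = rdma_create_event_channel();
	struct sockaddr_in6 sin6;

	if (!ch || rdma_create_id(ch, id, NULL, RDMA_PS_TCP))
		return -1;

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;	/* in6addr_any, port chosen by the CM */

	return rdma_bind_addr(*id, (struct sockaddr *) &sin6);
}
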
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 7fc35cf0cddf..c825142a2fb7 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -175,6 +175,13 @@ struct ehca_queue_map {
 	unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
 };
 
+/* function to calculate the next index for the qmap */
+static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
+{
+	unsigned int temp = cur_index + 1;
+	return (temp == limit) ? 0 : temp;
+}
+
 struct ehca_qp {
 	union {
 		struct ib_qp ib_qp;
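
next_index() replaces the (tail + 1) % entries idiom in the flush path (see the ehca_qp.c and ehca_reqs.c hunks below, and the "Replace modulus operations" commit in the shortlog) with a compare-and-reset, trading an integer division for a branch on every ring advance. A standalone check that the two forms agree, under the assumption that indices are always in range:

#include <assert.h>
#include <stdio.h>

static inline unsigned int next_index(unsigned int cur, unsigned int limit)
{
	unsigned int temp = cur + 1;
	return (temp == limit) ? 0 : temp;
}

int main(void)
{
	const unsigned int entries = 8;

	for (unsigned int i = 0; i < entries; i++)
		assert(next_index(i, entries) == (i + 1) % entries);

	printf("branch form == modulus form for all in-range indices\n");
	return 0;
}
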
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 49660dfa1867..523e733c630e 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -113,7 +113,7 @@ int ehca_create_eq(struct ehca_shca *shca,
 			if (h_ret != H_SUCCESS || vpage)
 				goto create_eq_exit2;
 		} else {
-			if (h_ret != H_PAGE_REGISTERED || !vpage)
+			if (h_ret != H_PAGE_REGISTERED)
 				goto create_eq_exit2;
 		}
 	}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index bec7e0249358..3b77b674cbf6 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -717,6 +717,7 @@ static int __devinit ehca_probe(struct of_device *dev,
 	const u64 *handle;
 	struct ib_pd *ibpd;
 	int ret, i, eq_size;
+	unsigned long flags;
 
 	handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
 	if (!handle) {
@@ -830,9 +831,9 @@ static int __devinit ehca_probe(struct of_device *dev,
 		ehca_err(&shca->ib_device,
 			 "Cannot create device attributes ret=%d", ret);
 
-	spin_lock(&shca_list_lock);
+	spin_lock_irqsave(&shca_list_lock, flags);
 	list_add(&shca->shca_list, &shca_list);
-	spin_unlock(&shca_list_lock);
+	spin_unlock_irqrestore(&shca_list_lock, flags);
 
 	return 0;
 
@@ -878,6 +879,7 @@ probe1:
 static int __devexit ehca_remove(struct of_device *dev)
 {
 	struct ehca_shca *shca = dev->dev.driver_data;
+	unsigned long flags;
 	int ret;
 
 	sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
@@ -915,9 +917,9 @@ static int __devexit ehca_remove(struct of_device *dev)
 
 	ib_dealloc_device(&shca->ib_device);
 
-	spin_lock(&shca_list_lock);
+	spin_lock_irqsave(&shca_list_lock, flags);
 	list_del(&shca->shca_list);
-	spin_unlock(&shca_list_lock);
+	spin_unlock_irqrestore(&shca_list_lock, flags);
 
 	return ret;
 }
@@ -975,6 +977,7 @@ static int ehca_mem_notifier(struct notifier_block *nb,
 			     unsigned long action, void *data)
 {
 	static unsigned long ehca_dmem_warn_time;
+	unsigned long flags;
 
 	switch (action) {
 	case MEM_CANCEL_OFFLINE:
@@ -985,12 +988,12 @@ static int ehca_mem_notifier(struct notifier_block *nb,
 	case MEM_GOING_ONLINE:
 	case MEM_GOING_OFFLINE:
 		/* only ok if no hca is attached to the lpar */
-		spin_lock(&shca_list_lock);
+		spin_lock_irqsave(&shca_list_lock, flags);
 		if (list_empty(&shca_list)) {
-			spin_unlock(&shca_list_lock);
+			spin_unlock_irqrestore(&shca_list_lock, flags);
 			return NOTIFY_OK;
 		} else {
-			spin_unlock(&shca_list_lock);
+			spin_unlock_irqrestore(&shca_list_lock, flags);
 			if (printk_timed_ratelimit(&ehca_dmem_warn_time,
 						   30 * 1000))
 				ehca_gen_err("DMEM operations are not allowed"
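
All three shca_list_lock sites above switch from plain spin_lock() to the _irqsave variants, which save the caller's interrupt state in flags and restore it on unlock, so the list can be taken safely regardless of the context the caller runs in. The shape of the pattern, as a kernel-context sketch with a hypothetical example list (not part of the patch):

#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_list);

struct example_node {
	struct list_head list;
};

static void example_add(struct example_node *n)
{
	unsigned long flags;

	/* safe whether or not the caller already has interrupts off */
	spin_lock_irqsave(&example_lock, flags);
	list_add(&n->list, &example_list);
	spin_unlock_irqrestore(&example_lock, flags);
}
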
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index cadbf0cdd910..f161cf173dbe 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1138,14 +1138,14 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
 		return -EFAULT;
 	}
 
-	tail_idx = (qmap->tail + 1) % qmap->entries;
+	tail_idx = next_index(qmap->tail, qmap->entries);
 	wqe_idx = q_ofs / ipz_queue->qe_size;
 
 	/* check all processed wqes, whether a cqe is requested or not */
 	while (tail_idx != wqe_idx) {
 		if (qmap->map[tail_idx].cqe_req)
 			qmap->left_to_poll++;
-		tail_idx = (tail_idx + 1) % qmap->entries;
+		tail_idx = next_index(tail_idx, qmap->entries);
 	}
 	/* save index in queue, where we have to start flushing */
 	qmap->next_wqe_idx = wqe_idx;
@@ -1195,14 +1195,14 @@ static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
 	} else {
 		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
 		my_qp->sq_map.left_to_poll = 0;
-		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
-						my_qp->sq_map.entries;
+		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+							my_qp->sq_map.entries);
 		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
 
 		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
 		my_qp->rq_map.left_to_poll = 0;
-		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
-						my_qp->rq_map.entries;
+		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+							my_qp->rq_map.entries);
 		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
 	}
 
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 00a648f4316c..c7112686782f 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -726,13 +726,13 @@ repoll:
 	 * set left_to_poll to 0 because in error state, we will not
 	 * get any additional CQEs
 	 */
-	my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
-					my_qp->sq_map.entries;
+	my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+						my_qp->sq_map.entries);
 	my_qp->sq_map.left_to_poll = 0;
 	ehca_add_to_err_list(my_qp, 1);
 
-	my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
-					my_qp->rq_map.entries;
+	my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+						my_qp->rq_map.entries);
 	my_qp->rq_map.left_to_poll = 0;
 	if (HAS_RQ(my_qp))
 		ehca_add_to_err_list(my_qp, 0);
@@ -860,9 +860,8 @@ static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
 
 		/* mark as reported and advance next_wqe pointer */
 		qmap_entry->reported = 1;
-		qmap->next_wqe_idx++;
-		if (qmap->next_wqe_idx == qmap->entries)
-			qmap->next_wqe_idx = 0;
+		qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
+						qmap->entries);
 		qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
 		wc++; nr++;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ad0aab60b051..69c0ce321b4e 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -661,6 +661,8 @@ bail:
 static void __devexit cleanup_device(struct ipath_devdata *dd)
 {
 	int port;
+	struct ipath_portdata **tmp;
+	unsigned long flags;
 
 	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
 		/* can't do anything more with chip; needs re-init */
@@ -742,20 +744,21 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
 
 	/*
 	 * free any resources still in use (usually just kernel ports)
-	 * at unload; we do for portcnt, not cfgports, because cfgports
-	 * could have changed while we were loaded.
+	 * at unload; we do for portcnt, because that's what we allocate.
+	 * We acquire lock to be really paranoid that ipath_pd isn't being
+	 * accessed from some interrupt-related code (that should not happen,
+	 * but best to be sure).
 	 */
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+	tmp = dd->ipath_pd;
+	dd->ipath_pd = NULL;
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 	for (port = 0; port < dd->ipath_portcnt; port++) {
-		struct ipath_portdata *pd = dd->ipath_pd[port];
-		dd->ipath_pd[port] = NULL;
+		struct ipath_portdata *pd = tmp[port];
+		tmp[port] = NULL;	/* debugging paranoia */
 		ipath_free_pddata(dd, pd);
 	}
-	kfree(dd->ipath_pd);
-	/*
-	 * debuggability, in case some cleanup path tries to use it
-	 * after this
-	 */
-	dd->ipath_pd = NULL;
+	kfree(tmp);
 }
 
 static void __devexit ipath_remove_one(struct pci_dev *pdev)
@@ -2586,6 +2589,7 @@ int ipath_reset_device(int unit)
 {
 	int ret, i;
 	struct ipath_devdata *dd = ipath_lookup(unit);
+	unsigned long flags;
 
 	if (!dd) {
 		ret = -ENODEV;
@@ -2611,18 +2615,21 @@ int ipath_reset_device(int unit)
 		goto bail;
 	}
 
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
 	if (dd->ipath_pd)
 		for (i = 1; i < dd->ipath_cfgports; i++) {
-			if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
-				ipath_dbg("unit %u port %d is in use "
-					  "(PID %u cmd %s), can't reset\n",
-					  unit, i,
-					  pid_nr(dd->ipath_pd[i]->port_pid),
-					  dd->ipath_pd[i]->port_comm);
-				ret = -EBUSY;
-				goto bail;
-			}
+			if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+				continue;
+			spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+			ipath_dbg("unit %u port %d is in use "
+				  "(PID %u cmd %s), can't reset\n",
+				  unit, i,
+				  pid_nr(dd->ipath_pd[i]->port_pid),
+				  dd->ipath_pd[i]->port_comm);
+			ret = -EBUSY;
+			goto bail;
 		}
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 
 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
 		teardown_sdma(dd);
@@ -2656,9 +2663,12 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 {
 	int i, sub, any = 0;
 	struct pid *pid;
+	unsigned long flags;
 
 	if (!dd->ipath_pd)
 		return 0;
+
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
 	for (i = 1; i < dd->ipath_cfgports; i++) {
 		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
 			continue;
@@ -2682,6 +2692,7 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
 			any++;
 		}
 	}
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 	return any;
 }
 
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 1af1f3a907c6..239d4e8068ac 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -223,8 +223,13 @@ static int ipath_get_base_info(struct file *fp,
 			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
 	}
 
-	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
-		dd->ipath_palign;
+	/*
+	 * All user buffers are 2KB buffers.  If we ever support
+	 * giving 4KB buffers to user processes, this will need some
+	 * work.
+	 */
+	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
+		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
 	kinfo->spi_pioalign = dd->ipath_palign;
 
 	kinfo->spi_qpair = IPATH_KD_QP;
@@ -2041,7 +2046,9 @@ static int ipath_close(struct inode *in, struct file *fp)
 	struct ipath_filedata *fd;
 	struct ipath_portdata *pd;
 	struct ipath_devdata *dd;
+	unsigned long flags;
 	unsigned port;
+	struct pid *pid;
 
 	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
 		   (long)in->i_rdev, fp->private_data);
@@ -2074,14 +2081,13 @@ static int ipath_close(struct inode *in, struct file *fp)
 		mutex_unlock(&ipath_mutex);
 		goto bail;
 	}
+	/* early; no interrupt users after this */
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
 	port = pd->port_port;
-
-	if (pd->port_hdrqfull) {
-		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
-			   "during run\n", pd->port_comm, pid_nr(pd->port_pid),
-			   pd->port_hdrqfull);
-		pd->port_hdrqfull = 0;
-	}
+	dd->ipath_pd[port] = NULL;
+	pid = pd->port_pid;
+	pd->port_pid = NULL;
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 
 	if (pd->port_rcvwait_to || pd->port_piowait_to
 	    || pd->port_rcvnowait || pd->port_pionowait) {
@@ -2138,13 +2144,11 @@ static int ipath_close(struct inode *in, struct file *fp)
 		unlock_expected_tids(pd);
 		ipath_stats.sps_ports--;
 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-			   pd->port_comm, pid_nr(pd->port_pid),
+			   pd->port_comm, pid_nr(pid),
 			   dd->ipath_unit, port);
 	}
 
-	put_pid(pd->port_pid);
-	pd->port_pid = NULL;
-	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
+	put_pid(pid);
 	mutex_unlock(&ipath_mutex);
 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
 
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 8bb5170b4e41..53912c327bfe 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -86,7 +86,7 @@ static int create_file(const char *name, mode_t mode,
 	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(dentry))
+	if (!IS_ERR(*dentry))
 		error = ipathfs_mknod(parent->d_inode, *dentry,
 				      mode, fops, data);
 	else
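
The ipath_fs.c change is a one-character pointer-vs-pointee fix: lookup_one_len() stores its result through *dentry, so IS_ERR() must test what was stored; the local dentry pointer itself is never an ERR_PTR. A kernel-context sketch of the out-parameter shape behind the bug (struct thing and alloc_thing() are hypothetical):

#include <linux/err.h>

struct thing;
extern struct thing *alloc_thing(void);	/* hypothetical allocator */

static int make_thing(struct thing **out)
{
	*out = alloc_thing();	/* result is stored through 'out' */
	if (IS_ERR(*out))	/* correct: test the pointee */
		return PTR_ERR(*out);
	/* IS_ERR(out) would test a valid stack address: never an error */
	return 0;
}
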
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 421cc2af891f..fbf8c5379ea8 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -721,6 +721,12 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 			     INFINIPATH_HWE_SERDESPLLFAILED);
 	}
 
+	dd->ibdeltainprog = 1;
+	dd->ibsymsnap =
+		ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	dd->iblnkerrsnap =
+		ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
 	config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
 
@@ -810,6 +816,36 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
 {
 	u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
 
+	if (dd->ibsymdelta || dd->iblnkerrdelta ||
+	    dd->ibdeltainprog) {
+		u64 diagc;
+		/* enable counter writes */
+		diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl,
+				 diagc | INFINIPATH_DC_COUNTERWREN);
+
+		if (dd->ibsymdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->ibsymsnap;
+			val -= dd->ibsymdelta;
+			ipath_write_creg(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt, val);
+		}
+		if (dd->iblnkerrdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->iblnkerrsnap;
+			val -= dd->iblnkerrdelta;
+			ipath_write_creg(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt, val);
+		}
+
+		/* and disable counter writes */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc);
+	}
 	val |= INFINIPATH_SERDC0_TXIDLE;
 	ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
 		  (unsigned long long) val);
@@ -1749,6 +1785,31 @@ static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
 
 static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 {
+	if (ibup) {
+		if (dd->ibdeltainprog) {
+			dd->ibdeltainprog = 0;
+			dd->ibsymdelta +=
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_ibsymbolerrcnt) -
+				dd->ibsymsnap;
+			dd->iblnkerrdelta +=
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_iblinkerrrecovcnt) -
+				dd->iblnkerrsnap;
+		}
+	} else {
+		dd->ipath_lli_counter = 0;
+		if (!dd->ibdeltainprog) {
+			dd->ibdeltainprog = 1;
+			dd->ibsymsnap =
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_ibsymbolerrcnt);
+			dd->iblnkerrsnap =
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_iblinkerrrecovcnt);
+		}
+	}
+
 	ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs),
 				 ipath_ib_linktrstate(dd, ibcs));
 	return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index 9839e20119bc..b2a9d4c155d1 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -951,6 +951,12 @@ static int ipath_7220_bringup_serdes(struct ipath_devdata *dd)
 			     INFINIPATH_HWE_SERDESPLLFAILED);
 	}
 
+	dd->ibdeltainprog = 1;
+	dd->ibsymsnap =
+		ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	dd->iblnkerrsnap =
+		ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+
 	if (!dd->ipath_ibcddrctrl) {
 		/* not on re-init after reset */
 		dd->ipath_ibcddrctrl =
@@ -1084,6 +1090,37 @@ static void ipath_7220_config_jint(struct ipath_devdata *dd,
 static void ipath_7220_quiet_serdes(struct ipath_devdata *dd)
 {
 	u64 val;
+	if (dd->ibsymdelta || dd->iblnkerrdelta ||
+	    dd->ibdeltainprog) {
+		u64 diagc;
+		/* enable counter writes */
+		diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl,
+				 diagc | INFINIPATH_DC_COUNTERWREN);
+
+		if (dd->ibsymdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->ibsymsnap;
+			val -= dd->ibsymdelta;
+			ipath_write_creg(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt, val);
+		}
+		if (dd->iblnkerrdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->iblnkerrsnap;
+			val -= dd->iblnkerrdelta;
+			ipath_write_creg(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt, val);
+		}
+
+		/* and disable counter writes */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc);
+	}
+
 	dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
 	wake_up(&dd->ipath_autoneg_wait);
 	cancel_delayed_work(&dd->ipath_autoneg_work);
@@ -2325,7 +2362,7 @@ static void try_auto_neg(struct ipath_devdata *dd)
 
 static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 {
-	int ret = 0;
+	int ret = 0, symadj = 0;
 	u32 ltstate = ipath_ib_linkstate(dd, ibcs);
 
 	dd->ipath_link_width_active =
@@ -2368,6 +2405,13 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 			ipath_dbg("DDR negotiation try, %u/%u\n",
 				dd->ipath_autoneg_tries,
 				IPATH_AUTONEG_TRIES);
+			if (!dd->ibdeltainprog) {
+				dd->ibdeltainprog = 1;
+				dd->ibsymsnap = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt);
+				dd->iblnkerrsnap = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			}
 			try_auto_neg(dd);
 			ret = 1; /* no other IB status change processing */
 		} else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
@@ -2388,6 +2432,7 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 			set_speed_fast(dd,
 					dd->ipath_link_speed_enabled);
 			wake_up(&dd->ipath_autoneg_wait);
+			symadj = 1;
 		} else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) {
 			/*
 			 * clear autoneg failure flag, and do setup
@@ -2403,22 +2448,28 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 					IBA7220_IBC_IBTA_1_2_MASK;
 				ipath_write_kreg(dd,
 					IPATH_KREG_OFFSET(IBNCModeCtrl), 0);
+				symadj = 1;
 			}
 		}
 		/*
-		 * if we are in 1X, and are in autoneg width, it
-		 * could be due to an xgxs problem, so if we haven't
+		 * if we are in 1X on rev1 only, and are in autoneg width,
+		 * it could be due to an xgxs problem, so if we haven't
 		 * already tried, try twice to get to 4X; if we
 		 * tried, and couldn't, report it, since it will
 		 * probably not be what is desired.
 		 */
-		if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+		if (dd->ipath_minrev == 1 &&
+		    (dd->ipath_link_width_enabled & (IB_WIDTH_1X |
 		    IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)
 			&& dd->ipath_link_width_active == IB_WIDTH_1X
 			&& dd->ipath_x1_fix_tries < 3) {
-			if (++dd->ipath_x1_fix_tries == 3)
+			if (++dd->ipath_x1_fix_tries == 3) {
 				dev_info(&dd->pcidev->dev,
 					"IB link is in 1X mode\n");
+				if (!(dd->ipath_flags &
+				      IPATH_IB_AUTONEG_INPROG))
+					symadj = 1;
+			}
 			else {
 				ipath_cdbg(VERBOSE, "IB 1X in "
 					"auto-width, try %u to be "
@@ -2429,7 +2480,8 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 				dd->ipath_f_xgxs_reset(dd);
 				ret = 1; /* skip other processing */
 			}
-		}
+		} else if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG))
+			symadj = 1;
 
 		if (!ret) {
 			dd->delay_mult = rate_to_delay
@@ -2440,6 +2492,25 @@ static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 		}
 	}
 
+	if (symadj) {
+		if (dd->ibdeltainprog) {
+			dd->ibdeltainprog = 0;
+			dd->ibsymdelta += ipath_read_creg32(dd,
+				dd->ipath_cregs->cr_ibsymbolerrcnt) -
+				dd->ibsymsnap;
+			dd->iblnkerrdelta += ipath_read_creg32(dd,
+				dd->ipath_cregs->cr_iblinkerrrecovcnt) -
+				dd->iblnkerrsnap;
+		}
+	} else if (!ibup && !dd->ibdeltainprog
+		   && !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) {
+		dd->ibdeltainprog = 1;
+		dd->ibsymsnap = ipath_read_creg32(dd,
+			dd->ipath_cregs->cr_ibsymbolerrcnt);
+		dd->iblnkerrsnap = ipath_read_creg32(dd,
+			dd->ipath_cregs->cr_iblinkerrrecovcnt);
+	}
+
 	if (!ret)
 		ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs),
 					   ltstate);
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 3e5baa43fc82..64aeefbd2a5d 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -229,6 +229,7 @@ static int init_chip_first(struct ipath_devdata *dd)
 	spin_lock_init(&dd->ipath_kernel_tid_lock);
 	spin_lock_init(&dd->ipath_user_tid_lock);
 	spin_lock_init(&dd->ipath_sendctrl_lock);
+	spin_lock_init(&dd->ipath_uctxt_lock);
 	spin_lock_init(&dd->ipath_sdma_lock);
 	spin_lock_init(&dd->ipath_gpio_lock);
 	spin_lock_init(&dd->ipath_eep_st_lock);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 0bd8bcb184a1..6ba4861dd6ac 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -355,6 +355,19 @@ struct ipath_devdata {
 	/* errors masked because they occur too fast */
 	ipath_err_t ipath_maskederrs;
 	u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
+	/* these 5 fields are used to establish deltas for IB Symbol
+	 * errors and linkrecovery errors. They can be reported on
+	 * some chips during link negotiation prior to INIT, and with
+	 * DDR when faking DDR negotiations with non-IBTA switches.
+	 * The chip counters are adjusted at driver unload if there is
+	 * a non-zero delta.
+	 */
+	u64 ibdeltainprog;
+	u64 ibsymdelta;
+	u64 ibsymsnap;
+	u64 iblnkerrdelta;
+	u64 iblnkerrsnap;
+
 	/* time in jiffies at which to re-enable maskederrs */
 	unsigned long ipath_unmasktime;
 	/* count of egrfull errors, combined for all ports */
@@ -464,6 +477,8 @@ struct ipath_devdata {
 	spinlock_t ipath_kernel_tid_lock;
 	spinlock_t ipath_user_tid_lock;
 	spinlock_t ipath_sendctrl_lock;
+	/* around ipath_pd and (user ports) port_cnt use (intr vs free) */
+	spinlock_t ipath_uctxt_lock;
 
 	/*
 	 * IPATH_STATUS_*,
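
The five new fields implement the masking scheme the comment describes: snapshot the hardware counter when an "ignore" window opens (link down, negotiation), fold the difference into a delta when the window closes, and subtract both from every reading, as the ipath_stats.c hunk below does in ipath_snap_cntr(). A minimal user-space model of that arithmetic, where raw_errors stands in for the hardware register:

#include <assert.h>
#include <stdint.h>

static uint64_t raw_errors;	/* hardware counter (model) */
static uint64_t snap, delta;
static int inprog;

static void window_open(void)  { inprog = 1; snap = raw_errors; }
static void window_close(void) { inprog = 0; delta += raw_errors - snap; }

static uint64_t reported(void)
{
	uint64_t val = raw_errors;

	if (inprog)
		val -= val - snap;	/* hide errors inside the open window */
	return val - delta;		/* hide previously closed windows */
}

int main(void)
{
	raw_errors = 3;			/* 3 genuine errors while link was up */
	window_open();
	raw_errors += 10;		/* 10 bogus errors during negotiation */
	assert(reported() == 3);
	window_close();
	raw_errors += 2;		/* 2 more genuine errors */
	assert(reported() == 5);
	return 0;
}
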
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 8f32b17a5eed..c0e933fec218 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -132,6 +132,7 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (sge->lkey == 0) {
+		/* always a kernel port, no locking needed */
 		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
 
 		if (pd->user) {
@@ -211,6 +212,7 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (rkey == 0) {
+		/* always a kernel port, no locking needed */
 		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
 
 		if (pd->user) {
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index be4fc9ada8e7..17a123197477 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -348,6 +348,7 @@ bail:
  */
 static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
 {
+	/* always a kernel port, no locking needed */
 	struct ipath_portdata *pd = dd->ipath_pd[0];
 
 	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
@@ -730,6 +731,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
 	int i;
 	int changed = 0;
 
+	/* always a kernel port, no locking needed */
 	pd = dd->ipath_pd[0];
 
 	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 4715911101e4..3a5a89b609c4 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -745,6 +745,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 	struct ipath_swqe *swq = NULL;
 	struct ipath_ibdev *dev;
 	size_t sz;
+	size_t sg_list_sz;
 	struct ib_qp *ret;
 
 	if (init_attr->create_flags) {
@@ -789,19 +790,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		goto bail;
 	}
 	sz = sizeof(*qp);
+	sg_list_sz = 0;
 	if (init_attr->srq) {
 		struct ipath_srq *srq = to_isrq(init_attr->srq);
 
-		sz += sizeof(*qp->r_sg_list) *
-			srq->rq.max_sge;
-	} else
-		sz += sizeof(*qp->r_sg_list) *
-			init_attr->cap.max_recv_sge;
-	qp = kmalloc(sz, GFP_KERNEL);
+		if (srq->rq.max_sge > 1)
+			sg_list_sz = sizeof(*qp->r_sg_list) *
+				(srq->rq.max_sge - 1);
+	} else if (init_attr->cap.max_recv_sge > 1)
+		sg_list_sz = sizeof(*qp->r_sg_list) *
+			(init_attr->cap.max_recv_sge - 1);
+	qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
 	if (!qp) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_swq;
 	}
+	if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
+	    init_attr->qp_type == IB_QPT_SMI ||
+	    init_attr->qp_type == IB_QPT_GSI)) {
+		qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
+		if (!qp->r_ud_sg_list) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_qp;
+		}
+	} else
+		qp->r_ud_sg_list = NULL;
 	if (init_attr->srq) {
 		sz = 0;
 		qp->r_rq.size = 0;
@@ -818,7 +831,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 					      qp->r_rq.size * sz);
 		if (!qp->r_rq.wq) {
 			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
+			goto bail_sg_list;
 		}
 	}
 
@@ -848,7 +861,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 		if (err) {
 			ret = ERR_PTR(err);
 			vfree(qp->r_rq.wq);
-			goto bail_qp;
+			goto bail_sg_list;
 		}
 		qp->ip = NULL;
 		qp->s_tx = NULL;
@@ -925,6 +938,8 @@ bail_ip:
 	vfree(qp->r_rq.wq);
 	ipath_free_qp(&dev->qp_table, qp);
 	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_sg_list:
+	kfree(qp->r_ud_sg_list);
 bail_qp:
 	kfree(qp);
 bail_swq:
@@ -989,6 +1004,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 		kref_put(&qp->ip->ref, ipath_release_mmap_info);
 	else
 		vfree(qp->r_rq.wq);
+	kfree(qp->r_ud_sg_list);
 	vfree(qp->s_wq);
 	kfree(qp);
 	return 0;
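
ipath_create_qp() now reserves max_sge - 1 scatter/gather entries in the same kmalloc as the QP itself, and gives UD-type QPs a separate r_ud_sg_list so the loopback path (see the ipath_ud.c hunks below) can stop allocating with GFP_ATOMIC per packet. The struct-plus-trailing-storage trick in kmalloc(sz + sg_list_sz, ...), sketched with illustrative names rather than the ipath structures:

#include <linux/slab.h>
#include <linux/types.h>

struct sge {
	u64 addr;
	u32 length;
};

struct qp_like {
	int nsge;
	struct sge *sg_list;	/* points into the same allocation */
};

static struct qp_like *qp_like_alloc(int nsge, gfp_t gfp)
{
	struct qp_like *qp;

	/* one allocation covers the struct and its variable-length tail */
	qp = kmalloc(sizeof(*qp) + nsge * sizeof(struct sge), gfp);
	if (!qp)
		return NULL;
	qp->nsge = nsge;
	qp->sg_list = nsge ? (struct sge *) (qp + 1) : NULL;
	return qp;
}
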
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 7b93cda1a4bd..9170710b950d 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -573,9 +573,8 @@ int ipath_make_rc_req(struct ipath_qp *qp)
 			ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
 			qp->s_state = OP(RDMA_READ_REQUEST);
 			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-			bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
+			bth2 = qp->s_psn & IPATH_PSN_MASK;
+			qp->s_psn = wqe->lpsn + 1;
 			ss = NULL;
 			len = 0;
 			qp->s_cur++;
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 284c9bca517e..8e255adf5d9b 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -698,10 +698,8 @@ retry:
 
 	addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
 			      tx->map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&dd->pcidev->dev, addr)) {
-		ret = -EIO;
-		goto unlock;
-	}
+	if (dma_mapping_error(&dd->pcidev->dev, addr))
+		goto ioerr;
 
 	dwoffset = tx->map_len >> 2;
 	make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
@@ -741,6 +739,8 @@ retry:
741 dw = (len + 3) >> 2; 739 dw = (len + 3) >> 2;
742 addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, 740 addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
743 DMA_TO_DEVICE); 741 DMA_TO_DEVICE);
742 if (dma_mapping_error(&dd->pcidev->dev, addr))
743 goto unmap;
744 make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); 744 make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
745 /* SDmaUseLargeBuf has to be set in every descriptor */ 745 /* SDmaUseLargeBuf has to be set in every descriptor */
746 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) 746 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
@@ -798,7 +798,18 @@ retry:
798 list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist); 798 list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
799 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) 799 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
800 vl15_watchdog_enq(dd); 800 vl15_watchdog_enq(dd);
801 801 goto unlock;
802
803unmap:
804 while (tail != dd->ipath_sdma_descq_tail) {
805 if (!tail)
806 tail = dd->ipath_sdma_descq_cnt - 1;
807 else
808 tail--;
809 unmap_desc(dd, tail);
810 }
811ioerr:
812 ret = -EIO;
802unlock: 813unlock:
803 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 814 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
804fail: 815fail:
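
Both dma_map_single() calls are now checked with dma_mapping_error(), and a failure hit partway through building the descriptor ring walks backwards to the last committed tail, unmapping each descriptor queued so far. The wraparound bookkeeping in that rollback is the subtle part; a self-contained userspace sketch, where ring_undo() stands in for the driver's unmap_desc():

#include <stdio.h>

#define RING_SIZE 8

void ring_undo(unsigned int slot)
{
    printf("undo slot %u\n", slot);
}

/* walk backwards from the failing position to the committed tail,
 * wrapping below slot 0 to the top of the ring */
void rollback(unsigned int tail, unsigned int committed_tail)
{
    while (tail != committed_tail) {
        if (!tail)
            tail = RING_SIZE - 1;
        else
            tail--;
        ring_undo(tail);
    }
}

int main(void)
{
    rollback(2, 6);    /* undoes slots 1, 0, 7, 6 */
    return 0;
}
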
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index c8e3d65f0de8..f63e143e3292 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -112,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
             dd->ipath_lastrpkts = val;
         }
         val64 = dd->ipath_rpkts;
+    } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
+        if (dd->ibdeltainprog)
+            val64 -= val64 - dd->ibsymsnap;
+        val64 -= dd->ibsymdelta;
+    } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
+        if (dd->ibdeltainprog)
+            val64 -= val64 - dd->iblnkerrsnap;
+        val64 -= dd->iblnkerrdelta;
     } else
         val64 = (u64) val;
 
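
The two new branches correct the IB symbol-error and link-error-recovery counters so that errors generated while a deliberate link recalibration is in progress are not reported; note that val64 -= val64 - snap collapses to val64 = snap. A compilable sketch of the snapshot/delta accounting (field names illustrative):

#include <stdint.h>

struct errcnt {
    uint64_t snap;     /* counter value when recalibration began */
    uint64_t delta;    /* errors attributable to past recalibrations */
    int in_progress;
};

uint64_t adjusted_count(uint64_t raw, const struct errcnt *c)
{
    if (c->in_progress)
        raw = c->snap;       /* freeze at the pre-recalibration value */
    return raw - c->delta;   /* and hide previously absorbed errors */
}
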
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 729446f56aab..91c74cc797ae 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -70,8 +70,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
         goto done;
     }
 
-    rsge.sg_list = NULL;
-
     /*
      * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
      * Qkeys with the high order bit set mean use the
@@ -115,21 +113,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
         rq = &qp->r_rq;
     }
 
-    if (rq->max_sge > 1) {
-        /*
-         * XXX We could use GFP_KERNEL if ipath_do_send()
-         * was always called from the tasklet instead of
-         * from ipath_post_send().
-         */
-        rsge.sg_list = kmalloc((rq->max_sge - 1) *
-                    sizeof(struct ipath_sge),
-                    GFP_ATOMIC);
-        if (!rsge.sg_list) {
-            dev->n_pkt_drops++;
-            goto drop;
-        }
-    }
-
     /*
      * Get the next work request entry to find where to put the data.
      * Note that it is safe to drop the lock after changing rq->tail
@@ -147,6 +130,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
         goto drop;
     }
     wqe = get_rwqe_ptr(rq, tail);
+    rsge.sg_list = qp->r_ud_sg_list;
     if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
         spin_unlock_irqrestore(&rq->lock, flags);
         dev->n_pkt_drops++;
@@ -242,7 +226,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
     ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                swqe->wr.send_flags & IB_SEND_SOLICITED);
 drop:
-    kfree(rsge.sg_list);
     if (atomic_dec_and_test(&qp->refcount))
         wake_up(&qp->wait);
 done:;
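
With r_ud_sg_list allocated once at QP creation, the UD loopback fast path no longer performs a per-packet GFP_ATOMIC kmalloc that could fail under memory pressure (the removed "goto drop" path above). A userspace sketch of the preallocate-and-reuse trade, with illustrative names:

#include <stdlib.h>

struct sge { void *addr; size_t len; };

struct qp_scratch {
    struct sge *sg_list;    /* sized once, reused for every packet */
    unsigned int max_sge;
};

int scratch_init(struct qp_scratch *s, unsigned int max_sge)
{
    /* one fallible allocation at setup time replaces an allocation
     * (and a failure path) in the per-packet hot path */
    s->max_sge = max_sge;
    s->sg_list = malloc(max_sge * sizeof(*s->sg_list));
    return s->sg_list ? 0 : -1;
}
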
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index eabc4247860b..cdf0e6abd34d 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1852,7 +1852,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd)
 }
 
 /**
- * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * ipath_get_pkey - return the indexed PKEY from the port PKEY table
  * @dd: the infinipath device
  * @index: the PKEY index
  */
@@ -1860,6 +1860,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
 {
     unsigned ret;
 
+    /* always a kernel port, no locking needed */
     if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
         ret = 0;
     else
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 9d12ae8a778e..11e3f613df93 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -431,6 +431,7 @@ struct ipath_qp {
     u32 s_lsn;          /* limit sequence number (credit) */
     struct ipath_swqe *s_wq;    /* send work queue */
     struct ipath_swqe *s_wqe;
+    struct ipath_sge *r_ud_sg_list;
     struct ipath_rq r_rq;       /* receive work queue */
     struct ipath_sge r_sg_list[0];  /* verified SGEs */
 };
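
The new pointer is added above r_rq rather than at the end because r_sg_list[0] is a trailing array that must remain the last member; its storage is carved out of the same kmalloc as the QP itself. A sketch of the idiom in portable C99 (the kernel header uses the older [0] GNU spelling):

#include <stdlib.h>

struct sge { void *addr; };

struct qp {
    int nsge;
    struct sge sg_list[];    /* flexible array member, occupies no space */
};

struct qp *qp_alloc(int nsge)
{
    /* over-allocate so the array lands directly after the struct */
    struct qp *qp = malloc(sizeof(*qp) + nsge * sizeof(struct sge));
    if (qp)
        qp->nsge = nsge;
    return qp;
}
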
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 18308494a195..8415ecce5c4c 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
     }
 
     err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-                cq->db.dma, &cq->mcq, 0);
+                cq->db.dma, &cq->mcq, vector, 0);
     if (err)
         goto err_dbmap;
 
@@ -325,15 +325,17 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
 
 static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
-    struct mlx4_cqe *cqe;
+    struct mlx4_cqe *cqe, *new_cqe;
     int i;
 
     i = cq->mcq.cons_index;
     cqe = get_cqe(cq, i & cq->ibcq.cqe);
     while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
-        memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
-                    (i + 1) & cq->resize_buf->cqe),
-            get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+        new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
+                    (i + 1) & cq->resize_buf->cqe);
+        memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+        new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
+            (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
         cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
     }
     ++cq->mcq.cons_index;
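
The resize fix recomputes each copied CQE's ownership bit for its position in the new ring: with a power-of-two ring, ownership parity flips once per full pass, and a bit carried over from the old ring would make hardware and software disagree about which entries are valid. A hedged userspace sketch of the parity rule (constants illustrative):

#include <stdint.h>

#define OWNER_BIT 0x80

/* ring_size must be a power of two; (index & ring_size) flips every
 * time the index completes a pass over the ring, which is exactly the
 * parity the patch's (i + 1) & (cqe + 1) test computes */
uint8_t owner_bit_for(uint32_t index, uint32_t ring_size)
{
    return (index & ring_size) ? OWNER_BIT : 0;
}
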
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2e80f8f47b02..dcefe1fceb5c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
     mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
         ibdev->num_ports++;
     ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
-    ibdev->ib_dev.num_comp_vectors  = 1;
+    ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
     ibdev->ib_dev.dma_device        = &dev->pdev->dev;
 
     ibdev->ib_dev.uverbs_abi_ver    = MLX4_IB_UVERBS_ABI_VERSION;
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 1595dc7bba9d..13a5bb1a7bcf 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -137,14 +137,18 @@
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 #define nes_debug(level, fmt, args...) \
+do { \
     if (level & nes_debug_level) \
-        printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args)
-
-#define assert(expr) \
-if (!(expr)) { \
-    printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
-        #expr, __FILE__, __func__, __LINE__); \
-}
+        printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
+} while (0)
+
+#define assert(expr) \
+do { \
+    if (!(expr)) { \
+        printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
+            #expr, __FILE__, __func__, __LINE__); \
+    } \
+} while (0)
 
 #define NES_EVENT_TIMEOUT 1200000
 #else
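
Wrapping nes_debug() and assert() in do { ... } while (0) is the standard hardening for statement-like macros: a bare if inside a macro either breaks compilation (the caller's semicolon leaves a stray empty statement before an else) or silently re-pairs a following else with the macro's hidden if. A compilable illustration:

#include <stdio.h>

/* fragile form: expands to a bare if statement */
#define LOG_BAD(cond, msg)  if (cond) printf("%s\n", msg)

/* hardened form: always exactly one statement, semicolon required */
#define LOG_GOOD(cond, msg) do { if (cond) printf("%s\n", msg); } while (0)

int main(void)
{
    int a = 0, b = 1;

    /* with LOG_BAD here, the "else" would attach to the macro's inner
     * "if (b)" (or the stray semicolon would not compile at all),
     * inverting the logic the code appears to express */
    if (a)
        LOG_GOOD(b, "a and b");
    else
        printf("fallback\n");
    return 0;
}
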
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 2caf9da81ad5..cb48041bed69 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -86,15 +86,14 @@ static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
         struct nes_cm_node *);
 static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
         struct nes_cm_node *);
-static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
         struct sk_buff *);
 static int mini_cm_dealloc_core(struct nes_cm_core *);
 static int mini_cm_get(struct nes_cm_core *);
 static int mini_cm_set(struct nes_cm_core *, u32, u32);
 
-static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+static void form_cm_frame(struct sk_buff *, struct nes_cm_node *,
         void *, u32, void *, u32, u8);
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
 static int add_ref_cm_node(struct nes_cm_node *);
 static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
 
@@ -251,7 +250,7 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len)
  * form_cm_frame - get a free packet and build empty frame Use
  * node info to build.
  */
-static struct sk_buff *form_cm_frame(struct sk_buff *skb,
+static void form_cm_frame(struct sk_buff *skb,
         struct nes_cm_node *cm_node, void *options, u32 optionsize,
         void *data, u32 datasize, u8 flags)
 {
@@ -339,7 +338,6 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb,
     skb_shinfo(skb)->nr_frags = 0;
     cm_packets_created++;
 
-    return skb;
 }
 
 
@@ -356,7 +354,6 @@ static void print_core(struct nes_cm_core *core)
 
     nes_debug(NES_DBG_CM, "State         : %u \n", core->state);
 
-    nes_debug(NES_DBG_CM, "Tx Free cnt   : %u \n", skb_queue_len(&core->tx_free_list));
     nes_debug(NES_DBG_CM, "Listen Nodes  : %u \n", atomic_read(&core->listen_node_cnt));
     nes_debug(NES_DBG_CM, "Active Nodes  : %u \n", atomic_read(&core->node_cnt));
 
@@ -381,8 +378,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
     int ret = 0;
     u32 was_timer_set;
 
-    if (!cm_node)
-        return -EINVAL;
     new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
     if (!new_send)
         return -1;
@@ -459,13 +454,23 @@ static void nes_cm_timer_tick(unsigned long pass)
     int ret = NETDEV_TX_OK;
     enum nes_cm_node_state last_state;
 
+    struct list_head timer_list;
+    INIT_LIST_HEAD(&timer_list);
     spin_lock_irqsave(&cm_core->ht_lock, flags);
 
     list_for_each_safe(list_node, list_core_temp,
             &cm_core->connected_nodes) {
         cm_node = container_of(list_node, struct nes_cm_node, list);
-        add_ref_cm_node(cm_node);
-        spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+        if (!list_empty(&cm_node->recv_list) || (cm_node->send_entry)) {
+            add_ref_cm_node(cm_node);
+            list_add(&cm_node->timer_entry, &timer_list);
+        }
+    }
+    spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+    list_for_each_safe(list_node, list_core_temp, &timer_list) {
+        cm_node = container_of(list_node, struct nes_cm_node,
+                timer_entry);
         spin_lock_irqsave(&cm_node->recv_list_lock, flags);
         list_for_each_safe(list_core, list_node_temp,
             &cm_node->recv_list) {
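
Rather than dropping and retaking ht_lock around every node, which lets connected_nodes mutate mid-walk, the tick now snapshots candidates onto a private list under the lock, holding a reference on each, and does the slow per-node work after unlocking. A self-contained pthreads sketch of the collect-then-process pattern (simplified; the driver uses list_head links and rem_ref_cm_node()):

#include <pthread.h>
#include <stddef.h>

struct node {
    struct node *next;         /* link in the shared table */
    struct node *work_next;    /* separate link for the private list */
    int busy;
    int refs;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *table;

static void process(struct node *n) { (void)n; /* slow work, lock not held */ }

void tick(void)
{
    struct node *work = NULL, *n;

    /* pass 1: snapshot under the lock, pinning each candidate */
    pthread_mutex_lock(&table_lock);
    for (n = table; n; n = n->next) {
        if (n->busy) {
            n->refs++;
            n->work_next = work;
            work = n;
        }
    }
    pthread_mutex_unlock(&table_lock);

    /* pass 2: slow work with the shared lock dropped */
    while (work) {
        n = work;
        work = n->work_next;
        process(n);
        n->refs--;    /* drop the pin */
    }
}
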
@@ -519,7 +524,7 @@ static void nes_cm_timer_tick(unsigned long pass)
         do {
             send_entry = cm_node->send_entry;
             if (!send_entry)
-                continue;
+                break;
             if (time_after(send_entry->timetosend, jiffies)) {
                 if (cm_node->state != NES_CM_STATE_TSA) {
                     if ((nexttimeout >
@@ -528,18 +533,18 @@ static void nes_cm_timer_tick(unsigned long pass)
                         nexttimeout =
                             send_entry->timetosend;
                         settimer = 1;
-                        continue;
+                        break;
                     }
                 } else {
                     free_retrans_entry(cm_node);
-                    continue;
+                    break;
                 }
             }
 
             if ((cm_node->state == NES_CM_STATE_TSA) ||
                 (cm_node->state == NES_CM_STATE_CLOSED)) {
                 free_retrans_entry(cm_node);
-                continue;
+                break;
             }
 
             if (!send_entry->retranscount ||
@@ -557,7 +562,7 @@ static void nes_cm_timer_tick(unsigned long pass)
                     NES_CM_EVENT_ABORTED);
                 spin_lock_irqsave(&cm_node->retrans_list_lock,
                     flags);
-                continue;
+                break;
             }
             atomic_inc(&send_entry->skb->users);
             cm_packets_retrans++;
@@ -583,7 +588,7 @@ static void nes_cm_timer_tick(unsigned long pass)
                 send_entry->retrycount--;
                 nexttimeout = jiffies + NES_SHORT_TIME;
                 settimer = 1;
-                continue;
+                break;
             } else {
                 cm_packets_sent++;
             }
@@ -615,14 +620,12 @@ static void nes_cm_timer_tick(unsigned long pass)
 
         spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
         rem_ref_cm_node(cm_node->cm_core, cm_node);
-        spin_lock_irqsave(&cm_core->ht_lock, flags);
         if (ret != NETDEV_TX_OK) {
             nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
                 cm_node);
             break;
         }
     }
-    spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 
     if (settimer) {
         if (!timer_pending(&cm_core->tcp_timer)) {
@@ -683,7 +686,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
         optionssize += 1;
 
     if (!skb)
-        skb = get_free_pkt(cm_node);
+        skb = dev_alloc_skb(MAX_CM_BUFFER);
     if (!skb) {
         nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
         return -1;
@@ -708,7 +711,7 @@ static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
     int flags = SET_RST | SET_ACK;
 
     if (!skb)
-        skb = get_free_pkt(cm_node);
+        skb = dev_alloc_skb(MAX_CM_BUFFER);
     if (!skb) {
         nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
         return -1;
@@ -729,7 +732,7 @@ static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
     int ret;
 
     if (!skb)
-        skb = get_free_pkt(cm_node);
+        skb = dev_alloc_skb(MAX_CM_BUFFER);
 
     if (!skb) {
         nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -752,7 +755,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
 
     /* if we didn't get a frame get one */
     if (!skb)
-        skb = get_free_pkt(cm_node);
+        skb = dev_alloc_skb(MAX_CM_BUFFER);
 
     if (!skb) {
         nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -767,59 +770,15 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
 
 
 /**
- * get_free_pkt
- */
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
-{
-    struct sk_buff *skb, *new_skb;
-
-    /* check to see if we need to repopulate the free tx pkt queue */
-    if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) {
-        while (skb_queue_len(&cm_node->cm_core->tx_free_list) <
-                cm_node->cm_core->free_tx_pkt_max) {
-            /* replace the frame we took, we won't get it back */
-            new_skb = dev_alloc_skb(cm_node->cm_core->mtu);
-            BUG_ON(!new_skb);
-            /* add a replacement frame to the free tx list head */
-            skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb);
-        }
-    }
-
-    skb = skb_dequeue(&cm_node->cm_core->tx_free_list);
-
-    return skb;
-}
-
-
-/**
- * make_hashkey - generate hash key from node tuple
- */
-static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port,
-        nes_addr_t rem_addr)
-{
-    u32 hashkey = 0;
-
-    hashkey = loc_addr + rem_addr + loc_port + rem_port;
-    hashkey = (hashkey % NES_CM_HASHTABLE_SIZE);
-
-    return hashkey;
-}
-
-
-/**
  * find_node - find a cm node that matches the reference cm node
  */
 static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
         u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
 {
     unsigned long flags;
-    u32 hashkey;
     struct list_head *hte;
     struct nes_cm_node *cm_node;
 
-    /* make a hash index key for this packet */
-    hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr);
-
     /* get a handle on the hte */
     hte = &cm_core->connected_nodes;
 
@@ -887,7 +846,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
 static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
     unsigned long flags;
-    u32 hashkey;
     struct list_head *hte;
 
     if (!cm_node || !cm_core)
@@ -896,11 +854,6 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
     nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
         cm_node);
 
-    /* first, make an index into our hash table */
-    hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
-            cm_node->rem_port, cm_node->rem_addr);
-    cm_node->hashkey = hashkey;
-
     spin_lock_irqsave(&cm_core->ht_lock, flags);
 
     /* get a handle on the hash table element (list head for this slot) */
@@ -925,28 +878,36 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
     struct list_head *list_pos = NULL;
     struct list_head *list_temp = NULL;
     struct nes_cm_node *cm_node = NULL;
+    struct list_head reset_list;
 
     nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
         "refcnt=%d\n", listener, free_hanging_nodes,
         atomic_read(&listener->ref_count));
     /* free non-accelerated child nodes for this listener */
+    INIT_LIST_HEAD(&reset_list);
     if (free_hanging_nodes) {
         spin_lock_irqsave(&cm_core->ht_lock, flags);
         list_for_each_safe(list_pos, list_temp,
             &g_cm_core->connected_nodes) {
             cm_node = container_of(list_pos, struct nes_cm_node,
                 list);
             if ((cm_node->listener == listener) &&
                 (!cm_node->accelerated)) {
-                cleanup_retrans_entry(cm_node);
-                spin_unlock_irqrestore(&cm_core->ht_lock,
-                    flags);
-                send_reset(cm_node, NULL);
-                spin_lock_irqsave(&cm_core->ht_lock, flags);
+                add_ref_cm_node(cm_node);
+                list_add(&cm_node->reset_entry, &reset_list);
             }
         }
         spin_unlock_irqrestore(&cm_core->ht_lock, flags);
     }
+
+    list_for_each_safe(list_pos, list_temp, &reset_list) {
+        cm_node = container_of(list_pos, struct nes_cm_node,
+                reset_entry);
+        cleanup_retrans_entry(cm_node);
+        send_reset(cm_node, NULL);
+        rem_ref_cm_node(cm_node->cm_core, cm_node);
+    }
+
     spin_lock_irqsave(&cm_core->listen_list_lock, flags);
     if (!atomic_dec_return(&listener->ref_count)) {
         list_del(&listener->list);
@@ -1126,7 +1087,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 
     cm_node->loopbackpartner = NULL;
     /* get the mac addr for the remote node */
-    arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+    if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
+        arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
+    else
+        arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
     if (arpindex < 0) {
         arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
         if (arpindex < 0) {
@@ -1306,7 +1270,6 @@ static void drop_packet(struct sk_buff *skb)
 static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
         struct tcphdr *tcph)
 {
-    atomic_inc(&cm_resets_recvd);
     nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
         "refcnt=%d\n", cm_node, cm_node->state,
         atomic_read(&cm_node->ref_count));
@@ -1344,6 +1307,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 {
 
     int reset = 0;      /* whether to send reset in case of err.. */
+    int passive_state;
     atomic_inc(&cm_resets_recvd);
     nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
             " refcnt=%d\n", cm_node, cm_node->state,
@@ -1357,7 +1321,14 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
             cm_node->listener, cm_node->state);
         active_open_err(cm_node, skb, reset);
         break;
-    /* For PASSIVE open states, remove the cm_node event */
+    case NES_CM_STATE_MPAREQ_RCVD:
+        passive_state = atomic_add_return(1, &cm_node->passive_state);
+        if (passive_state == NES_SEND_RESET_EVENT)
+            create_event(cm_node, NES_CM_EVENT_RESET);
+        cleanup_retrans_entry(cm_node);
+        cm_node->state = NES_CM_STATE_CLOSED;
+        dev_kfree_skb_any(skb);
+        break;
     case NES_CM_STATE_ESTABLISHED:
     case NES_CM_STATE_SYN_RCVD:
     case NES_CM_STATE_LISTENING:
@@ -1365,7 +1336,14 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
         passive_open_err(cm_node, skb, reset);
         break;
     case NES_CM_STATE_TSA:
+        active_open_err(cm_node, skb, reset);
+        break;
+    case NES_CM_STATE_CLOSED:
+        cleanup_retrans_entry(cm_node);
+        drop_packet(skb);
+        break;
     default:
+        drop_packet(skb);
         break;
     }
 }
@@ -1394,6 +1372,9 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb,
         dev_kfree_skb_any(skb);
         if (type == NES_CM_EVENT_CONNECTED)
             cm_node->state = NES_CM_STATE_TSA;
+        else
+            atomic_set(&cm_node->passive_state,
+                    NES_PASSIVE_STATE_INDICATED);
         create_event(cm_node, type);
 
     }
@@ -1474,7 +1455,7 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
     int optionsize;
 
     optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-    skb_pull(skb, tcph->doff << 2);
+    skb_trim(skb, 0);
     inc_sequence = ntohl(tcph->seq);
 
     switch (cm_node->state) {
@@ -1507,6 +1488,10 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
         cm_node->state = NES_CM_STATE_SYN_RCVD;
         send_syn(cm_node, 1, skb);
         break;
+    case NES_CM_STATE_CLOSED:
+        cleanup_retrans_entry(cm_node);
+        send_reset(cm_node, skb);
+        break;
     case NES_CM_STATE_TSA:
     case NES_CM_STATE_ESTABLISHED:
     case NES_CM_STATE_FIN_WAIT1:
@@ -1515,7 +1500,6 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
     case NES_CM_STATE_LAST_ACK:
     case NES_CM_STATE_CLOSING:
     case NES_CM_STATE_UNKNOWN:
-    case NES_CM_STATE_CLOSED:
     default:
         drop_packet(skb);
         break;
@@ -1531,7 +1515,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
     int optionsize;
 
     optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-    skb_pull(skb, tcph->doff << 2);
+    skb_trim(skb, 0);
     inc_sequence = ntohl(tcph->seq);
     switch (cm_node->state) {
     case NES_CM_STATE_SYN_SENT:
@@ -1555,6 +1539,12 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
         /* passive open, so should not be here */
         passive_open_err(cm_node, skb, 1);
         break;
+    case NES_CM_STATE_LISTENING:
+    case NES_CM_STATE_CLOSED:
+        cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+        cleanup_retrans_entry(cm_node);
+        send_reset(cm_node, skb);
+        break;
     case NES_CM_STATE_ESTABLISHED:
     case NES_CM_STATE_FIN_WAIT1:
     case NES_CM_STATE_FIN_WAIT2:
@@ -1562,7 +1552,6 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
     case NES_CM_STATE_TSA:
     case NES_CM_STATE_CLOSING:
     case NES_CM_STATE_UNKNOWN:
-    case NES_CM_STATE_CLOSED:
     case NES_CM_STATE_MPAREQ_SENT:
     default:
         drop_packet(skb);
@@ -1577,6 +1566,13 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
     u32 inc_sequence;
     u32 rem_seq_ack;
     u32 rem_seq;
+    int ret;
+    int optionsize;
+    u32 temp_seq = cm_node->tcp_cntxt.loc_seq_num;
+
+    optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+    cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+
     if (check_seq(cm_node, tcph, skb))
         return;
 
@@ -1589,7 +1585,18 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
     switch (cm_node->state) {
     case NES_CM_STATE_SYN_RCVD:
         /* Passive OPEN */
+        ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
+        if (ret)
+            break;
         cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+        cm_node->tcp_cntxt.loc_seq_num = temp_seq;
+        if (cm_node->tcp_cntxt.rem_ack_num !=
+            cm_node->tcp_cntxt.loc_seq_num) {
+            nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
+            cleanup_retrans_entry(cm_node);
+            send_reset(cm_node, skb);
+            return;
+        }
         cm_node->state = NES_CM_STATE_ESTABLISHED;
         if (datasize) {
             cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
@@ -1621,11 +1628,15 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
             dev_kfree_skb_any(skb);
         }
         break;
+    case NES_CM_STATE_LISTENING:
+    case NES_CM_STATE_CLOSED:
+        cleanup_retrans_entry(cm_node);
+        send_reset(cm_node, skb);
+        break;
     case NES_CM_STATE_FIN_WAIT1:
     case NES_CM_STATE_SYN_SENT:
     case NES_CM_STATE_FIN_WAIT2:
     case NES_CM_STATE_TSA:
-    case NES_CM_STATE_CLOSED:
     case NES_CM_STATE_MPAREQ_RCVD:
     case NES_CM_STATE_LAST_ACK:
     case NES_CM_STATE_CLOSING:
@@ -1648,9 +1659,9 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
         nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
             __func__, cm_node);
         if (passive)
-            passive_open_err(cm_node, skb, 0);
+            passive_open_err(cm_node, skb, 1);
         else
-            active_open_err(cm_node, skb, 0);
+            active_open_err(cm_node, skb, 1);
         return 1;
     }
 }
@@ -1970,6 +1981,7 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
         struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
 {
     int ret = 0;
+    int passive_state;
 
     nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
         __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
@@ -1977,9 +1989,13 @@ static int mini_cm_reject(struct nes_cm_core *cm_core,
     if (cm_node->tcp_cntxt.client)
         return ret;
     cleanup_retrans_entry(cm_node);
-    cm_node->state = NES_CM_STATE_CLOSED;
 
-    ret = send_reset(cm_node, NULL);
+    passive_state = atomic_add_return(1, &cm_node->passive_state);
+    cm_node->state = NES_CM_STATE_CLOSED;
+    if (passive_state == NES_SEND_RESET_EVENT)
+        rem_ref_cm_node(cm_core, cm_node);
+    else
+        ret = send_reset(cm_node, NULL);
     return ret;
 }
 
@@ -2037,7 +2053,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
  * recv_pkt - recv an ETHERNET packet, and process it through CM
  * node state machine
  */
-static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
+static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
         struct nes_vnic *nesvnic, struct sk_buff *skb)
 {
     struct nes_cm_node *cm_node = NULL;
@@ -2045,23 +2061,16 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
     struct iphdr *iph;
     struct tcphdr *tcph;
     struct nes_cm_info nfo;
+    int skb_handled = 1;
 
     if (!skb)
-        return;
+        return 0;
     if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
-        dev_kfree_skb_any(skb);
-        return;
+        return 0;
     }
 
     iph = (struct iphdr *)skb->data;
     tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
-    skb_reset_network_header(skb);
-    skb_set_transport_header(skb, sizeof(*tcph));
-    if (!tcph) {
-        dev_kfree_skb_any(skb);
-        return;
-    }
-    skb->len = ntohs(iph->tot_len);
 
     nfo.loc_addr = ntohl(iph->daddr);
     nfo.loc_port = ntohs(tcph->dest);
@@ -2082,23 +2091,21 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
         /* Only type of packet accepted are for */
         /* the PASSIVE open (syn only) */
         if ((!tcph->syn) || (tcph->ack)) {
-            cm_packets_dropped++;
+            skb_handled = 0;
             break;
         }
         listener = find_listener(cm_core, nfo.loc_addr,
                 nfo.loc_port,
                 NES_CM_LISTENER_ACTIVE_STATE);
-        if (listener) {
-            nfo.cm_id = listener->cm_id;
-            nfo.conn_type = listener->conn_type;
-        } else {
-            nes_debug(NES_DBG_CM, "Unable to find listener "
-                "for the pkt\n");
-            cm_packets_dropped++;
-            dev_kfree_skb_any(skb);
+        if (!listener) {
+            nfo.cm_id = NULL;
+            nfo.conn_type = 0;
+            nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n");
+            skb_handled = 0;
             break;
         }
-
+        nfo.cm_id = listener->cm_id;
+        nfo.conn_type = listener->conn_type;
         cm_node = make_cm_node(cm_core, nesvnic, &nfo,
                 listener);
         if (!cm_node) {
@@ -2124,9 +2131,13 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
             dev_kfree_skb_any(skb);
             break;
         }
+        skb_reset_network_header(skb);
+        skb_set_transport_header(skb, sizeof(*tcph));
+        skb->len = ntohs(iph->tot_len);
         process_packet(cm_node, skb, cm_core);
         rem_ref_cm_node(cm_core, cm_node);
     } while (0);
+    return skb_handled;
 }
 
 
@@ -2135,10 +2146,7 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
  */
 static struct nes_cm_core *nes_cm_alloc_core(void)
 {
-    int i;
-
     struct nes_cm_core *cm_core;
-    struct sk_buff *skb = NULL;
 
     /* setup the CM core */
     /* alloc top level core control structure */
@@ -2156,19 +2164,6 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
 
     atomic_set(&cm_core->events_posted, 0);
 
-    /* init the packet lists */
-    skb_queue_head_init(&cm_core->tx_free_list);
-
-    for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) {
-        skb = dev_alloc_skb(cm_core->mtu);
-        if (!skb) {
-            kfree(cm_core);
-            return NULL;
-        }
-        /* add 'raw' skb to free frame list */
-        skb_queue_head(&cm_core->tx_free_list, skb);
-    }
-
     cm_core->api = &nes_cm_api;
 
     spin_lock_init(&cm_core->ht_lock);
@@ -2397,7 +2392,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
         atomic_inc(&cm_disconnects);
         cm_event.event = IW_CM_EVENT_DISCONNECT;
         if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
-            issued_disconnect_reset = 1;
             cm_event.status = IW_CM_EVENT_STATUS_RESET;
             nes_debug(NES_DBG_CM, "Generating a CM "
                 "Disconnect Event (status reset) for "
@@ -2547,6 +2541,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
     struct nes_v4_quad nes_quad;
     u32 crc_value;
     int ret;
+    int passive_state;
 
     ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
     if (!ibqp)
@@ -2714,8 +2709,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
             conn_param->private_data_len +
             sizeof(struct ietf_mpa_frame));
 
-    attr.qp_state = IB_QPS_RTS;
-    nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
 
     /* notify OF layer that accept event was successfull */
     cm_id->add_ref(cm_id);
@@ -2728,6 +2721,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
     cm_event.private_data = NULL;
     cm_event.private_data_len = 0;
     ret = cm_id->event_handler(cm_id, &cm_event);
+    attr.qp_state = IB_QPS_RTS;
+    nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
     if (cm_node->loopbackpartner) {
         cm_node->loopbackpartner->mpa_frame_size =
             nesqp->private_data_len;
@@ -2740,6 +2735,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
         printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
             "ret=%d\n", __func__, __LINE__, ret);
 
+    passive_state = atomic_add_return(1, &cm_node->passive_state);
+    if (passive_state == NES_SEND_RESET_EVENT)
+        create_event(cm_node, NES_CM_EVENT_RESET);
     return 0;
 }
 
@@ -2943,15 +2941,16 @@ int nes_destroy_listen(struct iw_cm_id *cm_id)
  */
 int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
 {
+    int rc = 0;
     cm_packets_received++;
     if ((g_cm_core) && (g_cm_core->api)) {
-        g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
+        rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
     } else {
         nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
             " cm is not setup properly.\n");
     }
 
-    return 0;
+    return rc;
 }
 
 
@@ -3222,6 +3221,18 @@ static void cm_event_reset(struct nes_cm_event *event)
     cm_event.private_data_len = 0;
 
     ret = cm_id->event_handler(cm_id, &cm_event);
+    cm_id->add_ref(cm_id);
+    atomic_inc(&cm_closes);
+    cm_event.event = IW_CM_EVENT_CLOSE;
+    cm_event.status = IW_CM_EVENT_STATUS_OK;
+    cm_event.provider_data = cm_id->provider_data;
+    cm_event.local_addr = cm_id->local_addr;
+    cm_event.remote_addr = cm_id->remote_addr;
+    cm_event.private_data = NULL;
+    cm_event.private_data_len = 0;
+    nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
+    ret = cm_id->event_handler(cm_id, &cm_event);
+
     nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 
 
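
Several hunks above (handle_rst_pkt, mini_cm_reject, nes_accept, cm_event_reset) coordinate through the new passive_state counter: the application path and the wire path each bump it once with atomic_add_return(), and only the path that sees the counter reach NES_SEND_RESET_EVENT performs the reset/close work, so it happens exactly once whichever side runs first. A userspace sketch of the handshake with C11 atomics (surrounding code illustrative):

#include <stdatomic.h>

#define PASSIVE_STATE_INDICATED 0
#define SEND_RESET_EVENT        2

static atomic_int passive_state = PASSIVE_STATE_INDICATED;

/* called once from each of the two racing paths */
void on_path_done(void (*emit_reset)(void))
{
    /* fetch_add returns the old value; +1 mirrors the kernel's
     * atomic_add_return(), which returns the new value */
    if (atomic_fetch_add(&passive_state, 1) + 1 == SEND_RESET_EVENT)
        emit_reset();    /* only the second arrival fires this */
}
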
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 367b3d290140..fafa35042ebd 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -76,6 +76,10 @@ enum nes_timer_type {
     NES_TIMER_TYPE_CLOSE,
 };
 
+#define NES_PASSIVE_STATE_INDICATED    0
+#define NES_DO_NOT_SEND_RESET_EVENT    1
+#define NES_SEND_RESET_EVENT           2
+
 #define MAX_NES_IFS 4
 
 #define SET_ACK 1
@@ -161,6 +165,8 @@ struct nes_timer_entry {
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
+#define MAX_CM_BUFFER        512
+
 
 typedef u32 nes_addr_t;
 
@@ -254,8 +260,6 @@ struct nes_cm_listener {
 
 /* per connection node and node state information */
 struct nes_cm_node {
-    u32                       hashkey;
-
     nes_addr_t                loc_addr, rem_addr;
     u16                       loc_port, rem_port;
 
@@ -292,7 +296,10 @@ struct nes_cm_node {
     int                       apbvt_set;
     int                       accept_pend;
     int                       freed;
+    struct list_head          timer_entry;
+    struct list_head          reset_entry;
     struct nes_qp             *nesqp;
+    atomic_t                  passive_state;
 };
 
 /* structure for client or CM to fill when making CM api calls. */
@@ -350,7 +357,6 @@ struct nes_cm_core {
     u32                       mtu;
     u32                       free_tx_pkt_max;
     u32                       rx_pkt_posted;
-    struct sk_buff_head       tx_free_list;
     atomic_t                  ht_node_cnt;
     struct list_head          connected_nodes;
     /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
@@ -390,7 +396,7 @@ struct nes_cm_ops {
             struct nes_cm_node *);
     int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
             struct nes_cm_node *);
-    void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
+    int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
             struct sk_buff *);
     int (*destroy_cm_core)(struct nes_cm_core *);
     int (*get)(struct nes_cm_core *);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 7c49cc882d75..8f70ff2dcc58 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2700,27 +2700,33 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
         pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
 
     if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
-        nes_cm_recv(rx_skb, nesvnic->netdev);
+        if (nes_cm_recv(rx_skb, nesvnic->netdev))
+            rx_skb = NULL;
+    }
+    if (rx_skb == NULL)
+        goto skip_rx_indicate0;
+
+
+    if ((cqe_misc & NES_NIC_CQE_TAG_VALID) &&
+        (nesvnic->vlan_grp != NULL)) {
+        vlan_tag = (u16)(le32_to_cpu(
+            cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
+            >> 16);
+        nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
+            nesvnic->netdev->name, vlan_tag);
+        if (nes_use_lro)
+            lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+                nesvnic->vlan_grp, vlan_tag, NULL);
+        else
+            nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
     } else {
-        if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) {
-            vlan_tag = (u16)(le32_to_cpu(
-                cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
-                >> 16);
-            nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
-                nesvnic->netdev->name, vlan_tag);
-            if (nes_use_lro)
-                lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
-                    nesvnic->vlan_grp, vlan_tag, NULL);
-            else
-                nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
-        } else {
-            if (nes_use_lro)
-                lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
-            else
-                nes_netif_rx(rx_skb);
-        }
+        if (nes_use_lro)
+            lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+        else
+            nes_netif_rx(rx_skb);
     }
 
+skip_rx_indicate0:
     nesvnic->netdev->last_rx = jiffies;
     /* nesvnic->netstats.rx_packets++; */
     /* nesvnic->netstats.rx_bytes += rx_pkt_size; */
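
nes_cm_recv() now reports whether it consumed the skb, and the completion handler NULLs its pointer and jumps past the normal receive indication on a nonzero return, which avoids both double-freeing a CM-consumed buffer and indicating stale-APBVT packets twice. A minimal sketch of the ownership contract:

#include <stdlib.h>

/* stand-in for nes_cm_recv(): returns nonzero iff it took the buffer */
int hook_consumes(void *buf)
{
    free(buf);
    return 1;
}

void rx_path(void *buf)
{
    if (hook_consumes(buf))
        buf = NULL;       /* ownership transferred, forget the pointer */
    if (buf == NULL)
        return;           /* skip the normal receive indication */
    /* ... otherwise hand buf to the network stack ... */
}
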
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index fb8cbd71a2ef..5611a73d5831 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -540,11 +540,14 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
 
     if (!list_empty(&nesdev->cqp_avail_reqs)) {
         spin_lock_irqsave(&nesdev->cqp.lock, flags);
-        cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
+        if (!list_empty(&nesdev->cqp_avail_reqs)) {
+            cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
                 struct nes_cqp_request, list);
         list_del_init(&cqp_request->list);
+        }
         spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-    } else {
+    }
+    if (cqp_request == NULL) {
         cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
         if (cqp_request) {
             cqp_request->dynamic = 1;
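
The bug fixed here is the classic test-then-lock race: the first list_empty() runs unlocked, so the free list can drain between the check and the lock, and the emptiness test must be repeated under the lock, with the allocation fallback keyed off whether anything was actually dequeued. A self-contained pthreads sketch:

#include <pthread.h>
#include <stdlib.h>

struct req { struct req *next; };

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct req *free_list;

struct req *get_request(void)
{
    struct req *r = NULL;

    if (free_list) {                  /* unlocked hint, may go stale */
        pthread_mutex_lock(&pool_lock);
        if (free_list) {              /* recheck under the lock */
            r = free_list;
            free_list = r->next;
        }
        pthread_mutex_unlock(&pool_lock);
    }
    if (r == NULL)                    /* nothing taken: fall back */
        r = malloc(sizeof(*r));
    return r;
}
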
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index d36c9a0bf1bb..4fdb72454f94 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1695,13 +1695,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1695 /* use 4k pbl */ 1695 /* use 4k pbl */
1696 nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); 1696 nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
1697 if (nesadapter->free_4kpbl == 0) { 1697 if (nesadapter->free_4kpbl == 0) {
1698 if (cqp_request->dynamic) { 1698 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1699 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); 1699 nes_free_cqp_request(nesdev, cqp_request);
1700 kfree(cqp_request);
1701 } else {
1702 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1703 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1704 }
1705 if (!context) 1700 if (!context)
1706 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, 1701 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
1707 nescq->hw_cq.cq_pbase); 1702 nescq->hw_cq.cq_pbase);
@@ -1717,13 +1712,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1717 /* use 256 byte pbl */ 1712 /* use 256 byte pbl */
1718 nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); 1713 nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
1719 if (nesadapter->free_256pbl == 0) { 1714 if (nesadapter->free_256pbl == 0) {
1720 if (cqp_request->dynamic) { 1715 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1721 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); 1716 nes_free_cqp_request(nesdev, cqp_request);
1722 kfree(cqp_request);
1723 } else {
1724 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1725 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1726 }
1727 if (!context) 1717 if (!context)
1728 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, 1718 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
1729 nescq->hw_cq.cq_pbase); 1719 nescq->hw_cq.cq_pbase);
@@ -1928,13 +1918,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
1928 /* Two level PBL */ 1918 /* Two level PBL */
1929 if ((pbl_count+1) > nesadapter->free_4kpbl) { 1919 if ((pbl_count+1) > nesadapter->free_4kpbl) {
1930 nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n"); 1920 nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
1931 if (cqp_request->dynamic) { 1921 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1932 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); 1922 nes_free_cqp_request(nesdev, cqp_request);
1933 kfree(cqp_request);
1934 } else {
1935 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1936 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1937 }
1938 return -ENOMEM; 1923 return -ENOMEM;
1939 } else { 1924 } else {
1940 nesadapter->free_4kpbl -= pbl_count+1; 1925 nesadapter->free_4kpbl -= pbl_count+1;
@@ -1942,13 +1927,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
1942 } else if (residual_page_count > 32) { 1927 } else if (residual_page_count > 32) {
1943 if (pbl_count > nesadapter->free_4kpbl) { 1928 if (pbl_count > nesadapter->free_4kpbl) {
1944 nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n"); 1929 nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
1945 if (cqp_request->dynamic) { 1930 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1946 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); 1931 nes_free_cqp_request(nesdev, cqp_request);
1947 kfree(cqp_request);
1948 } else {
1949 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1950 spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
1951 }
1952 return -ENOMEM; 1932 return -ENOMEM;
1953 } else { 1933 } else {
1954 nesadapter->free_4kpbl -= pbl_count; 1934 nesadapter->free_4kpbl -= pbl_count;
@@ -1956,13 +1936,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
 		} else {
 			if (pbl_count > nesadapter->free_256pbl) {
 				nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
-				if (cqp_request->dynamic) {
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-					kfree(cqp_request);
-				} else {
-					list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				}
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_free_cqp_request(nesdev, cqp_request);
 				return -ENOMEM;
 			} else {
 				nesadapter->free_256pbl -= pbl_count;
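
Note: all four hunks above replace the same open-coded cleanup (free a dynamic request, or return a preallocated one to cqp_avail_reqs) with a single nes_free_cqp_request() call. A minimal sketch of what such a helper plausibly looks like, inferred only from the inline code it replaces; the real definition lives elsewhere in the nes driver (not shown here) and the lock name below is an assumption:

	/* Sketch: consolidate the duplicated release path. The lock used
	 * here (nesdev->cqp.lock) is assumed, not taken from this diff. */
	static void nes_free_cqp_request(struct nes_device *nesdev,
					 struct nes_cqp_request *cqp_request)
	{
		unsigned long flags;

		if (cqp_request->dynamic) {
			kfree(cqp_request);
		} else {
			spin_lock_irqsave(&nesdev->cqp.lock, flags);
			list_add_tail(&cqp_request->list,
				      &nesdev->cqp_avail_reqs);
			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
		}
	}

The callers also gain a small fix in passing: pbl_lock is now dropped before the request is released, so the free path no longer runs under the PBL spinlock.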
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 81a82628a5f1..861119593f2b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -252,6 +252,9 @@ struct iser_conn {
 	wait_queue_head_t            wait;                /* waitq for conn/disconn */
 	atomic_t                     post_recv_buf_count; /* posted rx count        */
 	atomic_t                     post_send_buf_count; /* posted tx count        */
+	atomic_t                     unexpected_pdu_count;/* count of received      *
+							   * unexpected pdus        *
+							   * not yet retired        */
 	char                         name[ISER_OBJECT_NAME_SIZE];
 	struct iser_page_vec        *page_vec;            /* represents SG to fmr maps*
 							   * maps serialized as tx is*/
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index cdd283189047..ed1aff21b7ea 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -183,14 +183,8 @@ static int iser_post_receive_control(struct iscsi_conn *conn)
 	struct iser_regd_buf *regd_data;
 	struct iser_dto *recv_dto = NULL;
 	struct iser_device *device = iser_conn->ib_conn->device;
-	int rx_data_size, err = 0;
-
-	rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
-	if (rx_desc == NULL) {
-		iser_err("Failed to alloc desc for post recv\n");
-		return -ENOMEM;
-	}
-	rx_desc->type = ISCSI_RX;
+	int rx_data_size, err;
+	int posts, outstanding_unexp_pdus;
 
 	/* for the login sequence we must support rx of upto 8K; login is done
 	 * after conn create/bind (connect) and conn stop/bind (reconnect),
@@ -201,46 +195,80 @@ static int iser_post_receive_control(struct iscsi_conn *conn)
 	else /* FIXME till user space sets conn->max_recv_dlength correctly */
 		rx_data_size = 128;
 
-	rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
-	if (rx_desc->data == NULL) {
-		iser_err("Failed to alloc data buf for post recv\n");
-		err = -ENOMEM;
-		goto post_rx_kmalloc_failure;
-	}
+	outstanding_unexp_pdus =
+		atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
 
-	recv_dto = &rx_desc->dto;
-	recv_dto->ib_conn = iser_conn->ib_conn;
-	recv_dto->regd_vector_len = 0;
+	/*
+	 * in addition to the response buffer, replace those consumed by
+	 * unexpected pdus.
+	 */
+	for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
+		rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
+		if (rx_desc == NULL) {
+			iser_err("Failed to alloc desc for post recv %d\n",
+				 posts);
+			err = -ENOMEM;
+			goto post_rx_cache_alloc_failure;
+		}
+		rx_desc->type = ISCSI_RX;
+		rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
+		if (rx_desc->data == NULL) {
+			iser_err("Failed to alloc data buf for post recv %d\n",
+				 posts);
+			err = -ENOMEM;
+			goto post_rx_kmalloc_failure;
+		}
 
-	regd_hdr = &rx_desc->hdr_regd_buf;
-	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
-	regd_hdr->device = device;
-	regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
-	regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
+		recv_dto = &rx_desc->dto;
+		recv_dto->ib_conn = iser_conn->ib_conn;
+		recv_dto->regd_vector_len = 0;
 
-	iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
+		regd_hdr = &rx_desc->hdr_regd_buf;
+		memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
+		regd_hdr->device = device;
+		regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
+		regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
 
-	iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
+		iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
 
-	regd_data = &rx_desc->data_regd_buf;
-	memset(regd_data, 0, sizeof(struct iser_regd_buf));
-	regd_data->device = device;
-	regd_data->virt_addr = rx_desc->data;
-	regd_data->data_size = rx_data_size;
+		iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
 
-	iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
+		regd_data = &rx_desc->data_regd_buf;
+		memset(regd_data, 0, sizeof(struct iser_regd_buf));
+		regd_data->device = device;
+		regd_data->virt_addr = rx_desc->data;
+		regd_data->data_size = rx_data_size;
 
-	iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
+		iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
 
-	err = iser_post_recv(rx_desc);
-	if (!err)
-		return 0;
+		iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
 
-	/* iser_post_recv failed */
+		err = iser_post_recv(rx_desc);
+		if (err) {
+			iser_err("Failed iser_post_recv for post %d\n", posts);
+			goto post_rx_post_recv_failure;
+		}
+	}
+	/* all posts successful */
+	return 0;
+
+post_rx_post_recv_failure:
 	iser_dto_buffs_release(recv_dto);
 	kfree(rx_desc->data);
 post_rx_kmalloc_failure:
 	kmem_cache_free(ig.desc_cache, rx_desc);
+post_rx_cache_alloc_failure:
+	if (posts > 0) {
+		/*
+		 * response buffer posted, but did not replace all unexpected
+		 * pdu recv bufs. Ignore error, retry occurs next send
+		 */
+		outstanding_unexp_pdus -= (posts - 1);
+		err = 0;
+	}
+	atomic_add(outstanding_unexp_pdus,
+		   &iser_conn->ib_conn->unexpected_pdu_count);
+
 	return err;
 }
 
@@ -274,8 +302,10 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
 	struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
 	int i;
-	/* no need to keep it in a var, we are after login so if this should
-	 * be negotiated, by now the result should be available here */
+	/*
+	 * FIXME this value should be declared to the target during login with
+	 * the MaxOutstandingUnexpectedPDUs key when supported
+	 */
 	int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
 
 	iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
@@ -478,6 +508,7 @@ int iser_send_control(struct iscsi_conn *conn,
 	int err = 0;
 	struct iser_regd_buf *regd_buf;
 	struct iser_device *device;
+	unsigned char opcode;
 
 	if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
 		iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
@@ -512,10 +543,15 @@ int iser_send_control(struct iscsi_conn *conn,
 					data_seg_len);
 	}
 
-	if (iser_post_receive_control(conn) != 0) {
-		iser_err("post_rcv_buff failed!\n");
-		err = -ENOMEM;
-		goto send_control_error;
+	opcode = task->hdr->opcode & ISCSI_OPCODE_MASK;
+
+	/* post recv buffer for response if one is expected */
+	if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
+		if (iser_post_receive_control(conn) != 0) {
+			iser_err("post_rcv_buff failed!\n");
+			err = -ENOMEM;
+			goto send_control_error;
+		}
 	}
 
 	err = iser_post_send(mdesc);
@@ -586,6 +622,20 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
 	 * parallel to the execution of iser_conn_term. So the code that waits *
 	 * for the posted rx bufs refcount to become zero handles everything   */
 	atomic_dec(&conn->ib_conn->post_recv_buf_count);
+
+	/*
+	 * if an unexpected PDU was received then the recv wr consumed must
+	 * be replaced, this is done in the next send of a control-type PDU
+	 */
+	if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) {
+		/* nop-in with itt = 0xffffffff */
+		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+	}
+	else if (opcode == ISCSI_OP_ASYNC_EVENT) {
+		/* asynchronous message */
+		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+	}
+	/* a reject PDU consumes the recv buf posted for the response */
 }
 
 void iser_snd_completion(struct iser_desc *tx_desc)
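
Note: the reworked iser_post_receive_control() implements a claim/repost/return scheme: atomic_xchg() claims the whole unexpected-PDU deficit, the loop posts one response buffer plus one replacement per claimed PDU, and on partial failure atomic_add() returns whatever was not replaced so the next control-PDU send retries. A generic sketch of that accounting pattern (names here are illustrative, not from the driver):

	/* Claim the deficit atomically, post 1 + deficit buffers, and
	 * return the unreplaced remainder for a later retry. */
	static int replenish_recv_bufs(atomic_t *deficit,
				       int (*post_one)(void *ctx), void *ctx)
	{
		int todo = atomic_xchg(deficit, 0);
		int posts, err = 0;

		for (posts = 0; posts < 1 + todo; posts++) {
			err = post_one(ctx);
			if (err)
				break;
		}
		if (posts > 0) {
			/* the first post is the response buffer itself */
			todo -= posts - 1;
			err = 0;	/* partial success; retry on next send */
		}
		atomic_add(todo, deficit);
		return err;
	}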
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 26ff6214a81f..6dc6b174cdd4 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -498,6 +498,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
 	init_waitqueue_head(&ib_conn->wait);
 	atomic_set(&ib_conn->post_recv_buf_count, 0);
 	atomic_set(&ib_conn->post_send_buf_count, 0);
+	atomic_set(&ib_conn->unexpected_pdu_count, 0);
 	atomic_set(&ib_conn->refcount, 1);
 	INIT_LIST_HEAD(&ib_conn->conn_list);
 	spin_lock_init(&ib_conn->lock);
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index b7ad2829d67e..ac57b6a42c6e 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(mlx4_cq_resize);
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed)
+		  unsigned vector, int collapsed)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -198,6 +198,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 	u64 mtt_addr;
 	int err;
 
+	if (vector >= dev->caps.num_comp_vectors)
+		return -EINVAL;
+
+	cq->vector = vector;
+
 	cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
 	if (cq->cqn == -1)
 		return -ENOMEM;
@@ -227,7 +232,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 
 	cq_context->flags = cpu_to_be32(!!collapsed << 18);
 	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-	cq_context->comp_eqn = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+	cq_context->comp_eqn = priv->eq_table.eq[vector].eqn;
 	cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
 	mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -276,7 +281,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
-	synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+	synchronize_irq(priv->eq_table.eq[cq->vector].irq);
 
 	spin_lock_irq(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);
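
Note: mlx4_cq_alloc() now takes the completion vector as an explicit argument, validates it against dev->caps.num_comp_vectors, stores it in cq->vector, and binds the CQ to that vector's EQ; mlx4_cq_free() then synchronizes against that vector's IRQ instead of a single shared one. A hedged usage sketch (MTT/UAR/doorbell setup elided, names illustrative):

	/* Spread CQs round-robin across the available completion vectors. */
	static int create_cq_on_vector(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
				       struct mlx4_uar *uar, u64 db_rec,
				       struct mlx4_cq *cq, int ring)
	{
		unsigned vector = ring % dev->caps.num_comp_vectors;

		/* fails with -EINVAL if vector >= num_comp_vectors */
		return mlx4_cq_alloc(dev, 1024, mtt, uar, db_rec, cq,
				     vector, 0 /* not collapsed */);
	}

This is exactly the policy the mlx4_en driver adopts below: RX CQs take ring % num_comp_vectors, while TX CQs stay on vector 0.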
diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
index 1368a8010af4..674f836e225b 100644
--- a/drivers/net/mlx4/en_cq.c
+++ b/drivers/net/mlx4/en_cq.c
@@ -51,10 +51,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	if (mode == RX)
+	if (mode == RX) {
 		cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
-	else
+		cq->vector = ring % mdev->dev->caps.num_comp_vectors;
+	} else {
 		cq->buf_size = sizeof(struct mlx4_cqe);
+		cq->vector = 0;
+	}
 
 	cq->ring = ring;
 	cq->is_tx = mode;
@@ -86,7 +89,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 	memset(cq->buf, 0, cq->buf_size);
 
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
-			    cq->wqres.db.dma, &cq->mcq, cq->is_tx);
+			    cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
index 4b9794e97a79..c1c05852a95e 100644
--- a/drivers/net/mlx4/en_main.c
+++ b/drivers/net/mlx4/en_main.c
@@ -170,9 +170,9 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 			mlx4_info(mdev, "Using %d tx rings for port:%d\n",
 				  mdev->profile.prof[i].tx_ring_num, i);
 		if (!mdev->profile.prof[i].rx_ring_num) {
-			mdev->profile.prof[i].rx_ring_num = 1;
+			mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors;
 			mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
-				  1, i);
+				  mdev->profile.prof[i].rx_ring_num, i);
 		} else
 			mlx4_info(mdev, "Using %d rx rings for port:%d\n",
 				  mdev->profile.prof[i].rx_ring_num, i);
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index de169338cd90..2c19bff7cbab 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -243,10 +243,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 		 * least that often.
 		 */
 		if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
-			/*
-			 * Conditional on hca_type is OK here because
-			 * this is a rare case, not the fast path.
-			 */
 			eq_set_ci(eq, 0);
 			set_ci = 0;
 		}
@@ -266,7 +262,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
 
 	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
 
 	return IRQ_RETVAL(work);
@@ -304,6 +300,17 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			MLX4_CMD_TIME_CLASS_A);
 }
 
+static int mlx4_num_eq_uar(struct mlx4_dev *dev)
+{
+	/*
+	 * Each UAR holds 4 EQ doorbells.  To figure out how many UARs
+	 * we need to map, take the difference of highest index and
+	 * the lowest index we'll use and add 1.
+	 */
+	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
+		dev->caps.reserved_eqs / 4 + 1;
+}
+
 static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
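
Note: a worked example of mlx4_num_eq_uar(), with capability values assumed purely for illustration: with reserved_eqs = 2 and num_comp_vectors = 4, the driver uses EQs 2..6, whose doorbells fall in UAR pages 2/4 = 0 through (4 + 1 + 2)/4 = 1, so two pages must be mapped:

	/* Illustrative values only; the formula is the one added above. */
	static int example_num_eq_uar(void)
	{
		int reserved_eqs = 2, num_comp_vectors = 4;

		return (num_comp_vectors + 1 + reserved_eqs) / 4 -
			reserved_eqs / 4 + 1;	/* == 2 */
	}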
@@ -483,9 +490,11 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
 
 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		if (eq_table->eq[i].have_irq)
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
+
+	kfree(eq_table->irq_names);
 }
 
 static int mlx4_map_clr_int(struct mlx4_dev *dev)
@@ -551,57 +560,93 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
 	__free_page(priv->eq_table.icm_page);
 }
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
+				    sizeof *priv->eq_table.eq, GFP_KERNEL);
+	if (!priv->eq_table.eq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx4_free_eq_table(struct mlx4_dev *dev)
+{
+	kfree(mlx4_priv(dev)->eq_table.eq);
+}
+
 int mlx4_init_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
 	int i;
 
+	priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map,
+					 mlx4_num_eq_uar(dev), GFP_KERNEL);
+	if (!priv->eq_table.uar_map) {
+		err = -ENOMEM;
+		goto err_out_free;
+	}
+
 	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
 			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
 	if (err)
-		return err;
+		goto err_out_free;
 
-	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
 		priv->eq_table.uar_map[i] = NULL;
 
 	err = mlx4_map_clr_int(dev);
 	if (err)
-		goto err_out_free;
+		goto err_out_bitmap;
 
 	priv->eq_table.clr_mask =
 		swab32(1 << (priv->eq_table.inta_pin & 31));
 	priv->eq_table.clr_int = priv->clr_base +
 		(priv->eq_table.inta_pin < 32 ? 4 : 0);
 
-	err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
-			     &priv->eq_table.eq[MLX4_EQ_COMP]);
-	if (err)
-		goto err_out_unmap;
+	priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+	if (!priv->eq_table.irq_names) {
+		err = -ENOMEM;
+		goto err_out_bitmap;
+	}
+
+	for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
+		err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+				     &priv->eq_table.eq[i]);
+		if (err)
+			goto err_out_unmap;
+	}
 
 	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
-			     &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+			     (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0,
+			     &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 	if (err)
 		goto err_out_comp;
 
 	if (dev->flags & MLX4_FLAG_MSI_X) {
-		static const char *eq_name[] = {
-			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-			[MLX4_EQ_ASYNC] = DRV_NAME " (async)"
-		};
+		static const char async_eq_name[] = "mlx4-async";
+		const char *eq_name;
+
+		for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
+			if (i < dev->caps.num_comp_vectors) {
+				snprintf(priv->eq_table.irq_names + i * 16, 16,
+					 "mlx4-comp-%d", i);
+				eq_name = priv->eq_table.irq_names + i * 16;
+			} else
+				eq_name = async_eq_name;
 
-		for (i = 0; i < MLX4_NUM_EQ; ++i) {
 			err = request_irq(priv->eq_table.eq[i].irq,
-					  mlx4_msi_x_interrupt,
-					  0, eq_name[i], priv->eq_table.eq + i);
+					  mlx4_msi_x_interrupt, 0, eq_name,
+					  priv->eq_table.eq + i);
 			if (err)
 				goto err_out_async;
 
 			priv->eq_table.eq[i].have_irq = 1;
 		}
-
 	} else {
 		err = request_irq(dev->pdev->irq, mlx4_interrupt,
 				  IRQF_SHARED, DRV_NAME, dev);
@@ -612,28 +657,36 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 	}
 
 	err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
-			  priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+			  priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 	if (err)
 		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
-			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
+			   priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		eq_set_ci(&priv->eq_table.eq[i], 1);
 
 	return 0;
 
 err_out_async:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+	mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 
 err_out_comp:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+	i = dev->caps.num_comp_vectors - 1;
 
 err_out_unmap:
+	while (i >= 0) {
+		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
+		--i;
+	}
 	mlx4_unmap_clr_int(dev);
 	mlx4_free_irqs(dev);
 
-err_out_free:
+err_out_bitmap:
 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+err_out_free:
+	kfree(priv->eq_table.uar_map);
+
 	return err;
 }
 
@@ -643,18 +696,20 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
 	int i;
 
 	mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
-		    priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 
 	mlx4_free_irqs(dev);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
 
 	mlx4_unmap_clr_int(dev);
 
-	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
 		if (priv->eq_table.uar_map[i])
 			iounmap(priv->eq_table.uar_map[i]);
 
 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+	kfree(priv->eq_table.uar_map);
 }
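
Note: the IRQ names set up in mlx4_init_eq_table() live in one flat kmalloc'd buffer of fixed 16-byte slots, one per completion vector ("mlx4-comp-0", "mlx4-comp-1", ...), while the async EQ keeps a static "mlx4-async" name; the buffer is freed in mlx4_free_irqs(). A sketch of the slot arithmetic (the 16-byte width is the literal constant used above, given a name here only for clarity):

	#define MLX4_IRQ_NAME_LEN 16	/* matches the kmalloc(16 * ...) above */

	static const char *comp_irq_name(char *irq_names, int vector)
	{
		char *slot = irq_names + vector * MLX4_IRQ_NAME_LEN;

		snprintf(slot, MLX4_IRQ_NAME_LEN, "mlx4-comp-%d", vector);
		return slot;
	}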
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 90a0281d15ea..710c79e7a2db 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -421,9 +421,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
 				  cmpt_entry_sz,
-				  roundup_pow_of_two(MLX4_NUM_EQ +
-						     dev->caps.reserved_eqs),
-				  MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
+				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
 	if (err)
 		goto err_cq;
 
@@ -810,12 +808,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 		if (dev->flags & MLX4_FLAG_MSI_X) {
 			mlx4_warn(dev, "NOP command failed to generate MSI-X "
 				  "interrupt IRQ %d).\n",
-				  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_warn(dev, "Trying again without MSI-X.\n");
 		} else {
 			mlx4_err(dev, "NOP command failed to generate interrupt "
 				 "(IRQ %d), aborting.\n",
-				 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
 		}
 
@@ -908,31 +906,50 @@ err_uar_table_free:
 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct msix_entry entries[MLX4_NUM_EQ];
+	struct msix_entry *entries;
+	int nreq;
 	int err;
 	int i;
 
 	if (msi_x) {
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		nreq = min(dev->caps.num_eqs - dev->caps.reserved_eqs,
+			   num_possible_cpus() + 1);
+		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+		if (!entries)
+			goto no_msi;
+
+		for (i = 0; i < nreq; ++i)
 			entries[i].entry = i;
 
-		err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
+	retry:
+		err = pci_enable_msix(dev->pdev, entries, nreq);
 		if (err) {
-			if (err > 0)
-				mlx4_info(dev, "Only %d MSI-X vectors available, "
-					  "not using MSI-X\n", err);
+			/* Try again if at least 2 vectors are available */
+			if (err > 1) {
+				mlx4_info(dev, "Requested %d vectors, "
+					  "but only %d MSI-X vectors available, "
+					  "trying again\n", nreq, err);
+				nreq = err;
+				goto retry;
+			}
+
 			goto no_msi;
 		}
 
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		dev->caps.num_comp_vectors = nreq - 1;
+		for (i = 0; i < nreq; ++i)
 			priv->eq_table.eq[i].irq = entries[i].vector;
 
 		dev->flags |= MLX4_FLAG_MSI_X;
+
+		kfree(entries);
 		return;
 	}
 
 no_msi:
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	dev->caps.num_comp_vectors = 1;
+
+	for (i = 0; i < 2; ++i)
 		priv->eq_table.eq[i].irq = dev->pdev->irq;
 }
 
@@ -1074,6 +1091,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto err_cmd;
 
+	err = mlx4_alloc_eq_table(dev);
+	if (err)
+		goto err_close;
+
 	mlx4_enable_msi_x(dev);
 
 	err = mlx4_setup_hca(dev);
@@ -1084,7 +1105,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	if (err)
-		goto err_close;
+		goto err_free_eq;
 
 	for (port = 1; port <= dev->caps.num_ports; port++) {
 		err = mlx4_init_port_info(dev, port);
@@ -1114,6 +1135,9 @@ err_port:
 	mlx4_cleanup_pd_table(dev);
 	mlx4_cleanup_uar_table(dev);
 
+err_free_eq:
+	mlx4_free_eq_table(dev);
+
 err_close:
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
@@ -1177,6 +1201,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	iounmap(priv->kar);
 	mlx4_uar_free(dev, &priv->driver_uar);
 	mlx4_cleanup_uar_table(dev);
+	mlx4_free_eq_table(dev);
 	mlx4_close_hca(dev);
 	mlx4_cmd_cleanup(dev);
 
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 34c909deaff3..e0213bad61c7 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -63,12 +63,6 @@ enum {
 };
 
 enum {
-	MLX4_EQ_ASYNC,
-	MLX4_EQ_COMP,
-	MLX4_NUM_EQ
-};
-
-enum {
 	MLX4_NUM_PDS = 1 << 15
 };
 
@@ -205,10 +199,11 @@ struct mlx4_cq_table {
 
 struct mlx4_eq_table {
 	struct mlx4_bitmap	bitmap;
+	char		       *irq_names;
 	void __iomem	       *clr_int;
-	void __iomem	       *uar_map[(MLX4_NUM_EQ + 6) / 4];
+	void __iomem	      **uar_map;
 	u32			clr_mask;
-	struct mlx4_eq		eq[MLX4_NUM_EQ];
+	struct mlx4_eq	       *eq;
 	u64			icm_virt;
 	struct page	       *icm_page;
 	dma_addr_t		icm_dma;
@@ -328,6 +323,9 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
 
 int mlx4_reset(struct mlx4_dev *dev);
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev);
+void mlx4_free_eq_table(struct mlx4_dev *dev);
+
 int mlx4_init_pd_table(struct mlx4_dev *dev);
 int mlx4_init_uar_table(struct mlx4_dev *dev);
 int mlx4_init_mr_table(struct mlx4_dev *dev);
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index 9ca42b213d54..919fb9eb1b62 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -107,7 +107,9 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_AUXC].num = request->num_qp;
 	profile[MLX4_RES_SRQ].num  = request->num_srq;
 	profile[MLX4_RES_CQ].num   = request->num_cq;
-	profile[MLX4_RES_EQ].num   = MLX4_NUM_EQ + dev_cap->reserved_eqs;
+	profile[MLX4_RES_EQ].num   = min(dev_cap->max_eqs,
+					 dev_cap->reserved_eqs +
+					 num_possible_cpus() + 1);
 	profile[MLX4_RES_DMPT].num = request->num_mpt;
 	profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
 	profile[MLX4_RES_MTT].num  = request->num_mtt;
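
Note: the EQ line of the profile now scales with the machine instead of the old fixed two-EQ layout: one completion EQ per possible CPU plus the async EQ, on top of the firmware-reserved EQs, capped at the device maximum. A worked example with assumed capability values:

	/* Illustrative numbers only. */
	static int example_num_eqs(void)
	{
		int max_eqs = 512, reserved_eqs = 4, possible_cpus = 16;

		return min(max_eqs, reserved_eqs + possible_cpus + 1); /* 21 */
	}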
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 371086fd946f..8f659cc29960 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -206,6 +206,7 @@ struct mlx4_caps {
 	int			reserved_cqs;
 	int			num_eqs;
 	int			reserved_eqs;
+	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
 	int			fmr_reserved_mtts;
@@ -328,6 +329,7 @@ struct mlx4_cq {
 	int			arm_sn;
 
 	int			cqn;
+	unsigned		vector;
 
 	atomic_t		refcount;
 	struct completion	free;
@@ -437,7 +439,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed);
+		  unsigned vector, int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);