path: root/drivers/infiniband
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig                       |    5
-rw-r--r--  drivers/infiniband/core/Makefile                 |   11
-rw-r--r--  drivers/infiniband/core/addr.c                   |  367
-rw-r--r--  drivers/infiniband/core/cache.c                  |   30
-rw-r--r--  drivers/infiniband/core/cm.c                     |  119
-rw-r--r--  drivers/infiniband/core/cma.c                    | 1927
-rw-r--r--  drivers/infiniband/core/fmr_pool.c               |   30
-rw-r--r--  drivers/infiniband/core/mad.c                    |   97
-rw-r--r--  drivers/infiniband/core/mad_priv.h               |    2
-rw-r--r--  drivers/infiniband/core/sa_query.c               |   31
-rw-r--r--  drivers/infiniband/core/ucm.c                    |  183
-rw-r--r--  drivers/infiniband/core/uverbs.h                 |    4
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c             |  971
-rw-r--r--  drivers/infiniband/core/uverbs_main.c            |   35
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c        |  138
-rw-r--r--  drivers/infiniband/core/verbs.c                  |   44
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c          |   42
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c          |   23
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c           |   12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c           |    4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c          |   14
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c     |   33
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h     |    3
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c           |   40
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_reset.c        |   59
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c          |    5
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h             |   34
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c          |   27
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c        |   28
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c   |   11
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c       |    3
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c              |  482
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h              |   33
33 files changed, 3889 insertions, 958 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index afc612b8577d..ba2d6505e9a4 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openib.org>.
 
+config INFINIBAND_ADDR_TRANS
+	bool
+	depends on INFINIBAND && INET
+	default y
+
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
 
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ec3353f24b27..68e73ec2d1f8 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,5 +1,7 @@
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
+
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o
+					ib_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
 
@@ -12,8 +14,13 @@ ib_sa-y := sa_query.o
 
 ib_cm-y :=			cm.o
 
+rdma_cm-y :=			cma.o
+
+ib_addr-y :=			addr.o
+
 ib_umad-y :=			user_mad.o
 
 ib_ucm-y :=			ucm.o
 
-ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o
+ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_mem.o \
+				uverbs_marshall.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
new file mode 100644
index 000000000000..d294bbc42f09
--- /dev/null
+++ b/drivers/infiniband/core/addr.c
@@ -0,0 +1,367 @@
1/*
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation. All rights reserved.
6 *
7 * This Software is licensed under one of the following licenses:
8 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is
10 * available from the Open Source Initiative, see
11 * http://www.opensource.org/licenses/cpl.php.
12 *
13 * 2) under the terms of the "The BSD License" a copy of which is
14 * available from the Open Source Initiative, see
15 * http://www.opensource.org/licenses/bsd-license.php.
16 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18 * copy of which is available from the Open Source Initiative, see
19 * http://www.opensource.org/licenses/gpl-license.php.
20 *
21 * Licensee has the right to choose one of the above licenses.
22 *
23 * Redistributions of source code must retain the above copyright
24 * notice and one of the license notices.
25 *
26 * Redistributions in binary form must reproduce both the above copyright
27 * notice, one of the license notices in the documentation
28 * and/or other materials provided with the distribution.
29 */
30
31#include <linux/mutex.h>
32#include <linux/inetdevice.h>
33#include <linux/workqueue.h>
34#include <linux/if_arp.h>
35#include <net/arp.h>
36#include <net/neighbour.h>
37#include <net/route.h>
38#include <rdma/ib_addr.h>
39
40MODULE_AUTHOR("Sean Hefty");
41MODULE_DESCRIPTION("IB Address Translation");
42MODULE_LICENSE("Dual BSD/GPL");
43
44struct addr_req {
45 struct list_head list;
46 struct sockaddr src_addr;
47 struct sockaddr dst_addr;
48 struct rdma_dev_addr *addr;
49 void *context;
50 void (*callback)(int status, struct sockaddr *src_addr,
51 struct rdma_dev_addr *addr, void *context);
52 unsigned long timeout;
53 int status;
54};
55
56static void process_req(void *data);
57
58static DEFINE_MUTEX(lock);
59static LIST_HEAD(req_list);
60static DECLARE_WORK(work, process_req, NULL);
61static struct workqueue_struct *addr_wq;
62
63static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
64 unsigned char *dst_dev_addr)
65{
66 switch (dev->type) {
67 case ARPHRD_INFINIBAND:
68 dev_addr->dev_type = IB_NODE_CA;
69 break;
70 default:
71 return -EADDRNOTAVAIL;
72 }
73
74 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
75 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
76 if (dst_dev_addr)
77 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
78 return 0;
79}
80
81int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
82{
83 struct net_device *dev;
84 u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
85 int ret;
86
87 dev = ip_dev_find(ip);
88 if (!dev)
89 return -EADDRNOTAVAIL;
90
91 ret = copy_addr(dev_addr, dev, NULL);
92 dev_put(dev);
93 return ret;
94}
95EXPORT_SYMBOL(rdma_translate_ip);
96
97static void set_timeout(unsigned long time)
98{
99 unsigned long delay;
100
101 cancel_delayed_work(&work);
102
103 delay = time - jiffies;
104 if ((long)delay <= 0)
105 delay = 1;
106
107 queue_delayed_work(addr_wq, &work, delay);
108}
109
110static void queue_req(struct addr_req *req)
111{
112 struct addr_req *temp_req;
113
114 mutex_lock(&lock);
115 list_for_each_entry_reverse(temp_req, &req_list, list) {
116 if (time_after(req->timeout, temp_req->timeout))
117 break;
118 }
119
120 list_add(&req->list, &temp_req->list);
121
122 if (req_list.next == &req->list)
123 set_timeout(req->timeout);
124 mutex_unlock(&lock);
125}
126
127static void addr_send_arp(struct sockaddr_in *dst_in)
128{
129 struct rtable *rt;
130 struct flowi fl;
131 u32 dst_ip = dst_in->sin_addr.s_addr;
132
133 memset(&fl, 0, sizeof fl);
134 fl.nl_u.ip4_u.daddr = dst_ip;
135 if (ip_route_output_key(&rt, &fl))
136 return;
137
138 arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
139 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
140 ip_rt_put(rt);
141}
142
143static int addr_resolve_remote(struct sockaddr_in *src_in,
144 struct sockaddr_in *dst_in,
145 struct rdma_dev_addr *addr)
146{
147 u32 src_ip = src_in->sin_addr.s_addr;
148 u32 dst_ip = dst_in->sin_addr.s_addr;
149 struct flowi fl;
150 struct rtable *rt;
151 struct neighbour *neigh;
152 int ret;
153
154 memset(&fl, 0, sizeof fl);
155 fl.nl_u.ip4_u.daddr = dst_ip;
156 fl.nl_u.ip4_u.saddr = src_ip;
157 ret = ip_route_output_key(&rt, &fl);
158 if (ret)
159 goto out;
160
161 /* If the device does ARP internally, return 'done' */
162 if (rt->idev->dev->flags & IFF_NOARP) {
163 copy_addr(addr, rt->idev->dev, NULL);
164 goto put;
165 }
166
167 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
168 if (!neigh) {
169 ret = -ENODATA;
170 goto put;
171 }
172
173 if (!(neigh->nud_state & NUD_VALID)) {
174 ret = -ENODATA;
175 goto release;
176 }
177
178 if (!src_ip) {
179 src_in->sin_family = dst_in->sin_family;
180 src_in->sin_addr.s_addr = rt->rt_src;
181 }
182
183 ret = copy_addr(addr, neigh->dev, neigh->ha);
184release:
185 neigh_release(neigh);
186put:
187 ip_rt_put(rt);
188out:
189 return ret;
190}
191
192static void process_req(void *data)
193{
194 struct addr_req *req, *temp_req;
195 struct sockaddr_in *src_in, *dst_in;
196 struct list_head done_list;
197
198 INIT_LIST_HEAD(&done_list);
199
200 mutex_lock(&lock);
201 list_for_each_entry_safe(req, temp_req, &req_list, list) {
202 if (req->status) {
203 src_in = (struct sockaddr_in *) &req->src_addr;
204 dst_in = (struct sockaddr_in *) &req->dst_addr;
205 req->status = addr_resolve_remote(src_in, dst_in,
206 req->addr);
207 }
208 if (req->status && time_after(jiffies, req->timeout))
209 req->status = -ETIMEDOUT;
210 else if (req->status == -ENODATA)
211 continue;
212
213 list_del(&req->list);
214 list_add_tail(&req->list, &done_list);
215 }
216
217 if (!list_empty(&req_list)) {
218 req = list_entry(req_list.next, struct addr_req, list);
219 set_timeout(req->timeout);
220 }
221 mutex_unlock(&lock);
222
223 list_for_each_entry_safe(req, temp_req, &done_list, list) {
224 list_del(&req->list);
225 req->callback(req->status, &req->src_addr, req->addr,
226 req->context);
227 kfree(req);
228 }
229}
230
231static int addr_resolve_local(struct sockaddr_in *src_in,
232 struct sockaddr_in *dst_in,
233 struct rdma_dev_addr *addr)
234{
235 struct net_device *dev;
236 u32 src_ip = src_in->sin_addr.s_addr;
237 u32 dst_ip = dst_in->sin_addr.s_addr;
238 int ret;
239
240 dev = ip_dev_find(dst_ip);
241 if (!dev)
242 return -EADDRNOTAVAIL;
243
244 if (ZERONET(src_ip)) {
245 src_in->sin_family = dst_in->sin_family;
246 src_in->sin_addr.s_addr = dst_ip;
247 ret = copy_addr(addr, dev, dev->dev_addr);
248 } else if (LOOPBACK(src_ip)) {
249 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
250 if (!ret)
251 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
252 } else {
253 ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
254 if (!ret)
255 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
256 }
257
258 dev_put(dev);
259 return ret;
260}
261
262int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
263 struct rdma_dev_addr *addr, int timeout_ms,
264 void (*callback)(int status, struct sockaddr *src_addr,
265 struct rdma_dev_addr *addr, void *context),
266 void *context)
267{
268 struct sockaddr_in *src_in, *dst_in;
269 struct addr_req *req;
270 int ret = 0;
271
272 req = kmalloc(sizeof *req, GFP_KERNEL);
273 if (!req)
274 return -ENOMEM;
275 memset(req, 0, sizeof *req);
276
277 if (src_addr)
278 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
279 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
280 req->addr = addr;
281 req->callback = callback;
282 req->context = context;
283
284 src_in = (struct sockaddr_in *) &req->src_addr;
285 dst_in = (struct sockaddr_in *) &req->dst_addr;
286
287 req->status = addr_resolve_local(src_in, dst_in, addr);
288 if (req->status == -EADDRNOTAVAIL)
289 req->status = addr_resolve_remote(src_in, dst_in, addr);
290
291 switch (req->status) {
292 case 0:
293 req->timeout = jiffies;
294 queue_req(req);
295 break;
296 case -ENODATA:
297 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
298 queue_req(req);
299 addr_send_arp(dst_in);
300 break;
301 default:
302 ret = req->status;
303 kfree(req);
304 break;
305 }
306 return ret;
307}
308EXPORT_SYMBOL(rdma_resolve_ip);
309
310void rdma_addr_cancel(struct rdma_dev_addr *addr)
311{
312 struct addr_req *req, *temp_req;
313
314 mutex_lock(&lock);
315 list_for_each_entry_safe(req, temp_req, &req_list, list) {
316 if (req->addr == addr) {
317 req->status = -ECANCELED;
318 req->timeout = jiffies;
319 list_del(&req->list);
320 list_add(&req->list, &req_list);
321 set_timeout(req->timeout);
322 break;
323 }
324 }
325 mutex_unlock(&lock);
326}
327EXPORT_SYMBOL(rdma_addr_cancel);
328
329static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
330 struct packet_type *pkt, struct net_device *orig_dev)
331{
332 struct arphdr *arp_hdr;
333
334 arp_hdr = (struct arphdr *) skb->nh.raw;
335
336 if (arp_hdr->ar_op == htons(ARPOP_REQUEST) ||
337 arp_hdr->ar_op == htons(ARPOP_REPLY))
338 set_timeout(jiffies);
339
340 kfree_skb(skb);
341 return 0;
342}
343
344static struct packet_type addr_arp = {
345 .type = __constant_htons(ETH_P_ARP),
346 .func = addr_arp_recv,
347 .af_packet_priv = (void*) 1,
348};
349
350static int addr_init(void)
351{
352 addr_wq = create_singlethread_workqueue("ib_addr_wq");
353 if (!addr_wq)
354 return -ENOMEM;
355
356 dev_add_pack(&addr_arp);
357 return 0;
358}
359
360static void addr_cleanup(void)
361{
362 dev_remove_pack(&addr_arp);
363 destroy_workqueue(addr_wq);
364}
365
366module_init(addr_init);
367module_exit(addr_cleanup);
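
The file above exports rdma_translate_ip(), rdma_resolve_ip() and rdma_addr_cancel(). A minimal caller sketch (not part of this patch; resolve_done(), start_resolve() and my_dev_addr are illustrative names) showing the asynchronous callback model that rdma_resolve_ip() assumes:

#include <linux/in.h>
#include <rdma/ib_addr.h>

/* Hypothetical caller: resolve an IPv4 destination to IB hardware
 * addresses using the ib_addr API exported above. */
static struct rdma_dev_addr my_dev_addr;

static void resolve_done(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context)
{
	if (status)
		printk(KERN_WARNING "ib_addr: resolution failed: %d\n",
		       status);
	/* on success addr->src_dev_addr/dst_dev_addr hold the device
	 * hardware addresses copied by copy_addr() */
}

static int start_resolve(u32 dst_ip)
{
	struct sockaddr_in dst;

	memset(&dst, 0, sizeof dst);
	dst.sin_family = AF_INET;
	dst.sin_addr.s_addr = dst_ip;

	/* NULL src_addr lets addr.c pick the source from the route;
	 * completion is reported asynchronously via resolve_done(). */
	return rdma_resolve_ip(NULL, (struct sockaddr *) &dst, &my_dev_addr,
			       2000, resolve_done, NULL);
}

The callback runs from the ib_addr_wq workqueue once the neighbour entry becomes valid, or with -ETIMEDOUT after the requested timeout.
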
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 50364c0b090c..e05ca2cdc73f 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -191,6 +191,24 @@ int ib_find_cached_pkey(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_find_cached_pkey);
 
+int ib_get_cached_lmc(struct ib_device *device,
+		      u8                port_num,
+		      u8                *lmc)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	if (port_num < start_port(device) || port_num > end_port(device))
+		return -EINVAL;
+
+	read_lock_irqsave(&device->cache.lock, flags);
+	*lmc = device->cache.lmc_cache[port_num - start_port(device)];
+	read_unlock_irqrestore(&device->cache.lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_lmc);
+
 static void ib_cache_update(struct ib_device *device,
 			    u8                port)
 {
@@ -251,6 +269,8 @@ static void ib_cache_update(struct ib_device *device,
 	device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
 	device->cache.gid_cache [port - start_port(device)] = gid_cache;
 
+	device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
+
 	write_unlock_irq(&device->cache.lock);
 
 	kfree(old_pkey_cache);
@@ -305,7 +325,13 @@ static void ib_cache_setup_one(struct ib_device *device)
 		kmalloc(sizeof *device->cache.gid_cache *
 			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
 
-	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
+	device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
+					  (end_port(device) -
+					   start_port(device) + 1),
+					  GFP_KERNEL);
+
+	if (!device->cache.pkey_cache || !device->cache.gid_cache ||
+	    !device->cache.lmc_cache) {
 		printk(KERN_WARNING "Couldn't allocate cache "
 		       "for %s\n", device->name);
 		goto err;
@@ -333,6 +359,7 @@ err_cache:
 err:
 	kfree(device->cache.pkey_cache);
 	kfree(device->cache.gid_cache);
+	kfree(device->cache.lmc_cache);
 }
 
 static void ib_cache_cleanup_one(struct ib_device *device)
@@ -349,6 +376,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)
 
 	kfree(device->cache.pkey_cache);
 	kfree(device->cache.gid_cache);
+	kfree(device->cache.lmc_cache);
 }
 
 static struct ib_client cache_client = {
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 86fee43502cd..450adfe0a4f1 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -32,7 +32,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $
+ * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
  */
 
 #include <linux/completion.h>
@@ -132,6 +132,7 @@ struct cm_id_private {
 	/* todo: use alternate port on send failure */
 	struct cm_av av;
 	struct cm_av alt_av;
+	struct ib_cm_compare_data *compare_data;
 
 	void *private_data;
 	__be64 tid;
@@ -253,23 +254,13 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
 	cm_id_priv->private_data_len = private_data_len;
 }
 
-static void cm_set_ah_attr(struct ib_ah_attr *ah_attr, u8 port_num,
-			   u16 dlid, u8 sl, u16 src_path_bits)
-{
-	memset(ah_attr, 0, sizeof ah_attr);
-	ah_attr->dlid = dlid;
-	ah_attr->sl = sl;
-	ah_attr->src_path_bits = src_path_bits;
-	ah_attr->port_num = port_num;
-}
-
-static void cm_init_av_for_response(struct cm_port *port,
-				    struct ib_wc *wc, struct cm_av *av)
+static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+				    struct ib_grh *grh, struct cm_av *av)
 {
 	av->port = port;
 	av->pkey_index = wc->pkey_index;
-	cm_set_ah_attr(&av->ah_attr, port->port_num, wc->slid,
-		       wc->sl, wc->dlid_path_bits);
+	ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc,
+			   grh, &av->ah_attr);
 }
 
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
@@ -299,9 +290,8 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 		return ret;
 
 	av->port = port;
-	cm_set_ah_attr(&av->ah_attr, av->port->port_num,
-		       be16_to_cpu(path->dlid), path->sl,
-		       be16_to_cpu(path->slid) & 0x7F);
+	ib_init_ah_from_path(cm_dev->device, port->port_num, path,
+			     &av->ah_attr);
 	av->packet_life_time = path->packet_life_time;
 	return 0;
 }
@@ -357,6 +347,41 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
 	return cm_id_priv;
 }
 
+static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+{
+	int i;
+
+	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
+		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
+					     ((unsigned long *) mask)[i];
+}
+
+static int cm_compare_data(struct ib_cm_compare_data *src_data,
+			   struct ib_cm_compare_data *dst_data)
+{
+	u8 src[IB_CM_COMPARE_SIZE];
+	u8 dst[IB_CM_COMPARE_SIZE];
+
+	if (!src_data || !dst_data)
+		return 0;
+
+	cm_mask_copy(src, src_data->data, dst_data->mask);
+	cm_mask_copy(dst, dst_data->data, src_data->mask);
+	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
+}
+
+static int cm_compare_private_data(u8 *private_data,
+				   struct ib_cm_compare_data *dst_data)
+{
+	u8 src[IB_CM_COMPARE_SIZE];
+
+	if (!dst_data)
+		return 0;
+
+	cm_mask_copy(src, private_data, dst_data->mask);
+	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
+}
+
 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 {
 	struct rb_node **link = &cm.listen_service_table.rb_node;
@@ -364,14 +389,18 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 	struct cm_id_private *cur_cm_id_priv;
 	__be64 service_id = cm_id_priv->id.service_id;
 	__be64 service_mask = cm_id_priv->id.service_mask;
+	int data_cmp;
 
 	while (*link) {
 		parent = *link;
 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
 					  service_node);
+		data_cmp = cm_compare_data(cm_id_priv->compare_data,
+					   cur_cm_id_priv->compare_data);
 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
 		    (service_mask & cur_cm_id_priv->id.service_id) &&
-		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
+		    !data_cmp)
 			return cur_cm_id_priv;
 
 		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
@@ -380,6 +409,10 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 			link = &(*link)->rb_right;
 		else if (service_id < cur_cm_id_priv->id.service_id)
 			link = &(*link)->rb_left;
+		else if (service_id > cur_cm_id_priv->id.service_id)
+			link = &(*link)->rb_right;
+		else if (data_cmp < 0)
+			link = &(*link)->rb_left;
 		else
 			link = &(*link)->rb_right;
 	}
@@ -389,16 +422,20 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 }
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
-					     __be64 service_id)
+					     __be64 service_id,
+					     u8 *private_data)
 {
 	struct rb_node *node = cm.listen_service_table.rb_node;
 	struct cm_id_private *cm_id_priv;
+	int data_cmp;
 
 	while (node) {
 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+		data_cmp = cm_compare_private_data(private_data,
+						   cm_id_priv->compare_data);
 		if ((cm_id_priv->id.service_mask & service_id) ==
 		     cm_id_priv->id.service_id &&
-		    (cm_id_priv->id.device == device))
+		    (cm_id_priv->id.device == device) && !data_cmp)
 			return cm_id_priv;
 
 		if (device < cm_id_priv->id.device)
@@ -407,6 +444,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
 			node = node->rb_right;
 		else if (service_id < cm_id_priv->id.service_id)
 			node = node->rb_left;
+		else if (service_id > cm_id_priv->id.service_id)
+			node = node->rb_right;
+		else if (data_cmp < 0)
+			node = node->rb_left;
 		else
 			node = node->rb_right;
 	}
@@ -730,15 +771,14 @@ retest:
 	wait_for_completion(&cm_id_priv->comp);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
-	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
-		kfree(cm_id_priv->private_data);
+	kfree(cm_id_priv->compare_data);
+	kfree(cm_id_priv->private_data);
 	kfree(cm_id_priv);
 }
 EXPORT_SYMBOL(ib_destroy_cm_id);
 
-int ib_cm_listen(struct ib_cm_id *cm_id,
-		 __be64 service_id,
-		 __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+		 struct ib_cm_compare_data *compare_data)
 {
 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
 	unsigned long flags;
@@ -752,7 +792,19 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
 		return -EINVAL;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	BUG_ON(cm_id->state != IB_CM_IDLE);
+	if (cm_id->state != IB_CM_IDLE)
+		return -EINVAL;
+
+	if (compare_data) {
+		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
+						   GFP_KERNEL);
+		if (!cm_id_priv->compare_data)
+			return -ENOMEM;
+		cm_mask_copy(cm_id_priv->compare_data->data,
+			     compare_data->data, compare_data->mask);
+		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
+		       IB_CM_COMPARE_SIZE);
+	}
 
 	cm_id->state = IB_CM_LISTEN;
 
@@ -769,6 +821,8 @@ int ib_cm_listen(struct ib_cm_id *cm_id,
 
 	if (cur_cm_id_priv) {
 		cm_id->state = IB_CM_IDLE;
+		kfree(cm_id_priv->compare_data);
+		cm_id_priv->compare_data = NULL;
 		ret = -EBUSY;
 	}
 	return ret;
@@ -1241,7 +1295,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 
 	/* Find matching listen request. */
 	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
-					   req_msg->service_id);
+					   req_msg->service_id,
+					   req_msg->private_data);
 	if (!listen_cm_id_priv) {
 		spin_unlock_irqrestore(&cm.lock, flags);
 		cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1276,6 +1331,7 @@ static int cm_req_handler(struct cm_work *work)
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 	cm_id_priv->id.remote_id = req_msg->local_comm_id;
 	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
 	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
 								id.local_id);
@@ -2549,7 +2605,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
 	sidr_req_msg->request_id = cm_id_priv->id.local_id;
-	sidr_req_msg->pkey = cpu_to_be16(param->pkey);
+	sidr_req_msg->pkey = cpu_to_be16(param->path->pkey);
 	sidr_req_msg->service_id = param->service_id;
 
 	if (param->private_data && param->private_data_len)
@@ -2641,6 +2697,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
 	cm_id_priv->av.dgid.global.interface_id = 0;
 	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
 	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
@@ -2654,7 +2711,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
 		goto out; /* Duplicate message. */
 	}
 	cur_cm_id_priv = cm_find_listen(cm_id->device,
-					sidr_req_msg->service_id);
+					sidr_req_msg->service_id,
+					sidr_req_msg->private_data);
 	if (!cur_cm_id_priv) {
 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
 		spin_unlock_irqrestore(&cm.lock, flags);
@@ -3291,7 +3349,6 @@ error:
 
 static void __exit ib_cm_cleanup(void)
 {
-	flush_workqueue(cm.wq);
 	destroy_workqueue(cm.wq);
 	ib_unregister_client(&cm_client);
 	idr_destroy(&cm.local_id_table);
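
With the ib_cm_listen() change above, a listener can ask the CM to match REQ private data against a masked pattern. A minimal sketch (not part of this patch; my_handler(), listen_with_filter() and the one-byte tag are illustrative names) of a listener that only sees REQs whose first private-data byte matches:

/* Hypothetical listener: accept REQs on service_id whose first private
 * data byte equals 'tag', using the compare_data argument added above. */
static int my_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	/* a real consumer would dispatch on event->event here */
	return 0;
}

static int listen_with_filter(struct ib_device *device, __be64 service_id,
			      u8 tag)
{
	struct ib_cm_compare_data filter;
	struct ib_cm_id *cm_id;
	int ret;

	cm_id = ib_create_cm_id(device, my_handler, NULL);
	if (IS_ERR(cm_id))
		return PTR_ERR(cm_id);

	memset(&filter, 0, sizeof filter);
	filter.data[0] = tag;	/* pattern byte to match */
	filter.mask[0] = 0xff;	/* only compare the first byte */

	ret = ib_cm_listen(cm_id, service_id, 0, &filter);
	if (ret)
		ib_destroy_cm_id(cm_id);
	return ret;
}

cma.c below uses the same mechanism in cma_set_compare_data()/cma_ib_listen() to demultiplex connection requests by destination IP address.
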
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
new file mode 100644
index 000000000000..a76834edf608
--- /dev/null
+++ b/drivers/infiniband/core/cma.c
@@ -0,0 +1,1927 @@
1/*
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
6 *
7 * This Software is licensed under one of the following licenses:
8 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is
10 * available from the Open Source Initiative, see
11 * http://www.opensource.org/licenses/cpl.php.
12 *
13 * 2) under the terms of the "The BSD License" a copy of which is
14 * available from the Open Source Initiative, see
15 * http://www.opensource.org/licenses/bsd-license.php.
16 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18 * copy of which is available from the Open Source Initiative, see
19 * http://www.opensource.org/licenses/gpl-license.php.
20 *
21 * Licensee has the right to choose one of the above licenses.
22 *
23 * Redistributions of source code must retain the above copyright
24 * notice and one of the license notices.
25 *
26 * Redistributions in binary form must reproduce both the above copyright
27 * notice, one of the license notices in the documentation
28 * and/or other materials provided with the distribution.
29 *
30 */
31
32#include <linux/completion.h>
33#include <linux/in.h>
34#include <linux/in6.h>
35#include <linux/mutex.h>
36#include <linux/random.h>
37#include <linux/idr.h>
38
39#include <net/tcp.h>
40
41#include <rdma/rdma_cm.h>
42#include <rdma/rdma_cm_ib.h>
43#include <rdma/ib_cache.h>
44#include <rdma/ib_cm.h>
45#include <rdma/ib_sa.h>
46
47MODULE_AUTHOR("Sean Hefty");
48MODULE_DESCRIPTION("Generic RDMA CM Agent");
49MODULE_LICENSE("Dual BSD/GPL");
50
51#define CMA_CM_RESPONSE_TIMEOUT 20
52#define CMA_MAX_CM_RETRIES 3
53
54static void cma_add_one(struct ib_device *device);
55static void cma_remove_one(struct ib_device *device);
56
57static struct ib_client cma_client = {
58 .name = "cma",
59 .add = cma_add_one,
60 .remove = cma_remove_one
61};
62
63static LIST_HEAD(dev_list);
64static LIST_HEAD(listen_any_list);
65static DEFINE_MUTEX(lock);
66static struct workqueue_struct *cma_wq;
67static DEFINE_IDR(sdp_ps);
68static DEFINE_IDR(tcp_ps);
69
70struct cma_device {
71 struct list_head list;
72 struct ib_device *device;
73 __be64 node_guid;
74 struct completion comp;
75 atomic_t refcount;
76 struct list_head id_list;
77};
78
79enum cma_state {
80 CMA_IDLE,
81 CMA_ADDR_QUERY,
82 CMA_ADDR_RESOLVED,
83 CMA_ROUTE_QUERY,
84 CMA_ROUTE_RESOLVED,
85 CMA_CONNECT,
86 CMA_DISCONNECT,
87 CMA_ADDR_BOUND,
88 CMA_LISTEN,
89 CMA_DEVICE_REMOVAL,
90 CMA_DESTROYING
91};
92
93struct rdma_bind_list {
94 struct idr *ps;
95 struct hlist_head owners;
96 unsigned short port;
97};
98
99/*
100 * Device removal can occur at anytime, so we need extra handling to
101 * serialize notifying the user of device removal with other callbacks.
102 * We do this by disabling removal notification while a callback is in process,
103 * and reporting it after the callback completes.
104 */
105struct rdma_id_private {
106 struct rdma_cm_id id;
107
108 struct rdma_bind_list *bind_list;
109 struct hlist_node node;
110 struct list_head list;
111 struct list_head listen_list;
112 struct cma_device *cma_dev;
113
114 enum cma_state state;
115 spinlock_t lock;
116 struct completion comp;
117 atomic_t refcount;
118 wait_queue_head_t wait_remove;
119 atomic_t dev_remove;
120
121 int backlog;
122 int timeout_ms;
123 struct ib_sa_query *query;
124 int query_id;
125 union {
126 struct ib_cm_id *ib;
127 } cm_id;
128
129 u32 seq_num;
130 u32 qp_num;
131 enum ib_qp_type qp_type;
132 u8 srq;
133};
134
135struct cma_work {
136 struct work_struct work;
137 struct rdma_id_private *id;
138 enum cma_state old_state;
139 enum cma_state new_state;
140 struct rdma_cm_event event;
141};
142
143union cma_ip_addr {
144 struct in6_addr ip6;
145 struct {
146 __u32 pad[3];
147 __u32 addr;
148 } ip4;
149};
150
151struct cma_hdr {
152 u8 cma_version;
153 u8 ip_version; /* IP version: 7:4 */
154 __u16 port;
155 union cma_ip_addr src_addr;
156 union cma_ip_addr dst_addr;
157};
158
159struct sdp_hh {
160 u8 bsdh[16];
161 u8 sdp_version; /* Major version: 7:4 */
162 u8 ip_version; /* IP version: 7:4 */
163 u8 sdp_specific1[10];
164 __u16 port;
165 __u16 sdp_specific2;
166 union cma_ip_addr src_addr;
167 union cma_ip_addr dst_addr;
168};
169
170struct sdp_hah {
171 u8 bsdh[16];
172 u8 sdp_version;
173};
174
175#define CMA_VERSION 0x00
176#define SDP_MAJ_VERSION 0x2
177
178static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
179{
180 unsigned long flags;
181 int ret;
182
183 spin_lock_irqsave(&id_priv->lock, flags);
184 ret = (id_priv->state == comp);
185 spin_unlock_irqrestore(&id_priv->lock, flags);
186 return ret;
187}
188
189static int cma_comp_exch(struct rdma_id_private *id_priv,
190 enum cma_state comp, enum cma_state exch)
191{
192 unsigned long flags;
193 int ret;
194
195 spin_lock_irqsave(&id_priv->lock, flags);
196 if ((ret = (id_priv->state == comp)))
197 id_priv->state = exch;
198 spin_unlock_irqrestore(&id_priv->lock, flags);
199 return ret;
200}
201
202static enum cma_state cma_exch(struct rdma_id_private *id_priv,
203 enum cma_state exch)
204{
205 unsigned long flags;
206 enum cma_state old;
207
208 spin_lock_irqsave(&id_priv->lock, flags);
209 old = id_priv->state;
210 id_priv->state = exch;
211 spin_unlock_irqrestore(&id_priv->lock, flags);
212 return old;
213}
214
215static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
216{
217 return hdr->ip_version >> 4;
218}
219
220static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
221{
222 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
223}
224
225static inline u8 sdp_get_majv(u8 sdp_version)
226{
227 return sdp_version >> 4;
228}
229
230static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
231{
232 return hh->ip_version >> 4;
233}
234
235static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
236{
237 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
238}
239
240static void cma_attach_to_dev(struct rdma_id_private *id_priv,
241 struct cma_device *cma_dev)
242{
243 atomic_inc(&cma_dev->refcount);
244 id_priv->cma_dev = cma_dev;
245 id_priv->id.device = cma_dev->device;
246 list_add_tail(&id_priv->list, &cma_dev->id_list);
247}
248
249static inline void cma_deref_dev(struct cma_device *cma_dev)
250{
251 if (atomic_dec_and_test(&cma_dev->refcount))
252 complete(&cma_dev->comp);
253}
254
255static void cma_detach_from_dev(struct rdma_id_private *id_priv)
256{
257 list_del(&id_priv->list);
258 cma_deref_dev(id_priv->cma_dev);
259 id_priv->cma_dev = NULL;
260}
261
262static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
263{
264 struct cma_device *cma_dev;
265 union ib_gid *gid;
266 int ret = -ENODEV;
267
268 gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
269
270 mutex_lock(&lock);
271 list_for_each_entry(cma_dev, &dev_list, list) {
272 ret = ib_find_cached_gid(cma_dev->device, gid,
273 &id_priv->id.port_num, NULL);
274 if (!ret) {
275 cma_attach_to_dev(id_priv, cma_dev);
276 break;
277 }
278 }
279 mutex_unlock(&lock);
280 return ret;
281}
282
283static int cma_acquire_dev(struct rdma_id_private *id_priv)
284{
285 switch (id_priv->id.route.addr.dev_addr.dev_type) {
286 case IB_NODE_CA:
287 return cma_acquire_ib_dev(id_priv);
288 default:
289 return -ENODEV;
290 }
291}
292
293static void cma_deref_id(struct rdma_id_private *id_priv)
294{
295 if (atomic_dec_and_test(&id_priv->refcount))
296 complete(&id_priv->comp);
297}
298
299static void cma_release_remove(struct rdma_id_private *id_priv)
300{
301 if (atomic_dec_and_test(&id_priv->dev_remove))
302 wake_up(&id_priv->wait_remove);
303}
304
305struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
306 void *context, enum rdma_port_space ps)
307{
308 struct rdma_id_private *id_priv;
309
310 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
311 if (!id_priv)
312 return ERR_PTR(-ENOMEM);
313
314 id_priv->state = CMA_IDLE;
315 id_priv->id.context = context;
316 id_priv->id.event_handler = event_handler;
317 id_priv->id.ps = ps;
318 spin_lock_init(&id_priv->lock);
319 init_completion(&id_priv->comp);
320 atomic_set(&id_priv->refcount, 1);
321 init_waitqueue_head(&id_priv->wait_remove);
322 atomic_set(&id_priv->dev_remove, 0);
323 INIT_LIST_HEAD(&id_priv->listen_list);
324 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
325
326 return &id_priv->id;
327}
328EXPORT_SYMBOL(rdma_create_id);
329
330static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
331{
332 struct ib_qp_attr qp_attr;
333 struct rdma_dev_addr *dev_addr;
334 int ret;
335
336 dev_addr = &id_priv->id.route.addr.dev_addr;
337 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
338 ib_addr_get_pkey(dev_addr),
339 &qp_attr.pkey_index);
340 if (ret)
341 return ret;
342
343 qp_attr.qp_state = IB_QPS_INIT;
344 qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
345 qp_attr.port_num = id_priv->id.port_num;
346 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
347 IB_QP_PKEY_INDEX | IB_QP_PORT);
348}
349
350int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
351 struct ib_qp_init_attr *qp_init_attr)
352{
353 struct rdma_id_private *id_priv;
354 struct ib_qp *qp;
355 int ret;
356
357 id_priv = container_of(id, struct rdma_id_private, id);
358 if (id->device != pd->device)
359 return -EINVAL;
360
361 qp = ib_create_qp(pd, qp_init_attr);
362 if (IS_ERR(qp))
363 return PTR_ERR(qp);
364
365 switch (id->device->node_type) {
366 case IB_NODE_CA:
367 ret = cma_init_ib_qp(id_priv, qp);
368 break;
369 default:
370 ret = -ENOSYS;
371 break;
372 }
373
374 if (ret)
375 goto err;
376
377 id->qp = qp;
378 id_priv->qp_num = qp->qp_num;
379 id_priv->qp_type = qp->qp_type;
380 id_priv->srq = (qp->srq != NULL);
381 return 0;
382err:
383 ib_destroy_qp(qp);
384 return ret;
385}
386EXPORT_SYMBOL(rdma_create_qp);
387
388void rdma_destroy_qp(struct rdma_cm_id *id)
389{
390 ib_destroy_qp(id->qp);
391}
392EXPORT_SYMBOL(rdma_destroy_qp);
393
394static int cma_modify_qp_rtr(struct rdma_cm_id *id)
395{
396 struct ib_qp_attr qp_attr;
397 int qp_attr_mask, ret;
398
399 if (!id->qp)
400 return 0;
401
402 /* Need to update QP attributes from default values. */
403 qp_attr.qp_state = IB_QPS_INIT;
404 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
405 if (ret)
406 return ret;
407
408 ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
409 if (ret)
410 return ret;
411
412 qp_attr.qp_state = IB_QPS_RTR;
413 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
414 if (ret)
415 return ret;
416
417 return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
418}
419
420static int cma_modify_qp_rts(struct rdma_cm_id *id)
421{
422 struct ib_qp_attr qp_attr;
423 int qp_attr_mask, ret;
424
425 if (!id->qp)
426 return 0;
427
428 qp_attr.qp_state = IB_QPS_RTS;
429 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
430 if (ret)
431 return ret;
432
433 return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
434}
435
436static int cma_modify_qp_err(struct rdma_cm_id *id)
437{
438 struct ib_qp_attr qp_attr;
439
440 if (!id->qp)
441 return 0;
442
443 qp_attr.qp_state = IB_QPS_ERR;
444 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
445}
446
447int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
448 int *qp_attr_mask)
449{
450 struct rdma_id_private *id_priv;
451 int ret;
452
453 id_priv = container_of(id, struct rdma_id_private, id);
454 switch (id_priv->id.device->node_type) {
455 case IB_NODE_CA:
456 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
457 qp_attr_mask);
458 if (qp_attr->qp_state == IB_QPS_RTR)
459 qp_attr->rq_psn = id_priv->seq_num;
460 break;
461 default:
462 ret = -ENOSYS;
463 break;
464 }
465
466 return ret;
467}
468EXPORT_SYMBOL(rdma_init_qp_attr);
469
470static inline int cma_zero_addr(struct sockaddr *addr)
471{
472 struct in6_addr *ip6;
473
474 if (addr->sa_family == AF_INET)
475 return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
476 else {
477 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
478 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
479 ip6->s6_addr32[3] | ip6->s6_addr32[4]) == 0;
480 }
481}
482
483static inline int cma_loopback_addr(struct sockaddr *addr)
484{
485 return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
486}
487
488static inline int cma_any_addr(struct sockaddr *addr)
489{
490 return cma_zero_addr(addr) || cma_loopback_addr(addr);
491}
492
493static inline int cma_any_port(struct sockaddr *addr)
494{
495 return !((struct sockaddr_in *) addr)->sin_port;
496}
497
498static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
499 u8 *ip_ver, __u16 *port,
500 union cma_ip_addr **src, union cma_ip_addr **dst)
501{
502 switch (ps) {
503 case RDMA_PS_SDP:
504 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
505 SDP_MAJ_VERSION)
506 return -EINVAL;
507
508 *ip_ver = sdp_get_ip_ver(hdr);
509 *port = ((struct sdp_hh *) hdr)->port;
510 *src = &((struct sdp_hh *) hdr)->src_addr;
511 *dst = &((struct sdp_hh *) hdr)->dst_addr;
512 break;
513 default:
514 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
515 return -EINVAL;
516
517 *ip_ver = cma_get_ip_ver(hdr);
518 *port = ((struct cma_hdr *) hdr)->port;
519 *src = &((struct cma_hdr *) hdr)->src_addr;
520 *dst = &((struct cma_hdr *) hdr)->dst_addr;
521 break;
522 }
523
524 if (*ip_ver != 4 && *ip_ver != 6)
525 return -EINVAL;
526 return 0;
527}
528
529static void cma_save_net_info(struct rdma_addr *addr,
530 struct rdma_addr *listen_addr,
531 u8 ip_ver, __u16 port,
532 union cma_ip_addr *src, union cma_ip_addr *dst)
533{
534 struct sockaddr_in *listen4, *ip4;
535 struct sockaddr_in6 *listen6, *ip6;
536
537 switch (ip_ver) {
538 case 4:
539 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
540 ip4 = (struct sockaddr_in *) &addr->src_addr;
541 ip4->sin_family = listen4->sin_family;
542 ip4->sin_addr.s_addr = dst->ip4.addr;
543 ip4->sin_port = listen4->sin_port;
544
545 ip4 = (struct sockaddr_in *) &addr->dst_addr;
546 ip4->sin_family = listen4->sin_family;
547 ip4->sin_addr.s_addr = src->ip4.addr;
548 ip4->sin_port = port;
549 break;
550 case 6:
551 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
552 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
553 ip6->sin6_family = listen6->sin6_family;
554 ip6->sin6_addr = dst->ip6;
555 ip6->sin6_port = listen6->sin6_port;
556
557 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
558 ip6->sin6_family = listen6->sin6_family;
559 ip6->sin6_addr = src->ip6;
560 ip6->sin6_port = port;
561 break;
562 default:
563 break;
564 }
565}
566
567static inline int cma_user_data_offset(enum rdma_port_space ps)
568{
569 switch (ps) {
570 case RDMA_PS_SDP:
571 return 0;
572 default:
573 return sizeof(struct cma_hdr);
574 }
575}
576
577static int cma_notify_user(struct rdma_id_private *id_priv,
578 enum rdma_cm_event_type type, int status,
579 void *data, u8 data_len)
580{
581 struct rdma_cm_event event;
582
583 event.event = type;
584 event.status = status;
585 event.private_data = data;
586 event.private_data_len = data_len;
587
588 return id_priv->id.event_handler(&id_priv->id, &event);
589}
590
591static void cma_cancel_route(struct rdma_id_private *id_priv)
592{
593 switch (id_priv->id.device->node_type) {
594 case IB_NODE_CA:
595 if (id_priv->query)
596 ib_sa_cancel_query(id_priv->query_id, id_priv->query);
597 break;
598 default:
599 break;
600 }
601}
602
603static inline int cma_internal_listen(struct rdma_id_private *id_priv)
604{
605 return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
606 cma_any_addr(&id_priv->id.route.addr.src_addr);
607}
608
609static void cma_destroy_listen(struct rdma_id_private *id_priv)
610{
611 cma_exch(id_priv, CMA_DESTROYING);
612
613 if (id_priv->cma_dev) {
614 switch (id_priv->id.device->node_type) {
615 case IB_NODE_CA:
616 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
617 ib_destroy_cm_id(id_priv->cm_id.ib);
618 break;
619 default:
620 break;
621 }
622 cma_detach_from_dev(id_priv);
623 }
624 list_del(&id_priv->listen_list);
625
626 cma_deref_id(id_priv);
627 wait_for_completion(&id_priv->comp);
628
629 kfree(id_priv);
630}
631
632static void cma_cancel_listens(struct rdma_id_private *id_priv)
633{
634 struct rdma_id_private *dev_id_priv;
635
636 mutex_lock(&lock);
637 list_del(&id_priv->list);
638
639 while (!list_empty(&id_priv->listen_list)) {
640 dev_id_priv = list_entry(id_priv->listen_list.next,
641 struct rdma_id_private, listen_list);
642 cma_destroy_listen(dev_id_priv);
643 }
644 mutex_unlock(&lock);
645}
646
647static void cma_cancel_operation(struct rdma_id_private *id_priv,
648 enum cma_state state)
649{
650 switch (state) {
651 case CMA_ADDR_QUERY:
652 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
653 break;
654 case CMA_ROUTE_QUERY:
655 cma_cancel_route(id_priv);
656 break;
657 case CMA_LISTEN:
658 if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
659 !id_priv->cma_dev)
660 cma_cancel_listens(id_priv);
661 break;
662 default:
663 break;
664 }
665}
666
667static void cma_release_port(struct rdma_id_private *id_priv)
668{
669 struct rdma_bind_list *bind_list = id_priv->bind_list;
670
671 if (!bind_list)
672 return;
673
674 mutex_lock(&lock);
675 hlist_del(&id_priv->node);
676 if (hlist_empty(&bind_list->owners)) {
677 idr_remove(bind_list->ps, bind_list->port);
678 kfree(bind_list);
679 }
680 mutex_unlock(&lock);
681}
682
683void rdma_destroy_id(struct rdma_cm_id *id)
684{
685 struct rdma_id_private *id_priv;
686 enum cma_state state;
687
688 id_priv = container_of(id, struct rdma_id_private, id);
689 state = cma_exch(id_priv, CMA_DESTROYING);
690 cma_cancel_operation(id_priv, state);
691
692 if (id_priv->cma_dev) {
693 switch (id->device->node_type) {
694 case IB_NODE_CA:
695 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
696 ib_destroy_cm_id(id_priv->cm_id.ib);
697 break;
698 default:
699 break;
700 }
701 mutex_lock(&lock);
702 cma_detach_from_dev(id_priv);
703 mutex_unlock(&lock);
704 }
705
706 cma_release_port(id_priv);
707 cma_deref_id(id_priv);
708 wait_for_completion(&id_priv->comp);
709
710 kfree(id_priv->id.route.path_rec);
711 kfree(id_priv);
712}
713EXPORT_SYMBOL(rdma_destroy_id);
714
715static int cma_rep_recv(struct rdma_id_private *id_priv)
716{
717 int ret;
718
719 ret = cma_modify_qp_rtr(&id_priv->id);
720 if (ret)
721 goto reject;
722
723 ret = cma_modify_qp_rts(&id_priv->id);
724 if (ret)
725 goto reject;
726
727 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
728 if (ret)
729 goto reject;
730
731 return 0;
732reject:
733 cma_modify_qp_err(&id_priv->id);
734 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
735 NULL, 0, NULL, 0);
736 return ret;
737}
738
739static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
740{
741 if (id_priv->id.ps == RDMA_PS_SDP &&
742 sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
743 SDP_MAJ_VERSION)
744 return -EINVAL;
745
746 return 0;
747}
748
749static int cma_rtu_recv(struct rdma_id_private *id_priv)
750{
751 int ret;
752
753 ret = cma_modify_qp_rts(&id_priv->id);
754 if (ret)
755 goto reject;
756
757 return 0;
758reject:
759 cma_modify_qp_err(&id_priv->id);
760 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
761 NULL, 0, NULL, 0);
762 return ret;
763}
764
765static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
766{
767 struct rdma_id_private *id_priv = cm_id->context;
768 enum rdma_cm_event_type event;
769 u8 private_data_len = 0;
770 int ret = 0, status = 0;
771
772 atomic_inc(&id_priv->dev_remove);
773 if (!cma_comp(id_priv, CMA_CONNECT))
774 goto out;
775
776 switch (ib_event->event) {
777 case IB_CM_REQ_ERROR:
778 case IB_CM_REP_ERROR:
779 event = RDMA_CM_EVENT_UNREACHABLE;
780 status = -ETIMEDOUT;
781 break;
782 case IB_CM_REP_RECEIVED:
783 status = cma_verify_rep(id_priv, ib_event->private_data);
784 if (status)
785 event = RDMA_CM_EVENT_CONNECT_ERROR;
786 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
787 status = cma_rep_recv(id_priv);
788 event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
789 RDMA_CM_EVENT_ESTABLISHED;
790 } else
791 event = RDMA_CM_EVENT_CONNECT_RESPONSE;
792 private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
793 break;
794 case IB_CM_RTU_RECEIVED:
795 status = cma_rtu_recv(id_priv);
796 event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
797 RDMA_CM_EVENT_ESTABLISHED;
798 break;
799 case IB_CM_DREQ_ERROR:
800 status = -ETIMEDOUT; /* fall through */
801 case IB_CM_DREQ_RECEIVED:
802 case IB_CM_DREP_RECEIVED:
803 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
804 goto out;
805 event = RDMA_CM_EVENT_DISCONNECTED;
806 break;
807 case IB_CM_TIMEWAIT_EXIT:
808 case IB_CM_MRA_RECEIVED:
809 /* ignore event */
810 goto out;
811 case IB_CM_REJ_RECEIVED:
812 cma_modify_qp_err(&id_priv->id);
813 status = ib_event->param.rej_rcvd.reason;
814 event = RDMA_CM_EVENT_REJECTED;
815 break;
816 default:
817 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
818 ib_event->event);
819 goto out;
820 }
821
822 ret = cma_notify_user(id_priv, event, status, ib_event->private_data,
823 private_data_len);
824 if (ret) {
825 /* Destroy the CM ID by returning a non-zero value. */
826 id_priv->cm_id.ib = NULL;
827 cma_exch(id_priv, CMA_DESTROYING);
828 cma_release_remove(id_priv);
829 rdma_destroy_id(&id_priv->id);
830 return ret;
831 }
832out:
833 cma_release_remove(id_priv);
834 return ret;
835}
836
837static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
838 struct ib_cm_event *ib_event)
839{
840 struct rdma_id_private *id_priv;
841 struct rdma_cm_id *id;
842 struct rdma_route *rt;
843 union cma_ip_addr *src, *dst;
844 __u16 port;
845 u8 ip_ver;
846
847 id = rdma_create_id(listen_id->event_handler, listen_id->context,
848 listen_id->ps);
849 if (IS_ERR(id))
850 return NULL;
851
852 rt = &id->route;
853 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
854 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
855 if (!rt->path_rec)
856 goto err;
857
858 if (cma_get_net_info(ib_event->private_data, listen_id->ps,
859 &ip_ver, &port, &src, &dst))
860 goto err;
861
862 cma_save_net_info(&id->route.addr, &listen_id->route.addr,
863 ip_ver, port, src, dst);
864 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
865 if (rt->num_paths == 2)
866 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
867
868 ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
869 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
870 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
871 rt->addr.dev_addr.dev_type = IB_NODE_CA;
872
873 id_priv = container_of(id, struct rdma_id_private, id);
874 id_priv->state = CMA_CONNECT;
875 return id_priv;
876err:
877 rdma_destroy_id(id);
878 return NULL;
879}
880
881static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
882{
883 struct rdma_id_private *listen_id, *conn_id;
884 int offset, ret;
885
886 listen_id = cm_id->context;
887 atomic_inc(&listen_id->dev_remove);
888 if (!cma_comp(listen_id, CMA_LISTEN)) {
889 ret = -ECONNABORTED;
890 goto out;
891 }
892
893 conn_id = cma_new_id(&listen_id->id, ib_event);
894 if (!conn_id) {
895 ret = -ENOMEM;
896 goto out;
897 }
898
899 atomic_inc(&conn_id->dev_remove);
900 ret = cma_acquire_ib_dev(conn_id);
901 if (ret) {
902 ret = -ENODEV;
903 cma_release_remove(conn_id);
904 rdma_destroy_id(&conn_id->id);
905 goto out;
906 }
907
908 conn_id->cm_id.ib = cm_id;
909 cm_id->context = conn_id;
910 cm_id->cm_handler = cma_ib_handler;
911
912 offset = cma_user_data_offset(listen_id->id.ps);
913 ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
914 ib_event->private_data + offset,
915 IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
916 if (ret) {
917 /* Destroy the CM ID by returning a non-zero value. */
918 conn_id->cm_id.ib = NULL;
919 cma_exch(conn_id, CMA_DESTROYING);
920 cma_release_remove(conn_id);
921 rdma_destroy_id(&conn_id->id);
922 }
923out:
924 cma_release_remove(listen_id);
925 return ret;
926}
927
928static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
929{
930 return cpu_to_be64(((u64)ps << 16) +
931 be16_to_cpu(((struct sockaddr_in *) addr)->sin_port));
932}
933
934static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
935 struct ib_cm_compare_data *compare)
936{
937 struct cma_hdr *cma_data, *cma_mask;
938 struct sdp_hh *sdp_data, *sdp_mask;
939 __u32 ip4_addr;
940 struct in6_addr ip6_addr;
941
942 memset(compare, 0, sizeof *compare);
943 cma_data = (void *) compare->data;
944 cma_mask = (void *) compare->mask;
945 sdp_data = (void *) compare->data;
946 sdp_mask = (void *) compare->mask;
947
948 switch (addr->sa_family) {
949 case AF_INET:
950 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
951 if (ps == RDMA_PS_SDP) {
952 sdp_set_ip_ver(sdp_data, 4);
953 sdp_set_ip_ver(sdp_mask, 0xF);
954 sdp_data->dst_addr.ip4.addr = ip4_addr;
955 sdp_mask->dst_addr.ip4.addr = ~0;
956 } else {
957 cma_set_ip_ver(cma_data, 4);
958 cma_set_ip_ver(cma_mask, 0xF);
959 cma_data->dst_addr.ip4.addr = ip4_addr;
960 cma_mask->dst_addr.ip4.addr = ~0;
961 }
962 break;
963 case AF_INET6:
964 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
965 if (ps == RDMA_PS_SDP) {
966 sdp_set_ip_ver(sdp_data, 6);
967 sdp_set_ip_ver(sdp_mask, 0xF);
968 sdp_data->dst_addr.ip6 = ip6_addr;
969 memset(&sdp_mask->dst_addr.ip6, 0xFF,
970 sizeof sdp_mask->dst_addr.ip6);
971 } else {
972 cma_set_ip_ver(cma_data, 6);
973 cma_set_ip_ver(cma_mask, 0xF);
974 cma_data->dst_addr.ip6 = ip6_addr;
975 memset(&cma_mask->dst_addr.ip6, 0xFF,
976 sizeof cma_mask->dst_addr.ip6);
977 }
978 break;
979 default:
980 break;
981 }
982}
983
984static int cma_ib_listen(struct rdma_id_private *id_priv)
985{
986 struct ib_cm_compare_data compare_data;
987 struct sockaddr *addr;
988 __be64 svc_id;
989 int ret;
990
991 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
992 id_priv);
993 if (IS_ERR(id_priv->cm_id.ib))
994 return PTR_ERR(id_priv->cm_id.ib);
995
996 addr = &id_priv->id.route.addr.src_addr;
997 svc_id = cma_get_service_id(id_priv->id.ps, addr);
998 if (cma_any_addr(addr))
999 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1000 else {
1001 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1002 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1003 }
1004
1005 if (ret) {
1006 ib_destroy_cm_id(id_priv->cm_id.ib);
1007 id_priv->cm_id.ib = NULL;
1008 }
1009
1010 return ret;
1011}
1012
1013static int cma_listen_handler(struct rdma_cm_id *id,
1014 struct rdma_cm_event *event)
1015{
1016 struct rdma_id_private *id_priv = id->context;
1017
1018 id->context = id_priv->id.context;
1019 id->event_handler = id_priv->id.event_handler;
1020 return id_priv->id.event_handler(id, event);
1021}
1022
1023static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1024 struct cma_device *cma_dev)
1025{
1026 struct rdma_id_private *dev_id_priv;
1027 struct rdma_cm_id *id;
1028 int ret;
1029
1030 id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1031 if (IS_ERR(id))
1032 return;
1033
1034 dev_id_priv = container_of(id, struct rdma_id_private, id);
1035
1036 dev_id_priv->state = CMA_ADDR_BOUND;
1037 memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1038 ip_addr_size(&id_priv->id.route.addr.src_addr));
1039
1040 cma_attach_to_dev(dev_id_priv, cma_dev);
1041 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1042
1043 ret = rdma_listen(id, id_priv->backlog);
1044 if (ret)
1045 goto err;
1046
1047 return;
1048err:
1049 cma_destroy_listen(dev_id_priv);
1050}
1051
1052static void cma_listen_on_all(struct rdma_id_private *id_priv)
1053{
1054 struct cma_device *cma_dev;
1055
1056 mutex_lock(&lock);
1057 list_add_tail(&id_priv->list, &listen_any_list);
1058 list_for_each_entry(cma_dev, &dev_list, list)
1059 cma_listen_on_dev(id_priv, cma_dev);
1060 mutex_unlock(&lock);
1061}
1062
1063static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1064{
1065 struct sockaddr_in addr_in;
1066
1067 memset(&addr_in, 0, sizeof addr_in);
1068 addr_in.sin_family = af;
1069 return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1070}
1071
1072int rdma_listen(struct rdma_cm_id *id, int backlog)
1073{
1074 struct rdma_id_private *id_priv;
1075 int ret;
1076
1077 id_priv = container_of(id, struct rdma_id_private, id);
1078 if (id_priv->state == CMA_IDLE) {
1079 ret = cma_bind_any(id, AF_INET);
1080 if (ret)
1081 return ret;
1082 }
1083
1084 if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1085 return -EINVAL;
1086
1087 id_priv->backlog = backlog;
1088 if (id->device) {
1089 switch (id->device->node_type) {
1090 case IB_NODE_CA:
1091 ret = cma_ib_listen(id_priv);
1092 if (ret)
1093 goto err;
1094 break;
1095 default:
1096 ret = -ENOSYS;
1097 goto err;
1098 }
1099 } else
1100 cma_listen_on_all(id_priv);
1101
1102 return 0;
1103err:
1104 id_priv->backlog = 0;
1105 cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1106 return ret;
1107}
1108EXPORT_SYMBOL(rdma_listen);
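(Aside, not part of the patch: a passive-side consumer would bind and listen roughly as sketched below. The event handler body is elided, the port number is illustrative, and <rdma/rdma_cm.h> is the header this patch series introduces.)

#include <linux/err.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <rdma/rdma_cm.h>

static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event);

/* Sketch only: bind to a well-known port and listen on all RDMA devices. */
static struct rdma_cm_id *example_listen(void)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_port   = htons(18515),	/* illustrative port */
	};
	struct rdma_cm_id *id;
	int ret;

	id = rdma_create_id(example_cm_handler, NULL, RDMA_PS_TCP);
	if (IS_ERR(id))
		return id;

	/* A zero (wildcard) address makes rdma_listen() listen on every device. */
	ret = rdma_bind_addr(id, (struct sockaddr *) &sin);
	if (!ret)
		ret = rdma_listen(id, 10);
	if (ret) {
		rdma_destroy_id(id);
		return ERR_PTR(ret);
	}
	return id;
}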
1109
1110static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1111 void *context)
1112{
1113 struct cma_work *work = context;
1114 struct rdma_route *route;
1115
1116 route = &work->id->id.route;
1117
1118 if (!status) {
1119 route->num_paths = 1;
1120 *route->path_rec = *path_rec;
1121 } else {
1122 work->old_state = CMA_ROUTE_QUERY;
1123 work->new_state = CMA_ADDR_RESOLVED;
1124 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1125 }
1126
1127 queue_work(cma_wq, &work->work);
1128}
1129
1130static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1131 struct cma_work *work)
1132{
1133 struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
1134 struct ib_sa_path_rec path_rec;
1135
1136 memset(&path_rec, 0, sizeof path_rec);
1137 path_rec.sgid = *ib_addr_get_sgid(addr);
1138 path_rec.dgid = *ib_addr_get_dgid(addr);
1139 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1140 path_rec.numb_path = 1;
1141
1142 id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
1143 id_priv->id.port_num, &path_rec,
1144 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1145 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
1146 timeout_ms, GFP_KERNEL,
1147 cma_query_handler, work, &id_priv->query);
1148
1149 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1150}
1151
1152static void cma_work_handler(void *data)
1153{
1154 struct cma_work *work = data;
1155 struct rdma_id_private *id_priv = work->id;
1156 int destroy = 0;
1157
1158 atomic_inc(&id_priv->dev_remove);
1159 if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1160 goto out;
1161
1162 if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1163 cma_exch(id_priv, CMA_DESTROYING);
1164 destroy = 1;
1165 }
1166out:
1167 cma_release_remove(id_priv);
1168 cma_deref_id(id_priv);
1169 if (destroy)
1170 rdma_destroy_id(&id_priv->id);
1171 kfree(work);
1172}
1173
1174static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1175{
1176 struct rdma_route *route = &id_priv->id.route;
1177 struct cma_work *work;
1178 int ret;
1179
1180 work = kzalloc(sizeof *work, GFP_KERNEL);
1181 if (!work)
1182 return -ENOMEM;
1183
1184 work->id = id_priv;
1185 INIT_WORK(&work->work, cma_work_handler, work);
1186 work->old_state = CMA_ROUTE_QUERY;
1187 work->new_state = CMA_ROUTE_RESOLVED;
1188 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1189
1190 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1191 if (!route->path_rec) {
1192 ret = -ENOMEM;
1193 goto err1;
1194 }
1195
1196 ret = cma_query_ib_route(id_priv, timeout_ms, work);
1197 if (ret)
1198 goto err2;
1199
1200 return 0;
1201err2:
1202 kfree(route->path_rec);
1203 route->path_rec = NULL;
1204err1:
1205 kfree(work);
1206 return ret;
1207}
1208
1209int rdma_set_ib_paths(struct rdma_cm_id *id,
1210 struct ib_sa_path_rec *path_rec, int num_paths)
1211{
1212 struct rdma_id_private *id_priv;
1213 int ret;
1214
1215 id_priv = container_of(id, struct rdma_id_private, id);
1216 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1217 return -EINVAL;
1218
1219 id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1220 if (!id->route.path_rec) {
1221 ret = -ENOMEM;
1222 goto err;
1223 }
1224
1225 memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1226 return 0;
1227err:
1228 cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1229 return ret;
1230}
1231EXPORT_SYMBOL(rdma_set_ib_paths);
1232
1233int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1234{
1235 struct rdma_id_private *id_priv;
1236 int ret;
1237
1238 id_priv = container_of(id, struct rdma_id_private, id);
1239 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1240 return -EINVAL;
1241
1242 atomic_inc(&id_priv->refcount);
1243 switch (id->device->node_type) {
1244 case IB_NODE_CA:
1245 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1246 break;
1247 default:
1248 ret = -ENOSYS;
1249 break;
1250 }
1251 if (ret)
1252 goto err;
1253
1254 return 0;
1255err:
1256 cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1257 cma_deref_id(id_priv);
1258 return ret;
1259}
1260EXPORT_SYMBOL(rdma_resolve_route);
1261
1262static int cma_bind_loopback(struct rdma_id_private *id_priv)
1263{
1264 struct cma_device *cma_dev;
1265 struct ib_port_attr port_attr;
1266 union ib_gid *gid;
1267 u16 pkey;
1268 int ret;
1269 u8 p;
1270
1271 mutex_lock(&lock);
1272 list_for_each_entry(cma_dev, &dev_list, list)
1273 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1274 if (!ib_query_port (cma_dev->device, p, &port_attr) &&
1275 port_attr.state == IB_PORT_ACTIVE)
1276 goto port_found;
1277
1278 if (!list_empty(&dev_list)) {
1279 p = 1;
1280 cma_dev = list_entry(dev_list.next, struct cma_device, list);
1281 } else {
1282 ret = -ENODEV;
1283 goto out;
1284 }
1285
1286port_found:
1287 gid = ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr);
1288 ret = ib_get_cached_gid(cma_dev->device, p, 0, gid);
1289 if (ret)
1290 goto out;
1291
1292 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1293 if (ret)
1294 goto out;
1295
1296 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1297 id_priv->id.port_num = p;
1298 cma_attach_to_dev(id_priv, cma_dev);
1299out:
1300 mutex_unlock(&lock);
1301 return ret;
1302}
1303
1304static void addr_handler(int status, struct sockaddr *src_addr,
1305 struct rdma_dev_addr *dev_addr, void *context)
1306{
1307 struct rdma_id_private *id_priv = context;
1308 enum rdma_cm_event_type event;
1309
1310 atomic_inc(&id_priv->dev_remove);
1311 if (!id_priv->cma_dev && !status)
1312 status = cma_acquire_dev(id_priv);
1313
1314 if (status) {
1315 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
1316 goto out;
1317 event = RDMA_CM_EVENT_ADDR_ERROR;
1318 } else {
1319 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1320 goto out;
1321 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1322 ip_addr_size(src_addr));
1323 event = RDMA_CM_EVENT_ADDR_RESOLVED;
1324 }
1325
1326 if (cma_notify_user(id_priv, event, status, NULL, 0)) {
1327 cma_exch(id_priv, CMA_DESTROYING);
1328 cma_release_remove(id_priv);
1329 cma_deref_id(id_priv);
1330 rdma_destroy_id(&id_priv->id);
1331 return;
1332 }
1333out:
1334 cma_release_remove(id_priv);
1335 cma_deref_id(id_priv);
1336}
1337
1338static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1339{
1340 struct cma_work *work;
1341 struct sockaddr_in *src_in, *dst_in;
1342 int ret;
1343
1344 work = kzalloc(sizeof *work, GFP_KERNEL);
1345 if (!work)
1346 return -ENOMEM;
1347
1348 if (!id_priv->cma_dev) {
1349 ret = cma_bind_loopback(id_priv);
1350 if (ret)
1351 goto err;
1352 }
1353
1354 ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
1355 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr));
1356
1357 if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1358 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1359 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1360 src_in->sin_family = dst_in->sin_family;
1361 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1362 }
1363
1364 work->id = id_priv;
1365 INIT_WORK(&work->work, cma_work_handler, work);
1366 work->old_state = CMA_ADDR_QUERY;
1367 work->new_state = CMA_ADDR_RESOLVED;
1368 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1369 queue_work(cma_wq, &work->work);
1370 return 0;
1371err:
1372 kfree(work);
1373 return ret;
1374}
1375
1376static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1377 struct sockaddr *dst_addr)
1378{
1379 if (src_addr && src_addr->sa_family)
1380 return rdma_bind_addr(id, src_addr);
1381 else
1382 return cma_bind_any(id, dst_addr->sa_family);
1383}
1384
1385int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1386 struct sockaddr *dst_addr, int timeout_ms)
1387{
1388 struct rdma_id_private *id_priv;
1389 int ret;
1390
1391 id_priv = container_of(id, struct rdma_id_private, id);
1392 if (id_priv->state == CMA_IDLE) {
1393 ret = cma_bind_addr(id, src_addr, dst_addr);
1394 if (ret)
1395 return ret;
1396 }
1397
1398 if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1399 return -EINVAL;
1400
1401 atomic_inc(&id_priv->refcount);
1402 memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1403 if (cma_any_addr(dst_addr))
1404 ret = cma_resolve_loopback(id_priv);
1405 else
1406 ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
1407 &id->route.addr.dev_addr,
1408 timeout_ms, addr_handler, id_priv);
1409 if (ret)
1410 goto err;
1411
1412 return 0;
1413err:
1414 cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1415 cma_deref_id(id_priv);
1416 return ret;
1417}
1418EXPORT_SYMBOL(rdma_resolve_addr);
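(Aside, not part of the patch: rdma_resolve_addr() is asynchronous, so the consumer drives the next step from its event handler. A sketch of that progression using the events defined by this patch; the timeout is illustrative and QP setup is elided.)

/* Sketch only: advance from address resolution to route resolution. */
static int example_client_handler(struct rdma_cm_id *id,
				  struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		/* id->device and the bound source address are now valid. */
		return rdma_resolve_route(id, 2000);
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
		return -1;	/* a non-zero return makes the CMA destroy the id */
	default:
		return 0;
	}
}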
1419
1420static void cma_bind_port(struct rdma_bind_list *bind_list,
1421 struct rdma_id_private *id_priv)
1422{
1423 struct sockaddr_in *sin;
1424
1425 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1426 sin->sin_port = htons(bind_list->port);
1427 id_priv->bind_list = bind_list;
1428 hlist_add_head(&id_priv->node, &bind_list->owners);
1429}
1430
1431static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1432 unsigned short snum)
1433{
1434 struct rdma_bind_list *bind_list;
1435 int port, start, ret;
1436
1437 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1438 if (!bind_list)
1439 return -ENOMEM;
1440
1441 start = snum ? snum : sysctl_local_port_range[0];
1442
1443 do {
1444 ret = idr_get_new_above(ps, bind_list, start, &port);
1445 } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1446
1447 if (ret)
1448 goto err;
1449
1450 if ((snum && port != snum) ||
1451 (!snum && port > sysctl_local_port_range[1])) {
1452 idr_remove(ps, port);
1453 ret = -EADDRNOTAVAIL;
1454 goto err;
1455 }
1456
1457 bind_list->ps = ps;
1458 bind_list->port = (unsigned short) port;
1459 cma_bind_port(bind_list, id_priv);
1460 return 0;
1461err:
1462 kfree(bind_list);
1463 return ret;
1464}
1465
1466static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1467{
1468 struct rdma_id_private *cur_id;
1469 struct sockaddr_in *sin, *cur_sin;
1470 struct rdma_bind_list *bind_list;
1471 struct hlist_node *node;
1472 unsigned short snum;
1473
1474 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1475 snum = ntohs(sin->sin_port);
1476 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1477 return -EACCES;
1478
1479 bind_list = idr_find(ps, snum);
1480 if (!bind_list)
1481 return cma_alloc_port(ps, id_priv, snum);
1482
1483 /*
1484 * We don't support binding to any address if anyone is bound to
1485 * a specific address on the same port.
1486 */
1487 if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1488 return -EADDRNOTAVAIL;
1489
1490 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1491 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1492 return -EADDRNOTAVAIL;
1493
1494 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1495 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1496 return -EADDRINUSE;
1497 }
1498
1499 cma_bind_port(bind_list, id_priv);
1500 return 0;
1501}
1502
1503static int cma_get_port(struct rdma_id_private *id_priv)
1504{
1505 struct idr *ps;
1506 int ret;
1507
1508 switch (id_priv->id.ps) {
1509 case RDMA_PS_SDP:
1510 ps = &sdp_ps;
1511 break;
1512 case RDMA_PS_TCP:
1513 ps = &tcp_ps;
1514 break;
1515 default:
1516 return -EPROTONOSUPPORT;
1517 }
1518
1519 mutex_lock(&lock);
1520 if (cma_any_port(&id_priv->id.route.addr.src_addr))
1521 ret = cma_alloc_port(ps, id_priv, 0);
1522 else
1523 ret = cma_use_port(ps, id_priv);
1524 mutex_unlock(&lock);
1525
1526 return ret;
1527}
1528
1529int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
1530{
1531 struct rdma_id_private *id_priv;
1532 int ret;
1533
1534 if (addr->sa_family != AF_INET)
1535 return -EAFNOSUPPORT;
1536
1537 id_priv = container_of(id, struct rdma_id_private, id);
1538 if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
1539 return -EINVAL;
1540
1541 if (!cma_any_addr(addr)) {
1542 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
1543 if (!ret)
1544 ret = cma_acquire_dev(id_priv);
1545 if (ret)
1546 goto err;
1547 }
1548
1549 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
1550 ret = cma_get_port(id_priv);
1551 if (ret)
1552 goto err;
1553
1554 return 0;
1555err:
1556 cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
1557 return ret;
1558}
1559EXPORT_SYMBOL(rdma_bind_addr);
1560
1561static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
1562 struct rdma_route *route)
1563{
1564 struct sockaddr_in *src4, *dst4;
1565 struct cma_hdr *cma_hdr;
1566 struct sdp_hh *sdp_hdr;
1567
1568 src4 = (struct sockaddr_in *) &route->addr.src_addr;
1569 dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
1570
1571 switch (ps) {
1572 case RDMA_PS_SDP:
1573 sdp_hdr = hdr;
1574 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
1575 return -EINVAL;
1576 sdp_set_ip_ver(sdp_hdr, 4);
1577 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1578 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1579 sdp_hdr->port = src4->sin_port;
1580 break;
1581 default:
1582 cma_hdr = hdr;
1583 cma_hdr->cma_version = CMA_VERSION;
1584 cma_set_ip_ver(cma_hdr, 4);
1585 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1586 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1587 cma_hdr->port = src4->sin_port;
1588 break;
1589 }
1590 return 0;
1591}
1592
1593static int cma_connect_ib(struct rdma_id_private *id_priv,
1594 struct rdma_conn_param *conn_param)
1595{
1596 struct ib_cm_req_param req;
1597 struct rdma_route *route;
1598 void *private_data;
1599 int offset, ret;
1600
1601 memset(&req, 0, sizeof req);
1602 offset = cma_user_data_offset(id_priv->id.ps);
1603 req.private_data_len = offset + conn_param->private_data_len;
1604 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
1605 if (!private_data)
1606 return -ENOMEM;
1607
1608 if (conn_param->private_data && conn_param->private_data_len)
1609 memcpy(private_data + offset, conn_param->private_data,
1610 conn_param->private_data_len);
1611
1612 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
1613 id_priv);
1614 if (IS_ERR(id_priv->cm_id.ib)) {
1615 ret = PTR_ERR(id_priv->cm_id.ib);
1616 goto out;
1617 }
1618
1619 route = &id_priv->id.route;
1620 ret = cma_format_hdr(private_data, id_priv->id.ps, route);
1621 if (ret)
1622 goto out;
1623 req.private_data = private_data;
1624
1625 req.primary_path = &route->path_rec[0];
1626 if (route->num_paths == 2)
1627 req.alternate_path = &route->path_rec[1];
1628
1629 req.service_id = cma_get_service_id(id_priv->id.ps,
1630 &route->addr.dst_addr);
1631 req.qp_num = id_priv->qp_num;
1632 req.qp_type = id_priv->qp_type;
1633 req.starting_psn = id_priv->seq_num;
1634 req.responder_resources = conn_param->responder_resources;
1635 req.initiator_depth = conn_param->initiator_depth;
1636 req.flow_control = conn_param->flow_control;
1637 req.retry_count = conn_param->retry_count;
1638 req.rnr_retry_count = conn_param->rnr_retry_count;
1639 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1640 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1641 req.max_cm_retries = CMA_MAX_CM_RETRIES;
1642 req.srq = id_priv->srq ? 1 : 0;
1643
1644 ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
1645out:
1646 kfree(private_data);
1647 return ret;
1648}
1649
1650int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1651{
1652 struct rdma_id_private *id_priv;
1653 int ret;
1654
1655 id_priv = container_of(id, struct rdma_id_private, id);
1656 if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
1657 return -EINVAL;
1658
1659 if (!id->qp) {
1660 id_priv->qp_num = conn_param->qp_num;
1661 id_priv->qp_type = conn_param->qp_type;
1662 id_priv->srq = conn_param->srq;
1663 }
1664
1665 switch (id->device->node_type) {
1666 case IB_NODE_CA:
1667 ret = cma_connect_ib(id_priv, conn_param);
1668 break;
1669 default:
1670 ret = -ENOSYS;
1671 break;
1672 }
1673 if (ret)
1674 goto err;
1675
1676 return 0;
1677err:
1678 cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
1679 return ret;
1680}
1681EXPORT_SYMBOL(rdma_connect);
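(Aside, not part of the patch, continuing the client-side sketch above: once RDMA_CM_EVENT_ROUTE_RESOLVED arrives, the consumer fills a struct rdma_conn_param and calls rdma_connect(). The field names are the ones cma_connect_ib() consumes above; the numeric values and the qp_num handling are illustrative.)

/* Sketch only: initiate the connection once the route is known. */
static int example_do_connect(struct rdma_cm_id *id, u32 qp_num)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth     = 1,
		.retry_count         = 7,
		.rnr_retry_count     = 7,
		.qp_num              = qp_num,	/* used when id->qp is not set */
		.qp_type             = IB_QPT_RC,
	};

	return rdma_connect(id, &param);
}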
1682
1683static int cma_accept_ib(struct rdma_id_private *id_priv,
1684 struct rdma_conn_param *conn_param)
1685{
1686 struct ib_cm_rep_param rep;
1687 int ret;
1688
1689 ret = cma_modify_qp_rtr(&id_priv->id);
1690 if (ret)
1691 return ret;
1692
1693 memset(&rep, 0, sizeof rep);
1694 rep.qp_num = id_priv->qp_num;
1695 rep.starting_psn = id_priv->seq_num;
1696 rep.private_data = conn_param->private_data;
1697 rep.private_data_len = conn_param->private_data_len;
1698 rep.responder_resources = conn_param->responder_resources;
1699 rep.initiator_depth = conn_param->initiator_depth;
1700 rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
1701 rep.failover_accepted = 0;
1702 rep.flow_control = conn_param->flow_control;
1703 rep.rnr_retry_count = conn_param->rnr_retry_count;
1704 rep.srq = id_priv->srq ? 1 : 0;
1705
1706 return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
1707}
1708
1709int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1710{
1711 struct rdma_id_private *id_priv;
1712 int ret;
1713
1714 id_priv = container_of(id, struct rdma_id_private, id);
1715 if (!cma_comp(id_priv, CMA_CONNECT))
1716 return -EINVAL;
1717
1718 if (!id->qp && conn_param) {
1719 id_priv->qp_num = conn_param->qp_num;
1720 id_priv->qp_type = conn_param->qp_type;
1721 id_priv->srq = conn_param->srq;
1722 }
1723
1724 switch (id->device->node_type) {
1725 case IB_NODE_CA:
1726 if (conn_param)
1727 ret = cma_accept_ib(id_priv, conn_param);
1728 else
1729 ret = cma_rep_recv(id_priv);
1730 break;
1731 default:
1732 ret = -ENOSYS;
1733 break;
1734 }
1735
1736 if (ret)
1737 goto reject;
1738
1739 return 0;
1740reject:
1741 cma_modify_qp_err(id);
1742 rdma_reject(id, NULL, 0);
1743 return ret;
1744}
1745EXPORT_SYMBOL(rdma_accept);
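(Aside, not part of the patch: on the listening side a new connection arrives as RDMA_CM_EVENT_CONNECT_REQUEST on a freshly created rdma_cm_id, the conn_id built in cma_req_handler() above. A sketch of accepting it; QP creation is elided and the parameter values are illustrative.)

/* Sketch only: accept an incoming connection request from the event handler. */
static int example_on_connect_request(struct rdma_cm_id *new_id)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth     = 1,
		.rnr_retry_count     = 7,
	};

	/*
	 * A real consumer would create a QP on new_id->device first, or pass
	 * qp_num/qp_type in param, as rdma_accept() allows when id->qp is unset.
	 * On failure, rdma_accept() already sends a reject; returning non-zero
	 * from the CONNECT_REQUEST handler then destroys new_id.
	 */
	return rdma_accept(new_id, &param);
}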
1746
1747int rdma_reject(struct rdma_cm_id *id, const void *private_data,
1748 u8 private_data_len)
1749{
1750 struct rdma_id_private *id_priv;
1751 int ret;
1752
1753 id_priv = container_of(id, struct rdma_id_private, id);
1754 if (!cma_comp(id_priv, CMA_CONNECT))
1755 return -EINVAL;
1756
1757 switch (id->device->node_type) {
1758 case IB_NODE_CA:
1759 ret = ib_send_cm_rej(id_priv->cm_id.ib,
1760 IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1761 private_data, private_data_len);
1762 break;
1763 default:
1764 ret = -ENOSYS;
1765 break;
1766 }
1767 return ret;
1768}
1769EXPORT_SYMBOL(rdma_reject);
1770
1771int rdma_disconnect(struct rdma_cm_id *id)
1772{
1773 struct rdma_id_private *id_priv;
1774 int ret;
1775
1776 id_priv = container_of(id, struct rdma_id_private, id);
1777 if (!cma_comp(id_priv, CMA_CONNECT) &&
1778 !cma_comp(id_priv, CMA_DISCONNECT))
1779 return -EINVAL;
1780
1781 ret = cma_modify_qp_err(id);
1782 if (ret)
1783 goto out;
1784
1785 switch (id->device->node_type) {
1786 case IB_NODE_CA:
1787 /* Initiate or respond to a disconnect. */
1788 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
1789 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
1790 break;
1791 default:
1792 break;
1793 }
1794out:
1795 return ret;
1796}
1797EXPORT_SYMBOL(rdma_disconnect);
1798
1799static void cma_add_one(struct ib_device *device)
1800{
1801 struct cma_device *cma_dev;
1802 struct rdma_id_private *id_priv;
1803
1804 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
1805 if (!cma_dev)
1806 return;
1807
1808 cma_dev->device = device;
1809 cma_dev->node_guid = device->node_guid;
1810 if (!cma_dev->node_guid)
1811 goto err;
1812
1813 init_completion(&cma_dev->comp);
1814 atomic_set(&cma_dev->refcount, 1);
1815 INIT_LIST_HEAD(&cma_dev->id_list);
1816 ib_set_client_data(device, &cma_client, cma_dev);
1817
1818 mutex_lock(&lock);
1819 list_add_tail(&cma_dev->list, &dev_list);
1820 list_for_each_entry(id_priv, &listen_any_list, list)
1821 cma_listen_on_dev(id_priv, cma_dev);
1822 mutex_unlock(&lock);
1823 return;
1824err:
1825 kfree(cma_dev);
1826}
1827
1828static int cma_remove_id_dev(struct rdma_id_private *id_priv)
1829{
1830 enum cma_state state;
1831
1832 /* Record that we want to remove the device */
1833 state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
1834 if (state == CMA_DESTROYING)
1835 return 0;
1836
1837 cma_cancel_operation(id_priv, state);
1838 wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
1839
1840 /* Check for destruction from another callback. */
1841 if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
1842 return 0;
1843
1844 return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL,
1845 0, NULL, 0);
1846}
1847
1848static void cma_process_remove(struct cma_device *cma_dev)
1849{
1850 struct list_head remove_list;
1851 struct rdma_id_private *id_priv;
1852 int ret;
1853
1854 INIT_LIST_HEAD(&remove_list);
1855
1856 mutex_lock(&lock);
1857 while (!list_empty(&cma_dev->id_list)) {
1858 id_priv = list_entry(cma_dev->id_list.next,
1859 struct rdma_id_private, list);
1860
1861 if (cma_internal_listen(id_priv)) {
1862 cma_destroy_listen(id_priv);
1863 continue;
1864 }
1865
1866 list_del(&id_priv->list);
1867 list_add_tail(&id_priv->list, &remove_list);
1868 atomic_inc(&id_priv->refcount);
1869 mutex_unlock(&lock);
1870
1871 ret = cma_remove_id_dev(id_priv);
1872 cma_deref_id(id_priv);
1873 if (ret)
1874 rdma_destroy_id(&id_priv->id);
1875
1876 mutex_lock(&lock);
1877 }
1878 mutex_unlock(&lock);
1879
1880 cma_deref_dev(cma_dev);
1881 wait_for_completion(&cma_dev->comp);
1882}
1883
1884static void cma_remove_one(struct ib_device *device)
1885{
1886 struct cma_device *cma_dev;
1887
1888 cma_dev = ib_get_client_data(device, &cma_client);
1889 if (!cma_dev)
1890 return;
1891
1892 mutex_lock(&lock);
1893 list_del(&cma_dev->list);
1894 mutex_unlock(&lock);
1895
1896 cma_process_remove(cma_dev);
1897 kfree(cma_dev);
1898}
1899
1900static int cma_init(void)
1901{
1902 int ret;
1903
1904 cma_wq = create_singlethread_workqueue("rdma_cm_wq");
1905 if (!cma_wq)
1906 return -ENOMEM;
1907
1908 ret = ib_register_client(&cma_client);
1909 if (ret)
1910 goto err;
1911 return 0;
1912
1913err:
1914 destroy_workqueue(cma_wq);
1915 return ret;
1916}
1917
1918static void cma_cleanup(void)
1919{
1920 ib_unregister_client(&cma_client);
1921 destroy_workqueue(cma_wq);
1922 idr_destroy(&sdp_ps);
1923 idr_destroy(&tcp_ps);
1924}
1925
1926module_init(cma_init);
1927module_exit(cma_cleanup);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 838bf54458d2..615fe9cc6c56 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -54,7 +54,7 @@ enum {
54/* 54/*
55 * If an FMR is not in use, then the list member will point to either 55 * If an FMR is not in use, then the list member will point to either
56 * its pool's free_list (if the FMR can be mapped again; that is, 56 * its pool's free_list (if the FMR can be mapped again; that is,
57 * remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the 57 * remap_count < pool->max_remaps) or its pool's dirty_list (if the
58 * FMR needs to be unmapped before being remapped). In either of 58 * FMR needs to be unmapped before being remapped). In either of
59 * these cases it is a bug if the ref_count is not 0. In other words, 59 * these cases it is a bug if the ref_count is not 0. In other words,
60 * if ref_count is > 0, then the list member must not be linked into 60 * if ref_count is > 0, then the list member must not be linked into
@@ -84,6 +84,7 @@ struct ib_fmr_pool {
84 84
85 int pool_size; 85 int pool_size;
86 int max_pages; 86 int max_pages;
87 int max_remaps;
87 int dirty_watermark; 88 int dirty_watermark;
88 int dirty_len; 89 int dirty_len;
89 struct list_head free_list; 90 struct list_head free_list;
@@ -214,8 +215,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
214{ 215{
215 struct ib_device *device; 216 struct ib_device *device;
216 struct ib_fmr_pool *pool; 217 struct ib_fmr_pool *pool;
218 struct ib_device_attr *attr;
217 int i; 219 int i;
218 int ret; 220 int ret;
221 int max_remaps;
219 222
220 if (!params) 223 if (!params)
221 return ERR_PTR(-EINVAL); 224 return ERR_PTR(-EINVAL);
@@ -228,6 +231,26 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
228 return ERR_PTR(-ENOSYS); 231 return ERR_PTR(-ENOSYS);
229 } 232 }
230 233
234 attr = kmalloc(sizeof *attr, GFP_KERNEL);
235 if (!attr) {
236 printk(KERN_WARNING "couldn't allocate device attr struct");
237 return ERR_PTR(-ENOMEM);
238 }
239
240 ret = ib_query_device(device, attr);
241 if (ret) {
242 printk(KERN_WARNING "couldn't query device");
243 kfree(attr);
244 return ERR_PTR(ret);
245 }
246
247 if (!attr->max_map_per_fmr)
248 max_remaps = IB_FMR_MAX_REMAPS;
249 else
250 max_remaps = attr->max_map_per_fmr;
251
252 kfree(attr);
253
231 pool = kmalloc(sizeof *pool, GFP_KERNEL); 254 pool = kmalloc(sizeof *pool, GFP_KERNEL);
232 if (!pool) { 255 if (!pool) {
233 printk(KERN_WARNING "couldn't allocate pool struct"); 256 printk(KERN_WARNING "couldn't allocate pool struct");
@@ -258,6 +281,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
258 281
259 pool->pool_size = 0; 282 pool->pool_size = 0;
260 pool->max_pages = params->max_pages_per_fmr; 283 pool->max_pages = params->max_pages_per_fmr;
284 pool->max_remaps = max_remaps;
261 pool->dirty_watermark = params->dirty_watermark; 285 pool->dirty_watermark = params->dirty_watermark;
262 pool->dirty_len = 0; 286 pool->dirty_len = 0;
263 spin_lock_init(&pool->pool_lock); 287 spin_lock_init(&pool->pool_lock);
@@ -279,7 +303,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
279 struct ib_pool_fmr *fmr; 303 struct ib_pool_fmr *fmr;
280 struct ib_fmr_attr attr = { 304 struct ib_fmr_attr attr = {
281 .max_pages = params->max_pages_per_fmr, 305 .max_pages = params->max_pages_per_fmr,
282 .max_maps = IB_FMR_MAX_REMAPS, 306 .max_maps = pool->max_remaps,
283 .page_shift = params->page_shift 307 .page_shift = params->page_shift
284 }; 308 };
285 309
@@ -489,7 +513,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
489 513
490 --fmr->ref_count; 514 --fmr->ref_count;
491 if (!fmr->ref_count) { 515 if (!fmr->ref_count) {
492 if (fmr->remap_count < IB_FMR_MAX_REMAPS) { 516 if (fmr->remap_count < pool->max_remaps) {
493 list_add_tail(&fmr->list, &pool->free_list); 517 list_add_tail(&fmr->list, &pool->free_list);
494 } else { 518 } else {
495 list_add_tail(&fmr->list, &pool->dirty_list); 519 list_add_tail(&fmr->list, &pool->dirty_list);
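(Aside, not part of the patch: callers of the FMR pool API are unchanged by this hunk — the pool simply caps remapping at the device's max_map_per_fmr, falling back to IB_FMR_MAX_REMAPS when the device reports 0. A sketch of pool creation; the parameter values are illustrative and the field names are assumed to match ib_fmr_pool.h of this era.)

#include <rdma/ib_fmr_pool.h>

/* Sketch only: create an FMR pool; the remap limit now comes from the device. */
static struct ib_fmr_pool *example_create_pool(struct ib_pd *pd)
{
	struct ib_fmr_pool_param params = {
		.max_pages_per_fmr = 64,
		.page_shift        = PAGE_SHIFT,
		.access            = IB_ACCESS_LOCAL_WRITE |
				     IB_ACCESS_REMOTE_READ,
		.pool_size         = 1024,
		.dirty_watermark   = 32,
		.cache             = 1,
	};

	return ib_create_fmr_pool(pd, &params);
}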
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 5ad41a64314c..b38e02a5db35 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -34,6 +34,7 @@
34 * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ 34 * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 35 */
36#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
37#include <rdma/ib_cache.h>
37 38
38#include "mad_priv.h" 39#include "mad_priv.h"
39#include "mad_rmpp.h" 40#include "mad_rmpp.h"
@@ -45,8 +46,7 @@ MODULE_DESCRIPTION("kernel IB MAD API");
45MODULE_AUTHOR("Hal Rosenstock"); 46MODULE_AUTHOR("Hal Rosenstock");
46MODULE_AUTHOR("Sean Hefty"); 47MODULE_AUTHOR("Sean Hefty");
47 48
48 49static kmem_cache_t *ib_mad_cache;
49kmem_cache_t *ib_mad_cache;
50 50
51static struct list_head ib_mad_port_list; 51static struct list_head ib_mad_port_list;
52static u32 ib_mad_client_id = 0; 52static u32 ib_mad_client_id = 0;
@@ -1673,20 +1673,21 @@ static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
1673 rwc->recv_buf.mad->mad_hdr.mgmt_class; 1673 rwc->recv_buf.mad->mad_hdr.mgmt_class;
1674} 1674}
1675 1675
1676static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr, 1676static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
1677 struct ib_mad_send_wr_private *wr,
1677 struct ib_mad_recv_wc *rwc ) 1678 struct ib_mad_recv_wc *rwc )
1678{ 1679{
1679 struct ib_ah_attr attr; 1680 struct ib_ah_attr attr;
1680 u8 send_resp, rcv_resp; 1681 u8 send_resp, rcv_resp;
1682 union ib_gid sgid;
1683 struct ib_device *device = mad_agent_priv->agent.device;
1684 u8 port_num = mad_agent_priv->agent.port_num;
1685 u8 lmc;
1681 1686
1682 send_resp = ((struct ib_mad *)(wr->send_buf.mad))-> 1687 send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
1683 mad_hdr.method & IB_MGMT_METHOD_RESP; 1688 mad_hdr.method & IB_MGMT_METHOD_RESP;
1684 rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP; 1689 rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
1685 1690
1686 if (!send_resp && rcv_resp)
1687 /* is request/response. GID/LIDs are both local (same). */
1688 return 1;
1689
1690 if (send_resp == rcv_resp) 1691 if (send_resp == rcv_resp)
1691 /* both requests, or both responses. GIDs different */ 1692 /* both requests, or both responses. GIDs different */
1692 return 0; 1693 return 0;
@@ -1695,48 +1696,78 @@ static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
1695 /* Assume not equal, to avoid false positives. */ 1696 /* Assume not equal, to avoid false positives. */
1696 return 0; 1697 return 0;
1697 1698
1698 if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH)) 1699 if (!!(attr.ah_flags & IB_AH_GRH) !=
1699 return attr.dlid == rwc->wc->slid; 1700 !!(rwc->wc->wc_flags & IB_WC_GRH))
1700 else if ((attr.ah_flags & IB_AH_GRH) &&
1701 (rwc->wc->wc_flags & IB_WC_GRH))
1702 return memcmp(attr.grh.dgid.raw,
1703 rwc->recv_buf.grh->sgid.raw, 16) == 0;
1704 else
1705 /* one has GID, other does not. Assume different */ 1701 /* one has GID, other does not. Assume different */
1706 return 0; 1702 return 0;
1703
1704 if (!send_resp && rcv_resp) {
1705 /* is request/response. */
1706 if (!(attr.ah_flags & IB_AH_GRH)) {
1707 if (ib_get_cached_lmc(device, port_num, &lmc))
1708 return 0;
1709 return (!lmc || !((attr.src_path_bits ^
1710 rwc->wc->dlid_path_bits) &
1711 ((1 << lmc) - 1)));
1712 } else {
1713 if (ib_get_cached_gid(device, port_num,
1714 attr.grh.sgid_index, &sgid))
1715 return 0;
1716 return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1717 16);
1718 }
1719 }
1720
1721 if (!(attr.ah_flags & IB_AH_GRH))
1722 return attr.dlid == rwc->wc->slid;
1723 else
1724 return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
1725 16);
1726}
1727
1728static inline int is_direct(u8 class)
1729{
1730 return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1707} 1731}
1732
1708struct ib_mad_send_wr_private* 1733struct ib_mad_send_wr_private*
1709ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, 1734ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
1710 struct ib_mad_recv_wc *mad_recv_wc) 1735 struct ib_mad_recv_wc *wc)
1711{ 1736{
1712 struct ib_mad_send_wr_private *mad_send_wr; 1737 struct ib_mad_send_wr_private *wr;
1713 struct ib_mad *mad; 1738 struct ib_mad *mad;
1714 1739
1715 mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad; 1740 mad = (struct ib_mad *)wc->recv_buf.mad;
1716 1741
1717 list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, 1742 list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1718 agent_list) { 1743 if ((wr->tid == mad->mad_hdr.tid) &&
1719 if ((mad_send_wr->tid == mad->mad_hdr.tid) && 1744 rcv_has_same_class(wr, wc) &&
1720 rcv_has_same_class(mad_send_wr, mad_recv_wc) && 1745 /*
1721 rcv_has_same_gid(mad_send_wr, mad_recv_wc)) 1746 * Don't check GID for direct routed MADs.
1722 return mad_send_wr; 1747 * These might have permissive LIDs.
1748 */
1749 (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1750 rcv_has_same_gid(mad_agent_priv, wr, wc)))
1751 return wr;
1723 } 1752 }
1724 1753
1725 /* 1754 /*
1726 * It's possible to receive the response before we've 1755 * It's possible to receive the response before we've
1727 * been notified that the send has completed 1756 * been notified that the send has completed
1728 */ 1757 */
1729 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, 1758 list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1730 agent_list) { 1759 if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1731 if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && 1760 wr->tid == mad->mad_hdr.tid &&
1732 mad_send_wr->tid == mad->mad_hdr.tid && 1761 wr->timeout &&
1733 mad_send_wr->timeout && 1762 rcv_has_same_class(wr, wc) &&
1734 rcv_has_same_class(mad_send_wr, mad_recv_wc) && 1763 /*
1735 rcv_has_same_gid(mad_send_wr, mad_recv_wc)) { 1764 * Don't check GID for direct routed MADs.
1765 * These might have permissive LIDs.
1766 */
1767 (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1768 rcv_has_same_gid(mad_agent_priv, wr, wc)))
1736 /* Verify request has not been canceled */ 1769 /* Verify request has not been canceled */
1737 return (mad_send_wr->status == IB_WC_SUCCESS) ? 1770 return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1738 mad_send_wr : NULL;
1739 }
1740 } 1771 }
1741 return NULL; 1772 return NULL;
1742} 1773}
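(Aside, not part of the patch: the new request/response matching only compares the path bits that the port's LMC makes significant. The arithmetic is easy to check in isolation; the values below are illustrative.)

#include <stdio.h>

/* Sketch of the LMC-masked path-bits comparison added to rcv_has_same_gid(). */
static int path_bits_match(unsigned int lmc, unsigned char a, unsigned char b)
{
	return !lmc || !((a ^ b) & ((1 << lmc) - 1));
}

int main(void)
{
	printf("%d\n", path_bits_match(0, 0x5, 0x2));	/* 1: lmc 0, single LID */
	printf("%d\n", path_bits_match(2, 0x5, 0x1));	/* 1: low two bits equal */
	printf("%d\n", path_bits_match(2, 0x5, 0x6));	/* 0: low two bits differ */
	return 0;
}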
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index b4fa28d3160f..d147f3bad2ce 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -212,8 +212,6 @@ struct ib_mad_port_private {
212 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; 212 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
213}; 213};
214 214
215extern kmem_cache_t *ib_mad_cache;
216
217int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); 215int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
218 216
219struct ib_mad_send_wr_private * 217struct ib_mad_send_wr_private *
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 501cc054cb3b..e911c99ff843 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -47,6 +47,7 @@
47 47
48#include <rdma/ib_pack.h> 48#include <rdma/ib_pack.h>
49#include <rdma/ib_sa.h> 49#include <rdma/ib_sa.h>
50#include <rdma/ib_cache.h>
50 51
51MODULE_AUTHOR("Roland Dreier"); 52MODULE_AUTHOR("Roland Dreier");
52MODULE_DESCRIPTION("InfiniBand subnet administration query support"); 53MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -441,6 +442,36 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
441} 442}
442EXPORT_SYMBOL(ib_sa_cancel_query); 443EXPORT_SYMBOL(ib_sa_cancel_query);
443 444
445int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
446 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
447{
448 int ret;
449 u16 gid_index;
450
451 memset(ah_attr, 0, sizeof *ah_attr);
452 ah_attr->dlid = be16_to_cpu(rec->dlid);
453 ah_attr->sl = rec->sl;
454 ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
455 ah_attr->port_num = port_num;
456
457 if (rec->hop_limit > 1) {
458 ah_attr->ah_flags = IB_AH_GRH;
459 ah_attr->grh.dgid = rec->dgid;
460
461 ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
462 &gid_index);
463 if (ret)
464 return ret;
465
466 ah_attr->grh.sgid_index = gid_index;
467 ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
468 ah_attr->grh.hop_limit = rec->hop_limit;
469 ah_attr->grh.traffic_class = rec->traffic_class;
470 }
471 return 0;
472}
473EXPORT_SYMBOL(ib_init_ah_from_path);
474
444static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) 475static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
445{ 476{
446 unsigned long flags; 477 unsigned long flags;
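(Aside, not part of the patch: the new ib_init_ah_from_path() lets a consumer build an address handle directly from an SA path record. A sketch of the intended call pattern; ib_create_ah() is the existing verbs call and error handling is elided.)

#include <rdma/ib_verbs.h>
#include <rdma/ib_sa.h>

/* Sketch only: turn a resolved path record into an address handle. */
static struct ib_ah *example_ah_from_path(struct ib_device *device, u8 port_num,
					  struct ib_pd *pd,
					  struct ib_sa_path_rec *rec)
{
	struct ib_ah_attr ah_attr;

	if (ib_init_ah_from_path(device, port_num, rec, &ah_attr))
		return NULL;

	return ib_create_ah(pd, &ah_attr);
}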
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 9164a09b6ccd..c1c6fda9452c 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -30,7 +30,7 @@
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 * 32 *
33 * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $ 33 * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $
34 */ 34 */
35 35
36#include <linux/completion.h> 36#include <linux/completion.h>
@@ -50,6 +50,7 @@
50 50
51#include <rdma/ib_cm.h> 51#include <rdma/ib_cm.h>
52#include <rdma/ib_user_cm.h> 52#include <rdma/ib_user_cm.h>
53#include <rdma/ib_marshall.h>
53 54
54MODULE_AUTHOR("Libor Michalek"); 55MODULE_AUTHOR("Libor Michalek");
55MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); 56MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
@@ -63,7 +64,7 @@ struct ib_ucm_device {
63}; 64};
64 65
65struct ib_ucm_file { 66struct ib_ucm_file {
66 struct semaphore mutex; 67 struct mutex file_mutex;
67 struct file *filp; 68 struct file *filp;
68 struct ib_ucm_device *device; 69 struct ib_ucm_device *device;
69 70
@@ -152,7 +153,7 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
152{ 153{
153 struct ib_ucm_event *uevent; 154 struct ib_ucm_event *uevent;
154 155
155 down(&ctx->file->mutex); 156 mutex_lock(&ctx->file->file_mutex);
156 list_del(&ctx->file_list); 157 list_del(&ctx->file_list);
157 while (!list_empty(&ctx->events)) { 158 while (!list_empty(&ctx->events)) {
158 159
@@ -167,7 +168,7 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
167 168
168 kfree(uevent); 169 kfree(uevent);
169 } 170 }
170 up(&ctx->file->mutex); 171 mutex_unlock(&ctx->file->file_mutex);
171} 172}
172 173
173static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) 174static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
@@ -205,36 +206,6 @@ error:
205 return NULL; 206 return NULL;
206} 207}
207 208
208static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
209 struct ib_sa_path_rec *kpath)
210{
211 if (!kpath || !upath)
212 return;
213
214 memcpy(upath->dgid, kpath->dgid.raw, sizeof *upath->dgid);
215 memcpy(upath->sgid, kpath->sgid.raw, sizeof *upath->sgid);
216
217 upath->dlid = kpath->dlid;
218 upath->slid = kpath->slid;
219 upath->raw_traffic = kpath->raw_traffic;
220 upath->flow_label = kpath->flow_label;
221 upath->hop_limit = kpath->hop_limit;
222 upath->traffic_class = kpath->traffic_class;
223 upath->reversible = kpath->reversible;
224 upath->numb_path = kpath->numb_path;
225 upath->pkey = kpath->pkey;
226 upath->sl = kpath->sl;
227 upath->mtu_selector = kpath->mtu_selector;
228 upath->mtu = kpath->mtu;
229 upath->rate_selector = kpath->rate_selector;
230 upath->rate = kpath->rate;
231 upath->packet_life_time = kpath->packet_life_time;
232 upath->preference = kpath->preference;
233
234 upath->packet_life_time_selector =
235 kpath->packet_life_time_selector;
236}
237
238static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, 209static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
239 struct ib_cm_req_event_param *kreq) 210 struct ib_cm_req_event_param *kreq)
240{ 211{
@@ -253,8 +224,10 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
253 ureq->srq = kreq->srq; 224 ureq->srq = kreq->srq;
254 ureq->port = kreq->port; 225 ureq->port = kreq->port;
255 226
256 ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); 227 ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path);
257 ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); 228 if (kreq->alternate_path)
229 ib_copy_path_rec_to_user(&ureq->alternate_path,
230 kreq->alternate_path);
258} 231}
259 232
260static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, 233static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
@@ -324,8 +297,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
324 info = evt->param.rej_rcvd.ari; 297 info = evt->param.rej_rcvd.ari;
325 break; 298 break;
326 case IB_CM_LAP_RECEIVED: 299 case IB_CM_LAP_RECEIVED:
327 ib_ucm_event_path_get(&uvt->resp.u.lap_resp.path, 300 ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path,
328 evt->param.lap_rcvd.alternate_path); 301 evt->param.lap_rcvd.alternate_path);
329 uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; 302 uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
330 uvt->resp.present = IB_UCM_PRES_ALTERNATE; 303 uvt->resp.present = IB_UCM_PRES_ALTERNATE;
331 break; 304 break;
@@ -402,11 +375,11 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
402 if (result) 375 if (result)
403 goto err2; 376 goto err2;
404 377
405 down(&ctx->file->mutex); 378 mutex_lock(&ctx->file->file_mutex);
406 list_add_tail(&uevent->file_list, &ctx->file->events); 379 list_add_tail(&uevent->file_list, &ctx->file->events);
407 list_add_tail(&uevent->ctx_list, &ctx->events); 380 list_add_tail(&uevent->ctx_list, &ctx->events);
408 wake_up_interruptible(&ctx->file->poll_wait); 381 wake_up_interruptible(&ctx->file->poll_wait);
409 up(&ctx->file->mutex); 382 mutex_unlock(&ctx->file->file_mutex);
410 return 0; 383 return 0;
411 384
412err2: 385err2:
@@ -432,7 +405,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
432 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 405 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
433 return -EFAULT; 406 return -EFAULT;
434 407
435 down(&file->mutex); 408 mutex_lock(&file->file_mutex);
436 while (list_empty(&file->events)) { 409 while (list_empty(&file->events)) {
437 410
438 if (file->filp->f_flags & O_NONBLOCK) { 411 if (file->filp->f_flags & O_NONBLOCK) {
@@ -447,9 +420,9 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
447 420
448 prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); 421 prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
449 422
450 up(&file->mutex); 423 mutex_unlock(&file->file_mutex);
451 schedule(); 424 schedule();
452 down(&file->mutex); 425 mutex_lock(&file->file_mutex);
453 426
454 finish_wait(&file->poll_wait, &wait); 427 finish_wait(&file->poll_wait, &wait);
455 } 428 }
@@ -509,7 +482,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
509 kfree(uevent->info); 482 kfree(uevent->info);
510 kfree(uevent); 483 kfree(uevent);
511done: 484done:
512 up(&file->mutex); 485 mutex_unlock(&file->file_mutex);
513 return result; 486 return result;
514} 487}
515 488
@@ -528,9 +501,9 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
528 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 501 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
529 return -EFAULT; 502 return -EFAULT;
530 503
531 down(&file->mutex); 504 mutex_lock(&file->file_mutex);
532 ctx = ib_ucm_ctx_alloc(file); 505 ctx = ib_ucm_ctx_alloc(file);
533 up(&file->mutex); 506 mutex_unlock(&file->file_mutex);
534 if (!ctx) 507 if (!ctx)
535 return -ENOMEM; 508 return -ENOMEM;
536 509
@@ -637,65 +610,11 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
637 return result; 610 return result;
638} 611}
639 612
640static void ib_ucm_copy_ah_attr(struct ib_ucm_ah_attr *dest_attr,
641 struct ib_ah_attr *src_attr)
642{
643 memcpy(dest_attr->grh_dgid, src_attr->grh.dgid.raw,
644 sizeof src_attr->grh.dgid);
645 dest_attr->grh_flow_label = src_attr->grh.flow_label;
646 dest_attr->grh_sgid_index = src_attr->grh.sgid_index;
647 dest_attr->grh_hop_limit = src_attr->grh.hop_limit;
648 dest_attr->grh_traffic_class = src_attr->grh.traffic_class;
649
650 dest_attr->dlid = src_attr->dlid;
651 dest_attr->sl = src_attr->sl;
652 dest_attr->src_path_bits = src_attr->src_path_bits;
653 dest_attr->static_rate = src_attr->static_rate;
654 dest_attr->is_global = (src_attr->ah_flags & IB_AH_GRH);
655 dest_attr->port_num = src_attr->port_num;
656}
657
658static void ib_ucm_copy_qp_attr(struct ib_ucm_init_qp_attr_resp *dest_attr,
659 struct ib_qp_attr *src_attr)
660{
661 dest_attr->cur_qp_state = src_attr->cur_qp_state;
662 dest_attr->path_mtu = src_attr->path_mtu;
663 dest_attr->path_mig_state = src_attr->path_mig_state;
664 dest_attr->qkey = src_attr->qkey;
665 dest_attr->rq_psn = src_attr->rq_psn;
666 dest_attr->sq_psn = src_attr->sq_psn;
667 dest_attr->dest_qp_num = src_attr->dest_qp_num;
668 dest_attr->qp_access_flags = src_attr->qp_access_flags;
669
670 dest_attr->max_send_wr = src_attr->cap.max_send_wr;
671 dest_attr->max_recv_wr = src_attr->cap.max_recv_wr;
672 dest_attr->max_send_sge = src_attr->cap.max_send_sge;
673 dest_attr->max_recv_sge = src_attr->cap.max_recv_sge;
674 dest_attr->max_inline_data = src_attr->cap.max_inline_data;
675
676 ib_ucm_copy_ah_attr(&dest_attr->ah_attr, &src_attr->ah_attr);
677 ib_ucm_copy_ah_attr(&dest_attr->alt_ah_attr, &src_attr->alt_ah_attr);
678
679 dest_attr->pkey_index = src_attr->pkey_index;
680 dest_attr->alt_pkey_index = src_attr->alt_pkey_index;
681 dest_attr->en_sqd_async_notify = src_attr->en_sqd_async_notify;
682 dest_attr->sq_draining = src_attr->sq_draining;
683 dest_attr->max_rd_atomic = src_attr->max_rd_atomic;
684 dest_attr->max_dest_rd_atomic = src_attr->max_dest_rd_atomic;
685 dest_attr->min_rnr_timer = src_attr->min_rnr_timer;
686 dest_attr->port_num = src_attr->port_num;
687 dest_attr->timeout = src_attr->timeout;
688 dest_attr->retry_cnt = src_attr->retry_cnt;
689 dest_attr->rnr_retry = src_attr->rnr_retry;
690 dest_attr->alt_port_num = src_attr->alt_port_num;
691 dest_attr->alt_timeout = src_attr->alt_timeout;
692}
693
694static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, 613static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
695 const char __user *inbuf, 614 const char __user *inbuf,
696 int in_len, int out_len) 615 int in_len, int out_len)
697{ 616{
698 struct ib_ucm_init_qp_attr_resp resp; 617 struct ib_uverbs_qp_attr resp;
699 struct ib_ucm_init_qp_attr cmd; 618 struct ib_ucm_init_qp_attr cmd;
700 struct ib_ucm_context *ctx; 619 struct ib_ucm_context *ctx;
701 struct ib_qp_attr qp_attr; 620 struct ib_qp_attr qp_attr;
@@ -718,7 +637,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
718 if (result) 637 if (result)
719 goto out; 638 goto out;
720 639
721 ib_ucm_copy_qp_attr(&resp, &qp_attr); 640 ib_copy_qp_attr_to_user(&resp, &qp_attr);
722 641
723 if (copy_to_user((void __user *)(unsigned long)cmd.response, 642 if (copy_to_user((void __user *)(unsigned long)cmd.response,
724 &resp, sizeof(resp))) 643 &resp, sizeof(resp)))
@@ -729,6 +648,17 @@ out:
729 return result; 648 return result;
730} 649}
731 650
651static int ucm_validate_listen(__be64 service_id, __be64 service_mask)
652{
653 service_id &= service_mask;
654
655 if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) ||
656 ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID))
657 return -EINVAL;
658
659 return 0;
660}
661
732static ssize_t ib_ucm_listen(struct ib_ucm_file *file, 662static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
733 const char __user *inbuf, 663 const char __user *inbuf,
734 int in_len, int out_len) 664 int in_len, int out_len)
@@ -744,7 +674,13 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
744 if (IS_ERR(ctx)) 674 if (IS_ERR(ctx))
745 return PTR_ERR(ctx); 675 return PTR_ERR(ctx);
746 676
747 result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); 677 result = ucm_validate_listen(cmd.service_id, cmd.service_mask);
678 if (result)
679 goto out;
680
681 result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask,
682 NULL);
683out:
748 ib_ucm_ctx_put(ctx); 684 ib_ucm_ctx_put(ctx);
749 return result; 685 return result;
750} 686}
@@ -793,7 +729,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
793 729
794static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) 730static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
795{ 731{
796 struct ib_ucm_path_rec ucm_path; 732 struct ib_user_path_rec upath;
797 struct ib_sa_path_rec *sa_path; 733 struct ib_sa_path_rec *sa_path;
798 734
799 *path = NULL; 735 *path = NULL;
@@ -805,36 +741,14 @@ static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
805 if (!sa_path) 741 if (!sa_path)
806 return -ENOMEM; 742 return -ENOMEM;
807 743
808 if (copy_from_user(&ucm_path, (void __user *)(unsigned long)src, 744 if (copy_from_user(&upath, (void __user *)(unsigned long)src,
809 sizeof(ucm_path))) { 745 sizeof(upath))) {
810 746
811 kfree(sa_path); 747 kfree(sa_path);
812 return -EFAULT; 748 return -EFAULT;
813 } 749 }
814 750
815 memcpy(sa_path->dgid.raw, ucm_path.dgid, sizeof sa_path->dgid); 751 ib_copy_path_rec_from_user(sa_path, &upath);
816 memcpy(sa_path->sgid.raw, ucm_path.sgid, sizeof sa_path->sgid);
817
818 sa_path->dlid = ucm_path.dlid;
819 sa_path->slid = ucm_path.slid;
820 sa_path->raw_traffic = ucm_path.raw_traffic;
821 sa_path->flow_label = ucm_path.flow_label;
822 sa_path->hop_limit = ucm_path.hop_limit;
823 sa_path->traffic_class = ucm_path.traffic_class;
824 sa_path->reversible = ucm_path.reversible;
825 sa_path->numb_path = ucm_path.numb_path;
826 sa_path->pkey = ucm_path.pkey;
827 sa_path->sl = ucm_path.sl;
828 sa_path->mtu_selector = ucm_path.mtu_selector;
829 sa_path->mtu = ucm_path.mtu;
830 sa_path->rate_selector = ucm_path.rate_selector;
831 sa_path->rate = ucm_path.rate;
832 sa_path->packet_life_time = ucm_path.packet_life_time;
833 sa_path->preference = ucm_path.preference;
834
835 sa_path->packet_life_time_selector =
836 ucm_path.packet_life_time_selector;
837
838 *path = sa_path; 752 *path = sa_path;
839 return 0; 753 return 0;
840} 754}
@@ -1130,7 +1044,6 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
1130 param.service_id = cmd.sid; 1044 param.service_id = cmd.sid;
1131 param.timeout_ms = cmd.timeout; 1045 param.timeout_ms = cmd.timeout;
1132 param.max_cm_retries = cmd.max_cm_retries; 1046 param.max_cm_retries = cmd.max_cm_retries;
1133 param.pkey = cmd.pkey;
1134 1047
1135 ctx = ib_ucm_ctx_get(file, cmd.id); 1048 ctx = ib_ucm_ctx_get(file, cmd.id);
1136 if (!IS_ERR(ctx)) { 1049 if (!IS_ERR(ctx)) {
@@ -1263,7 +1176,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
1263 INIT_LIST_HEAD(&file->ctxs); 1176 INIT_LIST_HEAD(&file->ctxs);
1264 init_waitqueue_head(&file->poll_wait); 1177 init_waitqueue_head(&file->poll_wait);
1265 1178
1266 init_MUTEX(&file->mutex); 1179 mutex_init(&file->file_mutex);
1267 1180
1268 filp->private_data = file; 1181 filp->private_data = file;
1269 file->filp = filp; 1182 file->filp = filp;
@@ -1277,11 +1190,11 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
1277 struct ib_ucm_file *file = filp->private_data; 1190 struct ib_ucm_file *file = filp->private_data;
1278 struct ib_ucm_context *ctx; 1191 struct ib_ucm_context *ctx;
1279 1192
1280 down(&file->mutex); 1193 mutex_lock(&file->file_mutex);
1281 while (!list_empty(&file->ctxs)) { 1194 while (!list_empty(&file->ctxs)) {
1282 ctx = list_entry(file->ctxs.next, 1195 ctx = list_entry(file->ctxs.next,
1283 struct ib_ucm_context, file_list); 1196 struct ib_ucm_context, file_list);
1284 up(&file->mutex); 1197 mutex_unlock(&file->file_mutex);
1285 1198
1286 mutex_lock(&ctx_id_mutex); 1199 mutex_lock(&ctx_id_mutex);
1287 idr_remove(&ctx_id_table, ctx->id); 1200 idr_remove(&ctx_id_table, ctx->id);
@@ -1291,9 +1204,9 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
1291 ib_ucm_cleanup_events(ctx); 1204 ib_ucm_cleanup_events(ctx);
1292 kfree(ctx); 1205 kfree(ctx);
1293 1206
1294 down(&file->mutex); 1207 mutex_lock(&file->file_mutex);
1295 } 1208 }
1296 up(&file->mutex); 1209 mutex_unlock(&file->file_mutex);
1297 kfree(file); 1210 kfree(file);
1298 return 0; 1211 return 0;
1299} 1212}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3372d67ff139..bb9bee56a824 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -132,7 +132,7 @@ struct ib_ucq_object {
132 u32 async_events_reported; 132 u32 async_events_reported;
133}; 133};
134 134
135extern struct mutex ib_uverbs_idr_mutex; 135extern spinlock_t ib_uverbs_idr_lock;
136extern struct idr ib_uverbs_pd_idr; 136extern struct idr ib_uverbs_pd_idr;
137extern struct idr ib_uverbs_mr_idr; 137extern struct idr ib_uverbs_mr_idr;
138extern struct idr ib_uverbs_mw_idr; 138extern struct idr ib_uverbs_mw_idr;
@@ -141,6 +141,8 @@ extern struct idr ib_uverbs_cq_idr;
141extern struct idr ib_uverbs_qp_idr; 141extern struct idr ib_uverbs_qp_idr;
142extern struct idr ib_uverbs_srq_idr; 142extern struct idr ib_uverbs_srq_idr;
143 143
144void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
145
144struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, 146struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
145 int is_async, int *fd); 147 int is_async, int *fd);
146void ib_uverbs_release_event_file(struct kref *ref); 148void ib_uverbs_release_event_file(struct kref *ref);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 9f69bd48eb1b..76bf61e9b552 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -50,6 +50,196 @@
50 (udata)->outlen = (olen); \ 50 (udata)->outlen = (olen); \
51 } while (0) 51 } while (0)
52 52
53/*
54 * The ib_uobject locking scheme is as follows:
55 *
56 * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
57 * needs to be held during all idr operations. When an object is
58 * looked up, a reference must be taken on the object's kref before
59 * dropping this lock.
60 *
61 * - Each object also has an rwsem. This rwsem must be held for
62 * reading while an operation that uses the object is performed.
63 * For example, while registering an MR, the associated PD's
64 * uobject.mutex must be held for reading. The rwsem must be held
65 * for writing while initializing or destroying an object.
66 *
67 * - In addition, each object has a "live" flag. If this flag is not
68 * set, then lookups of the object will fail even if it is found in
69 * the idr. This handles a reader that blocks and does not acquire
70 * the rwsem until after the object is destroyed. The destroy
71 * operation will set the live flag to 0 and then drop the rwsem;
72 * this will allow the reader to acquire the rwsem, see that the
73 * live flag is 0, and then drop the rwsem and its reference to
74 * object. The underlying storage will not be freed until the last
75 * reference to the object is dropped.
76 */
77
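(Aside, not part of the patch: a command handler built on the helpers below follows the pattern the comment above describes. The handler shown is hypothetical; the real uverbs handlers are converted later in this file's diff, outside this hunk.)

/* Sketch only: look up an object under its rwsem, use it, then release it. */
static int example_use_pd(struct ib_ucontext *context, int pd_handle)
{
	struct ib_pd *pd;

	pd = idr_read_pd(pd_handle, context);	/* kref_get() + down_read() */
	if (!pd)
		return -EINVAL;		/* not found, or its live flag is 0 */

	/* ... use pd while holding its uobject rwsem for reading ... */

	put_pd_read(pd);			/* up_read() + kref_put() */
	return 0;
}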
78static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
79 struct ib_ucontext *context)
80{
81 uobj->user_handle = user_handle;
82 uobj->context = context;
83 kref_init(&uobj->ref);
84 init_rwsem(&uobj->mutex);
85 uobj->live = 0;
86}
87
88static void release_uobj(struct kref *kref)
89{
90 kfree(container_of(kref, struct ib_uobject, ref));
91}
92
93static void put_uobj(struct ib_uobject *uobj)
94{
95 kref_put(&uobj->ref, release_uobj);
96}
97
98static void put_uobj_read(struct ib_uobject *uobj)
99{
100 up_read(&uobj->mutex);
101 put_uobj(uobj);
102}
103
104static void put_uobj_write(struct ib_uobject *uobj)
105{
106 up_write(&uobj->mutex);
107 put_uobj(uobj);
108}
109
110static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
111{
112 int ret;
113
114retry:
115 if (!idr_pre_get(idr, GFP_KERNEL))
116 return -ENOMEM;
117
118 spin_lock(&ib_uverbs_idr_lock);
119 ret = idr_get_new(idr, uobj, &uobj->id);
120 spin_unlock(&ib_uverbs_idr_lock);
121
122 if (ret == -EAGAIN)
123 goto retry;
124
125 return ret;
126}
127
128void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
129{
130 spin_lock(&ib_uverbs_idr_lock);
131 idr_remove(idr, uobj->id);
132 spin_unlock(&ib_uverbs_idr_lock);
133}
134
135static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
136 struct ib_ucontext *context)
137{
138 struct ib_uobject *uobj;
139
140 spin_lock(&ib_uverbs_idr_lock);
141 uobj = idr_find(idr, id);
142 if (uobj)
143 kref_get(&uobj->ref);
144 spin_unlock(&ib_uverbs_idr_lock);
145
146 return uobj;
147}
148
149static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
150 struct ib_ucontext *context)
151{
152 struct ib_uobject *uobj;
153
154 uobj = __idr_get_uobj(idr, id, context);
155 if (!uobj)
156 return NULL;
157
158 down_read(&uobj->mutex);
159 if (!uobj->live) {
160 put_uobj_read(uobj);
161 return NULL;
162 }
163
164 return uobj;
165}
166
167static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
168 struct ib_ucontext *context)
169{
170 struct ib_uobject *uobj;
171
172 uobj = __idr_get_uobj(idr, id, context);
173 if (!uobj)
174 return NULL;
175
176 down_write(&uobj->mutex);
177 if (!uobj->live) {
178 put_uobj_write(uobj);
179 return NULL;
180 }
181
182 return uobj;
183}
184
185static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
186{
187 struct ib_uobject *uobj;
188
189 uobj = idr_read_uobj(idr, id, context);
190 return uobj ? uobj->object : NULL;
191}
192
193static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
194{
195 return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
196}
197
198static void put_pd_read(struct ib_pd *pd)
199{
200 put_uobj_read(pd->uobject);
201}
202
203static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
204{
205 return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
206}
207
208static void put_cq_read(struct ib_cq *cq)
209{
210 put_uobj_read(cq->uobject);
211}
212
213static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
214{
215 return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
216}
217
218static void put_ah_read(struct ib_ah *ah)
219{
220 put_uobj_read(ah->uobject);
221}
222
223static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
224{
225 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
226}
227
228static void put_qp_read(struct ib_qp *qp)
229{
230 put_uobj_read(qp->uobject);
231}
232
233static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
234{
235 return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
236}
237
238static void put_srq_read(struct ib_srq *srq)
239{
240 put_uobj_read(srq->uobject);
241}
242
53ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, 243ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
54 const char __user *buf, 244 const char __user *buf,
55 int in_len, int out_len) 245 int in_len, int out_len)
@@ -80,8 +270,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
80 in_len - sizeof cmd, out_len - sizeof resp); 270 in_len - sizeof cmd, out_len - sizeof resp);
81 271
82 ucontext = ibdev->alloc_ucontext(ibdev, &udata); 272 ucontext = ibdev->alloc_ucontext(ibdev, &udata);
83 if (IS_ERR(ucontext)) 273 if (IS_ERR(ucontext)) {
84 return PTR_ERR(file->ucontext); 274 ret = PTR_ERR(file->ucontext);
275 goto err;
276 }
85 277
86 ucontext->device = ibdev; 278 ucontext->device = ibdev;
87 INIT_LIST_HEAD(&ucontext->pd_list); 279 INIT_LIST_HEAD(&ucontext->pd_list);
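The helper block added above (init_uobj() through put_srq_read()) is the concrete form of the locking scheme described in the comment at the top of the hunk: ib_uverbs_idr_lock is held only across idr operations, each object carries a kref plus an rwsem, and the live flag makes a lookup that races with destroy fail cleanly instead of touching a dying object. The stand-alone user-space sketch below models only the read-side discipline with pthreads; all names (table_lock, uobj_get_read, and so on) are invented for illustration and none of this is kernel API.

/* Illustrative sketch, not part of this patch: a user-space model of the
 * read-side lookup discipline (table lock -> reference -> rwlock -> live). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct uobj {
	atomic_int refcnt;		/* models the kref */
	pthread_rwlock_t rwsem;		/* models uobj->mutex, which is an rwsem */
	int live;			/* cleared by destroy before the idr entry goes */
	int payload;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;	/* models ib_uverbs_idr_lock */
static struct uobj *table[16];					/* stand-in for the idr */

static void uobj_put(struct uobj *obj)
{
	if (atomic_fetch_sub(&obj->refcnt, 1) == 1)
		free(obj);		/* storage goes away only on the last reference */
}

static struct uobj *uobj_get_read(int id)
{
	struct uobj *obj;

	pthread_mutex_lock(&table_lock);	/* held only across the table lookup */
	obj = table[id];
	if (obj)
		atomic_fetch_add(&obj->refcnt, 1);
	pthread_mutex_unlock(&table_lock);
	if (!obj)
		return NULL;

	pthread_rwlock_rdlock(&obj->rwsem);	/* may block here across a destroy */
	if (!obj->live) {			/* destroy won the race: back out cleanly */
		pthread_rwlock_unlock(&obj->rwsem);
		uobj_put(obj);
		return NULL;
	}
	return obj;	/* caller uses the object, then unlocks and puts it */
}

int main(void)
{
	struct uobj *obj = calloc(1, sizeof(*obj));
	struct uobj *o;

	atomic_init(&obj->refcnt, 1);		/* reference owned by the table */
	pthread_rwlock_init(&obj->rwsem, NULL);
	obj->live = 1;
	obj->payload = 42;
	table[0] = obj;

	o = uobj_get_read(0);
	if (o) {
		printf("payload = %d\n", o->payload);
		pthread_rwlock_unlock(&o->rwsem);
		uobj_put(o);
	}
	uobj_put(obj);				/* drop the table's reference */
	return 0;
}

The create paths later in this patch rely on the same ordering from the other direction: the new object is initialized and added to the idr while its rwsem is held for writing, and live is set to 1 only after the response has been copied to user space, so a concurrent lookup can never observe a half-built object.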
@@ -278,7 +470,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
278 if (!uobj) 470 if (!uobj)
279 return -ENOMEM; 471 return -ENOMEM;
280 472
281 uobj->context = file->ucontext; 473 init_uobj(uobj, 0, file->ucontext);
474 down_write(&uobj->mutex);
282 475
283 pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, 476 pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
284 file->ucontext, &udata); 477 file->ucontext, &udata);
@@ -291,20 +484,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
291 pd->uobject = uobj; 484 pd->uobject = uobj;
292 atomic_set(&pd->usecnt, 0); 485 atomic_set(&pd->usecnt, 0);
293 486
294 mutex_lock(&ib_uverbs_idr_mutex); 487 uobj->object = pd;
295 488 ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
296retry:
297 if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
298 ret = -ENOMEM;
299 goto err_up;
300 }
301
302 ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
303
304 if (ret == -EAGAIN)
305 goto retry;
306 if (ret) 489 if (ret)
307 goto err_up; 490 goto err_idr;
308 491
309 memset(&resp, 0, sizeof resp); 492 memset(&resp, 0, sizeof resp);
310 resp.pd_handle = uobj->id; 493 resp.pd_handle = uobj->id;
@@ -312,26 +495,27 @@ retry:
312 if (copy_to_user((void __user *) (unsigned long) cmd.response, 495 if (copy_to_user((void __user *) (unsigned long) cmd.response,
313 &resp, sizeof resp)) { 496 &resp, sizeof resp)) {
314 ret = -EFAULT; 497 ret = -EFAULT;
315 goto err_idr; 498 goto err_copy;
316 } 499 }
317 500
318 mutex_lock(&file->mutex); 501 mutex_lock(&file->mutex);
319 list_add_tail(&uobj->list, &file->ucontext->pd_list); 502 list_add_tail(&uobj->list, &file->ucontext->pd_list);
320 mutex_unlock(&file->mutex); 503 mutex_unlock(&file->mutex);
321 504
322 mutex_unlock(&ib_uverbs_idr_mutex); 505 uobj->live = 1;
506
507 up_write(&uobj->mutex);
323 508
324 return in_len; 509 return in_len;
325 510
326err_idr: 511err_copy:
327 idr_remove(&ib_uverbs_pd_idr, uobj->id); 512 idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
328 513
329err_up: 514err_idr:
330 mutex_unlock(&ib_uverbs_idr_mutex);
331 ib_dealloc_pd(pd); 515 ib_dealloc_pd(pd);
332 516
333err: 517err:
334 kfree(uobj); 518 put_uobj_write(uobj);
335 return ret; 519 return ret;
336} 520}
337 521
@@ -340,37 +524,34 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
340 int in_len, int out_len) 524 int in_len, int out_len)
341{ 525{
342 struct ib_uverbs_dealloc_pd cmd; 526 struct ib_uverbs_dealloc_pd cmd;
343 struct ib_pd *pd;
344 struct ib_uobject *uobj; 527 struct ib_uobject *uobj;
345 int ret = -EINVAL; 528 int ret;
346 529
347 if (copy_from_user(&cmd, buf, sizeof cmd)) 530 if (copy_from_user(&cmd, buf, sizeof cmd))
348 return -EFAULT; 531 return -EFAULT;
349 532
350 mutex_lock(&ib_uverbs_idr_mutex); 533 uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
534 if (!uobj)
535 return -EINVAL;
351 536
352 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); 537 ret = ib_dealloc_pd(uobj->object);
353 if (!pd || pd->uobject->context != file->ucontext) 538 if (!ret)
354 goto out; 539 uobj->live = 0;
355 540
356 uobj = pd->uobject; 541 put_uobj_write(uobj);
357 542
358 ret = ib_dealloc_pd(pd);
359 if (ret) 543 if (ret)
360 goto out; 544 return ret;
361 545
362 idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); 546 idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
363 547
364 mutex_lock(&file->mutex); 548 mutex_lock(&file->mutex);
365 list_del(&uobj->list); 549 list_del(&uobj->list);
366 mutex_unlock(&file->mutex); 550 mutex_unlock(&file->mutex);
367 551
368 kfree(uobj); 552 put_uobj(uobj);
369
370out:
371 mutex_unlock(&ib_uverbs_idr_mutex);
372 553
373 return ret ? ret : in_len; 554 return in_len;
374} 555}
375 556
376ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, 557ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
@@ -410,7 +591,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
410 if (!obj) 591 if (!obj)
411 return -ENOMEM; 592 return -ENOMEM;
412 593
413 obj->uobject.context = file->ucontext; 594 init_uobj(&obj->uobject, 0, file->ucontext);
595 down_write(&obj->uobject.mutex);
414 596
415 /* 597 /*
416 * We ask for writable memory if any access flags other than 598 * We ask for writable memory if any access flags other than
@@ -427,23 +609,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
427 609
428 obj->umem.virt_base = cmd.hca_va; 610 obj->umem.virt_base = cmd.hca_va;
429 611
430 mutex_lock(&ib_uverbs_idr_mutex); 612 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
431 613 if (!pd)
432 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); 614 goto err_release;
433 if (!pd || pd->uobject->context != file->ucontext) {
434 ret = -EINVAL;
435 goto err_up;
436 }
437
438 if (!pd->device->reg_user_mr) {
439 ret = -ENOSYS;
440 goto err_up;
441 }
442 615
443 mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); 616 mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
444 if (IS_ERR(mr)) { 617 if (IS_ERR(mr)) {
445 ret = PTR_ERR(mr); 618 ret = PTR_ERR(mr);
446 goto err_up; 619 goto err_put;
447 } 620 }
448 621
449 mr->device = pd->device; 622 mr->device = pd->device;
@@ -452,53 +625,48 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
452 atomic_inc(&pd->usecnt); 625 atomic_inc(&pd->usecnt);
453 atomic_set(&mr->usecnt, 0); 626 atomic_set(&mr->usecnt, 0);
454 627
455 memset(&resp, 0, sizeof resp); 628 obj->uobject.object = mr;
456 resp.lkey = mr->lkey; 629 ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
457 resp.rkey = mr->rkey;
458
459retry:
460 if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) {
461 ret = -ENOMEM;
462 goto err_unreg;
463 }
464
465 ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id);
466
467 if (ret == -EAGAIN)
468 goto retry;
469 if (ret) 630 if (ret)
470 goto err_unreg; 631 goto err_unreg;
471 632
633 memset(&resp, 0, sizeof resp);
634 resp.lkey = mr->lkey;
635 resp.rkey = mr->rkey;
472 resp.mr_handle = obj->uobject.id; 636 resp.mr_handle = obj->uobject.id;
473 637
474 if (copy_to_user((void __user *) (unsigned long) cmd.response, 638 if (copy_to_user((void __user *) (unsigned long) cmd.response,
475 &resp, sizeof resp)) { 639 &resp, sizeof resp)) {
476 ret = -EFAULT; 640 ret = -EFAULT;
477 goto err_idr; 641 goto err_copy;
478 } 642 }
479 643
644 put_pd_read(pd);
645
480 mutex_lock(&file->mutex); 646 mutex_lock(&file->mutex);
481 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); 647 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
482 mutex_unlock(&file->mutex); 648 mutex_unlock(&file->mutex);
483 649
484 mutex_unlock(&ib_uverbs_idr_mutex); 650 obj->uobject.live = 1;
651
652 up_write(&obj->uobject.mutex);
485 653
486 return in_len; 654 return in_len;
487 655
488err_idr: 656err_copy:
489 idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); 657 idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
490 658
491err_unreg: 659err_unreg:
492 ib_dereg_mr(mr); 660 ib_dereg_mr(mr);
493 atomic_dec(&pd->usecnt);
494 661
495err_up: 662err_put:
496 mutex_unlock(&ib_uverbs_idr_mutex); 663 put_pd_read(pd);
497 664
665err_release:
498 ib_umem_release(file->device->ib_dev, &obj->umem); 666 ib_umem_release(file->device->ib_dev, &obj->umem);
499 667
500err_free: 668err_free:
501 kfree(obj); 669 put_uobj_write(&obj->uobject);
502 return ret; 670 return ret;
503} 671}
504 672
@@ -508,37 +676,40 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
508{ 676{
509 struct ib_uverbs_dereg_mr cmd; 677 struct ib_uverbs_dereg_mr cmd;
510 struct ib_mr *mr; 678 struct ib_mr *mr;
679 struct ib_uobject *uobj;
511 struct ib_umem_object *memobj; 680 struct ib_umem_object *memobj;
512 int ret = -EINVAL; 681 int ret = -EINVAL;
513 682
514 if (copy_from_user(&cmd, buf, sizeof cmd)) 683 if (copy_from_user(&cmd, buf, sizeof cmd))
515 return -EFAULT; 684 return -EFAULT;
516 685
517 mutex_lock(&ib_uverbs_idr_mutex); 686 uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
518 687 if (!uobj)
519 mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); 688 return -EINVAL;
520 if (!mr || mr->uobject->context != file->ucontext)
521 goto out;
522 689
523 memobj = container_of(mr->uobject, struct ib_umem_object, uobject); 690 memobj = container_of(uobj, struct ib_umem_object, uobject);
691 mr = uobj->object;
524 692
525 ret = ib_dereg_mr(mr); 693 ret = ib_dereg_mr(mr);
694 if (!ret)
695 uobj->live = 0;
696
697 put_uobj_write(uobj);
698
526 if (ret) 699 if (ret)
527 goto out; 700 return ret;
528 701
529 idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); 702 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
530 703
531 mutex_lock(&file->mutex); 704 mutex_lock(&file->mutex);
532 list_del(&memobj->uobject.list); 705 list_del(&uobj->list);
533 mutex_unlock(&file->mutex); 706 mutex_unlock(&file->mutex);
534 707
535 ib_umem_release(file->device->ib_dev, &memobj->umem); 708 ib_umem_release(file->device->ib_dev, &memobj->umem);
536 kfree(memobj);
537 709
538out: 710 put_uobj(uobj);
539 mutex_unlock(&ib_uverbs_idr_mutex);
540 711
541 return ret ? ret : in_len; 712 return in_len;
542} 713}
543 714
544ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, 715ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -577,7 +748,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
577 struct ib_uverbs_create_cq cmd; 748 struct ib_uverbs_create_cq cmd;
578 struct ib_uverbs_create_cq_resp resp; 749 struct ib_uverbs_create_cq_resp resp;
579 struct ib_udata udata; 750 struct ib_udata udata;
580 struct ib_ucq_object *uobj; 751 struct ib_ucq_object *obj;
581 struct ib_uverbs_event_file *ev_file = NULL; 752 struct ib_uverbs_event_file *ev_file = NULL;
582 struct ib_cq *cq; 753 struct ib_cq *cq;
583 int ret; 754 int ret;
@@ -595,10 +766,13 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
595 if (cmd.comp_vector >= file->device->num_comp_vectors) 766 if (cmd.comp_vector >= file->device->num_comp_vectors)
596 return -EINVAL; 767 return -EINVAL;
597 768
598 uobj = kmalloc(sizeof *uobj, GFP_KERNEL); 769 obj = kmalloc(sizeof *obj, GFP_KERNEL);
599 if (!uobj) 770 if (!obj)
600 return -ENOMEM; 771 return -ENOMEM;
601 772
773 init_uobj(&obj->uobject, cmd.user_handle, file->ucontext);
774 down_write(&obj->uobject.mutex);
775
602 if (cmd.comp_channel >= 0) { 776 if (cmd.comp_channel >= 0) {
603 ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); 777 ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
604 if (!ev_file) { 778 if (!ev_file) {
@@ -607,72 +781,64 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
607 } 781 }
608 } 782 }
609 783
610 uobj->uobject.user_handle = cmd.user_handle; 784 obj->uverbs_file = file;
611 uobj->uobject.context = file->ucontext; 785 obj->comp_events_reported = 0;
612 uobj->uverbs_file = file; 786 obj->async_events_reported = 0;
613 uobj->comp_events_reported = 0; 787 INIT_LIST_HEAD(&obj->comp_list);
614 uobj->async_events_reported = 0; 788 INIT_LIST_HEAD(&obj->async_list);
615 INIT_LIST_HEAD(&uobj->comp_list);
616 INIT_LIST_HEAD(&uobj->async_list);
617 789
618 cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, 790 cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
619 file->ucontext, &udata); 791 file->ucontext, &udata);
620 if (IS_ERR(cq)) { 792 if (IS_ERR(cq)) {
621 ret = PTR_ERR(cq); 793 ret = PTR_ERR(cq);
622 goto err; 794 goto err_file;
623 } 795 }
624 796
625 cq->device = file->device->ib_dev; 797 cq->device = file->device->ib_dev;
626 cq->uobject = &uobj->uobject; 798 cq->uobject = &obj->uobject;
627 cq->comp_handler = ib_uverbs_comp_handler; 799 cq->comp_handler = ib_uverbs_comp_handler;
628 cq->event_handler = ib_uverbs_cq_event_handler; 800 cq->event_handler = ib_uverbs_cq_event_handler;
629 cq->cq_context = ev_file; 801 cq->cq_context = ev_file;
630 atomic_set(&cq->usecnt, 0); 802 atomic_set(&cq->usecnt, 0);
631 803
632 mutex_lock(&ib_uverbs_idr_mutex); 804 obj->uobject.object = cq;
633 805 ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
634retry:
635 if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
636 ret = -ENOMEM;
637 goto err_up;
638 }
639
640 ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
641
642 if (ret == -EAGAIN)
643 goto retry;
644 if (ret) 806 if (ret)
645 goto err_up; 807 goto err_free;
646 808
647 memset(&resp, 0, sizeof resp); 809 memset(&resp, 0, sizeof resp);
648 resp.cq_handle = uobj->uobject.id; 810 resp.cq_handle = obj->uobject.id;
649 resp.cqe = cq->cqe; 811 resp.cqe = cq->cqe;
650 812
651 if (copy_to_user((void __user *) (unsigned long) cmd.response, 813 if (copy_to_user((void __user *) (unsigned long) cmd.response,
652 &resp, sizeof resp)) { 814 &resp, sizeof resp)) {
653 ret = -EFAULT; 815 ret = -EFAULT;
654 goto err_idr; 816 goto err_copy;
655 } 817 }
656 818
657 mutex_lock(&file->mutex); 819 mutex_lock(&file->mutex);
658 list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); 820 list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
659 mutex_unlock(&file->mutex); 821 mutex_unlock(&file->mutex);
660 822
661 mutex_unlock(&ib_uverbs_idr_mutex); 823 obj->uobject.live = 1;
824
825 up_write(&obj->uobject.mutex);
662 826
663 return in_len; 827 return in_len;
664 828
665err_idr: 829err_copy:
666 idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); 830 idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
831
667 832
668err_up: 833err_free:
669 mutex_unlock(&ib_uverbs_idr_mutex);
670 ib_destroy_cq(cq); 834 ib_destroy_cq(cq);
671 835
672err: 836err_file:
673 if (ev_file) 837 if (ev_file)
674 ib_uverbs_release_ucq(file, ev_file, uobj); 838 ib_uverbs_release_ucq(file, ev_file, obj);
675 kfree(uobj); 839
840err:
841 put_uobj_write(&obj->uobject);
676 return ret; 842 return ret;
677} 843}
678 844
@@ -693,11 +859,9 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
693 (unsigned long) cmd.response + sizeof resp, 859 (unsigned long) cmd.response + sizeof resp,
694 in_len - sizeof cmd, out_len - sizeof resp); 860 in_len - sizeof cmd, out_len - sizeof resp);
695 861
696 mutex_lock(&ib_uverbs_idr_mutex); 862 cq = idr_read_cq(cmd.cq_handle, file->ucontext);
697 863 if (!cq)
698 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); 864 return -EINVAL;
699 if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq)
700 goto out;
701 865
702 ret = cq->device->resize_cq(cq, cmd.cqe, &udata); 866 ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
703 if (ret) 867 if (ret)
@@ -711,7 +875,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
711 ret = -EFAULT; 875 ret = -EFAULT;
712 876
713out: 877out:
714 mutex_unlock(&ib_uverbs_idr_mutex); 878 put_cq_read(cq);
715 879
716 return ret ? ret : in_len; 880 return ret ? ret : in_len;
717} 881}
@@ -722,6 +886,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
722{ 886{
723 struct ib_uverbs_poll_cq cmd; 887 struct ib_uverbs_poll_cq cmd;
724 struct ib_uverbs_poll_cq_resp *resp; 888 struct ib_uverbs_poll_cq_resp *resp;
889 struct ib_uobject *uobj;
725 struct ib_cq *cq; 890 struct ib_cq *cq;
726 struct ib_wc *wc; 891 struct ib_wc *wc;
727 int ret = 0; 892 int ret = 0;
@@ -742,15 +907,17 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
742 goto out_wc; 907 goto out_wc;
743 } 908 }
744 909
745 mutex_lock(&ib_uverbs_idr_mutex); 910 uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
746 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); 911 if (!uobj) {
747 if (!cq || cq->uobject->context != file->ucontext) {
748 ret = -EINVAL; 912 ret = -EINVAL;
749 goto out; 913 goto out;
750 } 914 }
915 cq = uobj->object;
751 916
752 resp->count = ib_poll_cq(cq, cmd.ne, wc); 917 resp->count = ib_poll_cq(cq, cmd.ne, wc);
753 918
919 put_uobj_read(uobj);
920
754 for (i = 0; i < resp->count; i++) { 921 for (i = 0; i < resp->count; i++) {
755 resp->wc[i].wr_id = wc[i].wr_id; 922 resp->wc[i].wr_id = wc[i].wr_id;
756 resp->wc[i].status = wc[i].status; 923 resp->wc[i].status = wc[i].status;
@@ -772,7 +939,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
772 ret = -EFAULT; 939 ret = -EFAULT;
773 940
774out: 941out:
775 mutex_unlock(&ib_uverbs_idr_mutex);
776 kfree(resp); 942 kfree(resp);
777 943
778out_wc: 944out_wc:
@@ -785,22 +951,23 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
785 int out_len) 951 int out_len)
786{ 952{
787 struct ib_uverbs_req_notify_cq cmd; 953 struct ib_uverbs_req_notify_cq cmd;
954 struct ib_uobject *uobj;
788 struct ib_cq *cq; 955 struct ib_cq *cq;
789 int ret = -EINVAL;
790 956
791 if (copy_from_user(&cmd, buf, sizeof cmd)) 957 if (copy_from_user(&cmd, buf, sizeof cmd))
792 return -EFAULT; 958 return -EFAULT;
793 959
794 mutex_lock(&ib_uverbs_idr_mutex); 960 uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
795 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); 961 if (!uobj)
796 if (cq && cq->uobject->context == file->ucontext) { 962 return -EINVAL;
797 ib_req_notify_cq(cq, cmd.solicited_only ? 963 cq = uobj->object;
798 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
799 ret = in_len;
800 }
801 mutex_unlock(&ib_uverbs_idr_mutex);
802 964
803 return ret; 965 ib_req_notify_cq(cq, cmd.solicited_only ?
966 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
967
968 put_uobj_read(uobj);
969
970 return in_len;
804} 971}
805 972
806ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, 973ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
@@ -809,52 +976,50 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
809{ 976{
810 struct ib_uverbs_destroy_cq cmd; 977 struct ib_uverbs_destroy_cq cmd;
811 struct ib_uverbs_destroy_cq_resp resp; 978 struct ib_uverbs_destroy_cq_resp resp;
979 struct ib_uobject *uobj;
812 struct ib_cq *cq; 980 struct ib_cq *cq;
813 struct ib_ucq_object *uobj; 981 struct ib_ucq_object *obj;
814 struct ib_uverbs_event_file *ev_file; 982 struct ib_uverbs_event_file *ev_file;
815 u64 user_handle;
816 int ret = -EINVAL; 983 int ret = -EINVAL;
817 984
818 if (copy_from_user(&cmd, buf, sizeof cmd)) 985 if (copy_from_user(&cmd, buf, sizeof cmd))
819 return -EFAULT; 986 return -EFAULT;
820 987
821 memset(&resp, 0, sizeof resp); 988 uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
822 989 if (!uobj)
823 mutex_lock(&ib_uverbs_idr_mutex); 990 return -EINVAL;
991 cq = uobj->object;
992 ev_file = cq->cq_context;
993 obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
824 994
825 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); 995 ret = ib_destroy_cq(cq);
826 if (!cq || cq->uobject->context != file->ucontext) 996 if (!ret)
827 goto out; 997 uobj->live = 0;
828 998
829 user_handle = cq->uobject->user_handle; 999 put_uobj_write(uobj);
830 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
831 ev_file = cq->cq_context;
832 1000
833 ret = ib_destroy_cq(cq);
834 if (ret) 1001 if (ret)
835 goto out; 1002 return ret;
836 1003
837 idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); 1004 idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
838 1005
839 mutex_lock(&file->mutex); 1006 mutex_lock(&file->mutex);
840 list_del(&uobj->uobject.list); 1007 list_del(&uobj->list);
841 mutex_unlock(&file->mutex); 1008 mutex_unlock(&file->mutex);
842 1009
843 ib_uverbs_release_ucq(file, ev_file, uobj); 1010 ib_uverbs_release_ucq(file, ev_file, obj);
844 1011
845 resp.comp_events_reported = uobj->comp_events_reported; 1012 memset(&resp, 0, sizeof resp);
846 resp.async_events_reported = uobj->async_events_reported; 1013 resp.comp_events_reported = obj->comp_events_reported;
1014 resp.async_events_reported = obj->async_events_reported;
847 1015
848 kfree(uobj); 1016 put_uobj(uobj);
849 1017
850 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1018 if (copy_to_user((void __user *) (unsigned long) cmd.response,
851 &resp, sizeof resp)) 1019 &resp, sizeof resp))
852 ret = -EFAULT; 1020 return -EFAULT;
853
854out:
855 mutex_unlock(&ib_uverbs_idr_mutex);
856 1021
857 return ret ? ret : in_len; 1022 return in_len;
858} 1023}
859 1024
860ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, 1025ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
@@ -864,7 +1029,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
864 struct ib_uverbs_create_qp cmd; 1029 struct ib_uverbs_create_qp cmd;
865 struct ib_uverbs_create_qp_resp resp; 1030 struct ib_uverbs_create_qp_resp resp;
866 struct ib_udata udata; 1031 struct ib_udata udata;
867 struct ib_uqp_object *uobj; 1032 struct ib_uqp_object *obj;
868 struct ib_pd *pd; 1033 struct ib_pd *pd;
869 struct ib_cq *scq, *rcq; 1034 struct ib_cq *scq, *rcq;
870 struct ib_srq *srq; 1035 struct ib_srq *srq;
@@ -882,23 +1047,21 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
882 (unsigned long) cmd.response + sizeof resp, 1047 (unsigned long) cmd.response + sizeof resp,
883 in_len - sizeof cmd, out_len - sizeof resp); 1048 in_len - sizeof cmd, out_len - sizeof resp);
884 1049
885 uobj = kmalloc(sizeof *uobj, GFP_KERNEL); 1050 obj = kmalloc(sizeof *obj, GFP_KERNEL);
886 if (!uobj) 1051 if (!obj)
887 return -ENOMEM; 1052 return -ENOMEM;
888 1053
889 mutex_lock(&ib_uverbs_idr_mutex); 1054 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext);
1055 down_write(&obj->uevent.uobject.mutex);
890 1056
891 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); 1057 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
892 scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); 1058 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
893 rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); 1059 rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext);
894 srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL; 1060 srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
895 1061
896 if (!pd || pd->uobject->context != file->ucontext || 1062 if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
897 !scq || scq->uobject->context != file->ucontext ||
898 !rcq || rcq->uobject->context != file->ucontext ||
899 (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) {
900 ret = -EINVAL; 1063 ret = -EINVAL;
901 goto err_up; 1064 goto err_put;
902 } 1065 }
903 1066
904 attr.event_handler = ib_uverbs_qp_event_handler; 1067 attr.event_handler = ib_uverbs_qp_event_handler;
@@ -915,16 +1078,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
915 attr.cap.max_recv_sge = cmd.max_recv_sge; 1078 attr.cap.max_recv_sge = cmd.max_recv_sge;
916 attr.cap.max_inline_data = cmd.max_inline_data; 1079 attr.cap.max_inline_data = cmd.max_inline_data;
917 1080
918 uobj->uevent.uobject.user_handle = cmd.user_handle; 1081 obj->uevent.events_reported = 0;
919 uobj->uevent.uobject.context = file->ucontext; 1082 INIT_LIST_HEAD(&obj->uevent.event_list);
920 uobj->uevent.events_reported = 0; 1083 INIT_LIST_HEAD(&obj->mcast_list);
921 INIT_LIST_HEAD(&uobj->uevent.event_list);
922 INIT_LIST_HEAD(&uobj->mcast_list);
923 1084
924 qp = pd->device->create_qp(pd, &attr, &udata); 1085 qp = pd->device->create_qp(pd, &attr, &udata);
925 if (IS_ERR(qp)) { 1086 if (IS_ERR(qp)) {
926 ret = PTR_ERR(qp); 1087 ret = PTR_ERR(qp);
927 goto err_up; 1088 goto err_put;
928 } 1089 }
929 1090
930 qp->device = pd->device; 1091 qp->device = pd->device;
@@ -932,7 +1093,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
932 qp->send_cq = attr.send_cq; 1093 qp->send_cq = attr.send_cq;
933 qp->recv_cq = attr.recv_cq; 1094 qp->recv_cq = attr.recv_cq;
934 qp->srq = attr.srq; 1095 qp->srq = attr.srq;
935 qp->uobject = &uobj->uevent.uobject; 1096 qp->uobject = &obj->uevent.uobject;
936 qp->event_handler = attr.event_handler; 1097 qp->event_handler = attr.event_handler;
937 qp->qp_context = attr.qp_context; 1098 qp->qp_context = attr.qp_context;
938 qp->qp_type = attr.qp_type; 1099 qp->qp_type = attr.qp_type;
@@ -942,23 +1103,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
942 if (attr.srq) 1103 if (attr.srq)
943 atomic_inc(&attr.srq->usecnt); 1104 atomic_inc(&attr.srq->usecnt);
944 1105
945 memset(&resp, 0, sizeof resp); 1106 obj->uevent.uobject.object = qp;
946 resp.qpn = qp->qp_num; 1107 ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
947
948retry:
949 if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) {
950 ret = -ENOMEM;
951 goto err_destroy;
952 }
953
954 ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject.id);
955
956 if (ret == -EAGAIN)
957 goto retry;
958 if (ret) 1108 if (ret)
959 goto err_destroy; 1109 goto err_destroy;
960 1110
961 resp.qp_handle = uobj->uevent.uobject.id; 1111 memset(&resp, 0, sizeof resp);
1112 resp.qpn = qp->qp_num;
1113 resp.qp_handle = obj->uevent.uobject.id;
962 resp.max_recv_sge = attr.cap.max_recv_sge; 1114 resp.max_recv_sge = attr.cap.max_recv_sge;
963 resp.max_send_sge = attr.cap.max_send_sge; 1115 resp.max_send_sge = attr.cap.max_send_sge;
964 resp.max_recv_wr = attr.cap.max_recv_wr; 1116 resp.max_recv_wr = attr.cap.max_recv_wr;
@@ -968,32 +1120,42 @@ retry:
968 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1120 if (copy_to_user((void __user *) (unsigned long) cmd.response,
969 &resp, sizeof resp)) { 1121 &resp, sizeof resp)) {
970 ret = -EFAULT; 1122 ret = -EFAULT;
971 goto err_idr; 1123 goto err_copy;
972 } 1124 }
973 1125
1126 put_pd_read(pd);
1127 put_cq_read(scq);
1128 put_cq_read(rcq);
1129 if (srq)
1130 put_srq_read(srq);
1131
974 mutex_lock(&file->mutex); 1132 mutex_lock(&file->mutex);
975 list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list); 1133 list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
976 mutex_unlock(&file->mutex); 1134 mutex_unlock(&file->mutex);
977 1135
978 mutex_unlock(&ib_uverbs_idr_mutex); 1136 obj->uevent.uobject.live = 1;
1137
1138 up_write(&obj->uevent.uobject.mutex);
979 1139
980 return in_len; 1140 return in_len;
981 1141
982err_idr: 1142err_copy:
983 idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id); 1143 idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
984 1144
985err_destroy: 1145err_destroy:
986 ib_destroy_qp(qp); 1146 ib_destroy_qp(qp);
987 atomic_dec(&pd->usecnt);
988 atomic_dec(&attr.send_cq->usecnt);
989 atomic_dec(&attr.recv_cq->usecnt);
990 if (attr.srq)
991 atomic_dec(&attr.srq->usecnt);
992
993err_up:
994 mutex_unlock(&ib_uverbs_idr_mutex);
995 1147
996 kfree(uobj); 1148err_put:
1149 if (pd)
1150 put_pd_read(pd);
1151 if (scq)
1152 put_cq_read(scq);
1153 if (rcq)
1154 put_cq_read(rcq);
1155 if (srq)
1156 put_srq_read(srq);
1157
1158 put_uobj_write(&obj->uevent.uobject);
997 return ret; 1159 return ret;
998} 1160}
999 1161
@@ -1018,15 +1180,15 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
1018 goto out; 1180 goto out;
1019 } 1181 }
1020 1182
1021 mutex_lock(&ib_uverbs_idr_mutex); 1183 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
1022 1184 if (!qp) {
1023 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1024 if (qp && qp->uobject->context == file->ucontext)
1025 ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
1026 else
1027 ret = -EINVAL; 1185 ret = -EINVAL;
1186 goto out;
1187 }
1028 1188
1029 mutex_unlock(&ib_uverbs_idr_mutex); 1189 ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
1190
1191 put_qp_read(qp);
1030 1192
1031 if (ret) 1193 if (ret)
1032 goto out; 1194 goto out;
@@ -1113,10 +1275,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1113 if (!attr) 1275 if (!attr)
1114 return -ENOMEM; 1276 return -ENOMEM;
1115 1277
1116 mutex_lock(&ib_uverbs_idr_mutex); 1278 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
1117 1279 if (!qp) {
1118 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1119 if (!qp || qp->uobject->context != file->ucontext) {
1120 ret = -EINVAL; 1280 ret = -EINVAL;
1121 goto out; 1281 goto out;
1122 } 1282 }
@@ -1168,13 +1328,15 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1168 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 1328 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
1169 1329
1170 ret = ib_modify_qp(qp, attr, cmd.attr_mask); 1330 ret = ib_modify_qp(qp, attr, cmd.attr_mask);
1331
1332 put_qp_read(qp);
1333
1171 if (ret) 1334 if (ret)
1172 goto out; 1335 goto out;
1173 1336
1174 ret = in_len; 1337 ret = in_len;
1175 1338
1176out: 1339out:
1177 mutex_unlock(&ib_uverbs_idr_mutex);
1178 kfree(attr); 1340 kfree(attr);
1179 1341
1180 return ret; 1342 return ret;
@@ -1186,8 +1348,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
1186{ 1348{
1187 struct ib_uverbs_destroy_qp cmd; 1349 struct ib_uverbs_destroy_qp cmd;
1188 struct ib_uverbs_destroy_qp_resp resp; 1350 struct ib_uverbs_destroy_qp_resp resp;
1351 struct ib_uobject *uobj;
1189 struct ib_qp *qp; 1352 struct ib_qp *qp;
1190 struct ib_uqp_object *uobj; 1353 struct ib_uqp_object *obj;
1191 int ret = -EINVAL; 1354 int ret = -EINVAL;
1192 1355
1193 if (copy_from_user(&cmd, buf, sizeof cmd)) 1356 if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1195,43 +1358,43 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
1195 1358
1196 memset(&resp, 0, sizeof resp); 1359 memset(&resp, 0, sizeof resp);
1197 1360
1198 mutex_lock(&ib_uverbs_idr_mutex); 1361 uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
1199 1362 if (!uobj)
1200 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); 1363 return -EINVAL;
1201 if (!qp || qp->uobject->context != file->ucontext) 1364 qp = uobj->object;
1202 goto out; 1365 obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
1203
1204 uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
1205 1366
1206 if (!list_empty(&uobj->mcast_list)) { 1367 if (!list_empty(&obj->mcast_list)) {
1207 ret = -EBUSY; 1368 put_uobj_write(uobj);
1208 goto out; 1369 return -EBUSY;
1209 } 1370 }
1210 1371
1211 ret = ib_destroy_qp(qp); 1372 ret = ib_destroy_qp(qp);
1373 if (!ret)
1374 uobj->live = 0;
1375
1376 put_uobj_write(uobj);
1377
1212 if (ret) 1378 if (ret)
1213 goto out; 1379 return ret;
1214 1380
1215 idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); 1381 idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
1216 1382
1217 mutex_lock(&file->mutex); 1383 mutex_lock(&file->mutex);
1218 list_del(&uobj->uevent.uobject.list); 1384 list_del(&uobj->list);
1219 mutex_unlock(&file->mutex); 1385 mutex_unlock(&file->mutex);
1220 1386
1221 ib_uverbs_release_uevent(file, &uobj->uevent); 1387 ib_uverbs_release_uevent(file, &obj->uevent);
1222 1388
1223 resp.events_reported = uobj->uevent.events_reported; 1389 resp.events_reported = obj->uevent.events_reported;
1224 1390
1225 kfree(uobj); 1391 put_uobj(uobj);
1226 1392
1227 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1393 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1228 &resp, sizeof resp)) 1394 &resp, sizeof resp))
1229 ret = -EFAULT; 1395 return -EFAULT;
1230
1231out:
1232 mutex_unlock(&ib_uverbs_idr_mutex);
1233 1396
1234 return ret ? ret : in_len; 1397 return in_len;
1235} 1398}
1236 1399
1237ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, 1400ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
@@ -1244,6 +1407,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1244 struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; 1407 struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
1245 struct ib_qp *qp; 1408 struct ib_qp *qp;
1246 int i, sg_ind; 1409 int i, sg_ind;
1410 int is_ud;
1247 ssize_t ret = -EINVAL; 1411 ssize_t ret = -EINVAL;
1248 1412
1249 if (copy_from_user(&cmd, buf, sizeof cmd)) 1413 if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1260,12 +1424,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1260 if (!user_wr) 1424 if (!user_wr)
1261 return -ENOMEM; 1425 return -ENOMEM;
1262 1426
1263 mutex_lock(&ib_uverbs_idr_mutex); 1427 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
1264 1428 if (!qp)
1265 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1266 if (!qp || qp->uobject->context != file->ucontext)
1267 goto out; 1429 goto out;
1268 1430
1431 is_ud = qp->qp_type == IB_QPT_UD;
1269 sg_ind = 0; 1432 sg_ind = 0;
1270 last = NULL; 1433 last = NULL;
1271 for (i = 0; i < cmd.wr_count; ++i) { 1434 for (i = 0; i < cmd.wr_count; ++i) {
@@ -1273,12 +1436,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1273 buf + sizeof cmd + i * cmd.wqe_size, 1436 buf + sizeof cmd + i * cmd.wqe_size,
1274 cmd.wqe_size)) { 1437 cmd.wqe_size)) {
1275 ret = -EFAULT; 1438 ret = -EFAULT;
1276 goto out; 1439 goto out_put;
1277 } 1440 }
1278 1441
1279 if (user_wr->num_sge + sg_ind > cmd.sge_count) { 1442 if (user_wr->num_sge + sg_ind > cmd.sge_count) {
1280 ret = -EINVAL; 1443 ret = -EINVAL;
1281 goto out; 1444 goto out_put;
1282 } 1445 }
1283 1446
1284 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + 1447 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
@@ -1286,7 +1449,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1286 GFP_KERNEL); 1449 GFP_KERNEL);
1287 if (!next) { 1450 if (!next) {
1288 ret = -ENOMEM; 1451 ret = -ENOMEM;
1289 goto out; 1452 goto out_put;
1290 } 1453 }
1291 1454
1292 if (!last) 1455 if (!last)
@@ -1302,12 +1465,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1302 next->send_flags = user_wr->send_flags; 1465 next->send_flags = user_wr->send_flags;
1303 next->imm_data = (__be32 __force) user_wr->imm_data; 1466 next->imm_data = (__be32 __force) user_wr->imm_data;
1304 1467
1305 if (qp->qp_type == IB_QPT_UD) { 1468 if (is_ud) {
1306 next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, 1469 next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
1307 user_wr->wr.ud.ah); 1470 file->ucontext);
1308 if (!next->wr.ud.ah) { 1471 if (!next->wr.ud.ah) {
1309 ret = -EINVAL; 1472 ret = -EINVAL;
1310 goto out; 1473 goto out_put;
1311 } 1474 }
1312 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; 1475 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
1313 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; 1476 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
@@ -1344,7 +1507,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1344 sg_ind * sizeof (struct ib_sge), 1507 sg_ind * sizeof (struct ib_sge),
1345 next->num_sge * sizeof (struct ib_sge))) { 1508 next->num_sge * sizeof (struct ib_sge))) {
1346 ret = -EFAULT; 1509 ret = -EFAULT;
1347 goto out; 1510 goto out_put;
1348 } 1511 }
1349 sg_ind += next->num_sge; 1512 sg_ind += next->num_sge;
1350 } else 1513 } else
@@ -1364,10 +1527,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1364 &resp, sizeof resp)) 1527 &resp, sizeof resp))
1365 ret = -EFAULT; 1528 ret = -EFAULT;
1366 1529
1367out: 1530out_put:
1368 mutex_unlock(&ib_uverbs_idr_mutex); 1531 put_qp_read(qp);
1369 1532
1533out:
1370 while (wr) { 1534 while (wr) {
1535 if (is_ud && wr->wr.ud.ah)
1536 put_ah_read(wr->wr.ud.ah);
1371 next = wr->next; 1537 next = wr->next;
1372 kfree(wr); 1538 kfree(wr);
1373 wr = next; 1539 wr = next;
@@ -1482,14 +1648,15 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
1482 if (IS_ERR(wr)) 1648 if (IS_ERR(wr))
1483 return PTR_ERR(wr); 1649 return PTR_ERR(wr);
1484 1650
1485 mutex_lock(&ib_uverbs_idr_mutex); 1651 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
1486 1652 if (!qp)
1487 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1488 if (!qp || qp->uobject->context != file->ucontext)
1489 goto out; 1653 goto out;
1490 1654
1491 resp.bad_wr = 0; 1655 resp.bad_wr = 0;
1492 ret = qp->device->post_recv(qp, wr, &bad_wr); 1656 ret = qp->device->post_recv(qp, wr, &bad_wr);
1657
1658 put_qp_read(qp);
1659
1493 if (ret) 1660 if (ret)
1494 for (next = wr; next; next = next->next) { 1661 for (next = wr; next; next = next->next) {
1495 ++resp.bad_wr; 1662 ++resp.bad_wr;
@@ -1503,8 +1670,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
1503 ret = -EFAULT; 1670 ret = -EFAULT;
1504 1671
1505out: 1672out:
1506 mutex_unlock(&ib_uverbs_idr_mutex);
1507
1508 while (wr) { 1673 while (wr) {
1509 next = wr->next; 1674 next = wr->next;
1510 kfree(wr); 1675 kfree(wr);
@@ -1533,14 +1698,15 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
1533 if (IS_ERR(wr)) 1698 if (IS_ERR(wr))
1534 return PTR_ERR(wr); 1699 return PTR_ERR(wr);
1535 1700
1536 mutex_lock(&ib_uverbs_idr_mutex); 1701 srq = idr_read_srq(cmd.srq_handle, file->ucontext);
1537 1702 if (!srq)
1538 srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
1539 if (!srq || srq->uobject->context != file->ucontext)
1540 goto out; 1703 goto out;
1541 1704
1542 resp.bad_wr = 0; 1705 resp.bad_wr = 0;
1543 ret = srq->device->post_srq_recv(srq, wr, &bad_wr); 1706 ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
1707
1708 put_srq_read(srq);
1709
1544 if (ret) 1710 if (ret)
1545 for (next = wr; next; next = next->next) { 1711 for (next = wr; next; next = next->next) {
1546 ++resp.bad_wr; 1712 ++resp.bad_wr;
@@ -1554,8 +1720,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
1554 ret = -EFAULT; 1720 ret = -EFAULT;
1555 1721
1556out: 1722out:
1557 mutex_unlock(&ib_uverbs_idr_mutex);
1558
1559 while (wr) { 1723 while (wr) {
1560 next = wr->next; 1724 next = wr->next;
1561 kfree(wr); 1725 kfree(wr);
@@ -1587,17 +1751,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
1587 if (!uobj) 1751 if (!uobj)
1588 return -ENOMEM; 1752 return -ENOMEM;
1589 1753
1590 mutex_lock(&ib_uverbs_idr_mutex); 1754 init_uobj(uobj, cmd.user_handle, file->ucontext);
1755 down_write(&uobj->mutex);
1591 1756
1592 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); 1757 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1593 if (!pd || pd->uobject->context != file->ucontext) { 1758 if (!pd) {
1594 ret = -EINVAL; 1759 ret = -EINVAL;
1595 goto err_up; 1760 goto err;
1596 } 1761 }
1597 1762
1598 uobj->user_handle = cmd.user_handle;
1599 uobj->context = file->ucontext;
1600
1601 attr.dlid = cmd.attr.dlid; 1763 attr.dlid = cmd.attr.dlid;
1602 attr.sl = cmd.attr.sl; 1764 attr.sl = cmd.attr.sl;
1603 attr.src_path_bits = cmd.attr.src_path_bits; 1765 attr.src_path_bits = cmd.attr.src_path_bits;
@@ -1613,21 +1775,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
1613 ah = ib_create_ah(pd, &attr); 1775 ah = ib_create_ah(pd, &attr);
1614 if (IS_ERR(ah)) { 1776 if (IS_ERR(ah)) {
1615 ret = PTR_ERR(ah); 1777 ret = PTR_ERR(ah);
1616 goto err_up; 1778 goto err;
1617 }
1618
1619 ah->uobject = uobj;
1620
1621retry:
1622 if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
1623 ret = -ENOMEM;
1624 goto err_destroy;
1625 } 1779 }
1626 1780
1627 ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); 1781 ah->uobject = uobj;
1782 uobj->object = ah;
1628 1783
1629 if (ret == -EAGAIN) 1784 ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
1630 goto retry;
1631 if (ret) 1785 if (ret)
1632 goto err_destroy; 1786 goto err_destroy;
1633 1787
@@ -1636,27 +1790,29 @@ retry:
1636 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1790 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1637 &resp, sizeof resp)) { 1791 &resp, sizeof resp)) {
1638 ret = -EFAULT; 1792 ret = -EFAULT;
1639 goto err_idr; 1793 goto err_copy;
1640 } 1794 }
1641 1795
1796 put_pd_read(pd);
1797
1642 mutex_lock(&file->mutex); 1798 mutex_lock(&file->mutex);
1643 list_add_tail(&uobj->list, &file->ucontext->ah_list); 1799 list_add_tail(&uobj->list, &file->ucontext->ah_list);
1644 mutex_unlock(&file->mutex); 1800 mutex_unlock(&file->mutex);
1645 1801
1646 mutex_unlock(&ib_uverbs_idr_mutex); 1802 uobj->live = 1;
1803
1804 up_write(&uobj->mutex);
1647 1805
1648 return in_len; 1806 return in_len;
1649 1807
1650err_idr: 1808err_copy:
1651 idr_remove(&ib_uverbs_ah_idr, uobj->id); 1809 idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
1652 1810
1653err_destroy: 1811err_destroy:
1654 ib_destroy_ah(ah); 1812 ib_destroy_ah(ah);
1655 1813
1656err_up: 1814err:
1657 mutex_unlock(&ib_uverbs_idr_mutex); 1815 put_uobj_write(uobj);
1658
1659 kfree(uobj);
1660 return ret; 1816 return ret;
1661} 1817}
1662 1818
@@ -1666,35 +1822,34 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
1666 struct ib_uverbs_destroy_ah cmd; 1822 struct ib_uverbs_destroy_ah cmd;
1667 struct ib_ah *ah; 1823 struct ib_ah *ah;
1668 struct ib_uobject *uobj; 1824 struct ib_uobject *uobj;
1669 int ret = -EINVAL; 1825 int ret;
1670 1826
1671 if (copy_from_user(&cmd, buf, sizeof cmd)) 1827 if (copy_from_user(&cmd, buf, sizeof cmd))
1672 return -EFAULT; 1828 return -EFAULT;
1673 1829
1674 mutex_lock(&ib_uverbs_idr_mutex); 1830 uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
1831 if (!uobj)
1832 return -EINVAL;
1833 ah = uobj->object;
1675 1834
1676 ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); 1835 ret = ib_destroy_ah(ah);
1677 if (!ah || ah->uobject->context != file->ucontext) 1836 if (!ret)
1678 goto out; 1837 uobj->live = 0;
1679 1838
1680 uobj = ah->uobject; 1839 put_uobj_write(uobj);
1681 1840
1682 ret = ib_destroy_ah(ah);
1683 if (ret) 1841 if (ret)
1684 goto out; 1842 return ret;
1685 1843
1686 idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); 1844 idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
1687 1845
1688 mutex_lock(&file->mutex); 1846 mutex_lock(&file->mutex);
1689 list_del(&uobj->list); 1847 list_del(&uobj->list);
1690 mutex_unlock(&file->mutex); 1848 mutex_unlock(&file->mutex);
1691 1849
1692 kfree(uobj); 1850 put_uobj(uobj);
1693 1851
1694out: 1852 return in_len;
1695 mutex_unlock(&ib_uverbs_idr_mutex);
1696
1697 return ret ? ret : in_len;
1698} 1853}
1699 1854
1700ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, 1855ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -1703,47 +1858,43 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
1703{ 1858{
1704 struct ib_uverbs_attach_mcast cmd; 1859 struct ib_uverbs_attach_mcast cmd;
1705 struct ib_qp *qp; 1860 struct ib_qp *qp;
1706 struct ib_uqp_object *uobj; 1861 struct ib_uqp_object *obj;
1707 struct ib_uverbs_mcast_entry *mcast; 1862 struct ib_uverbs_mcast_entry *mcast;
1708 int ret = -EINVAL; 1863 int ret;
1709 1864
1710 if (copy_from_user(&cmd, buf, sizeof cmd)) 1865 if (copy_from_user(&cmd, buf, sizeof cmd))
1711 return -EFAULT; 1866 return -EFAULT;
1712 1867
1713 mutex_lock(&ib_uverbs_idr_mutex); 1868 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
1714 1869 if (!qp)
1715 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); 1870 return -EINVAL;
1716 if (!qp || qp->uobject->context != file->ucontext)
1717 goto out;
1718 1871
1719 uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); 1872 obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
1720 1873
1721 list_for_each_entry(mcast, &uobj->mcast_list, list) 1874 list_for_each_entry(mcast, &obj->mcast_list, list)
1722 if (cmd.mlid == mcast->lid && 1875 if (cmd.mlid == mcast->lid &&
1723 !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { 1876 !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
1724 ret = 0; 1877 ret = 0;
1725 goto out; 1878 goto out_put;
1726 } 1879 }
1727 1880
1728 mcast = kmalloc(sizeof *mcast, GFP_KERNEL); 1881 mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
1729 if (!mcast) { 1882 if (!mcast) {
1730 ret = -ENOMEM; 1883 ret = -ENOMEM;
1731 goto out; 1884 goto out_put;
1732 } 1885 }
1733 1886
1734 mcast->lid = cmd.mlid; 1887 mcast->lid = cmd.mlid;
1735 memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); 1888 memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
1736 1889
1737 ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); 1890 ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
1738 if (!ret) { 1891 if (!ret)
1739 uobj = container_of(qp->uobject, struct ib_uqp_object, 1892 list_add_tail(&mcast->list, &obj->mcast_list);
1740 uevent.uobject); 1893 else
1741 list_add_tail(&mcast->list, &uobj->mcast_list);
1742 } else
1743 kfree(mcast); 1894 kfree(mcast);
1744 1895
1745out: 1896out_put:
1746 mutex_unlock(&ib_uverbs_idr_mutex); 1897 put_qp_read(qp);
1747 1898
1748 return ret ? ret : in_len; 1899 return ret ? ret : in_len;
1749} 1900}
@@ -1753,7 +1904,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
1753 int out_len) 1904 int out_len)
1754{ 1905{
1755 struct ib_uverbs_detach_mcast cmd; 1906 struct ib_uverbs_detach_mcast cmd;
1756 struct ib_uqp_object *uobj; 1907 struct ib_uqp_object *obj;
1757 struct ib_qp *qp; 1908 struct ib_qp *qp;
1758 struct ib_uverbs_mcast_entry *mcast; 1909 struct ib_uverbs_mcast_entry *mcast;
1759 int ret = -EINVAL; 1910 int ret = -EINVAL;
@@ -1761,19 +1912,17 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
1761 if (copy_from_user(&cmd, buf, sizeof cmd)) 1912 if (copy_from_user(&cmd, buf, sizeof cmd))
1762 return -EFAULT; 1913 return -EFAULT;
1763 1914
1764 mutex_lock(&ib_uverbs_idr_mutex); 1915 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
1765 1916 if (!qp)
1766 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); 1917 return -EINVAL;
1767 if (!qp || qp->uobject->context != file->ucontext)
1768 goto out;
1769 1918
1770 ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); 1919 ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
1771 if (ret) 1920 if (ret)
1772 goto out; 1921 goto out_put;
1773 1922
1774 uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); 1923 obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
1775 1924
1776 list_for_each_entry(mcast, &uobj->mcast_list, list) 1925 list_for_each_entry(mcast, &obj->mcast_list, list)
1777 if (cmd.mlid == mcast->lid && 1926 if (cmd.mlid == mcast->lid &&
1778 !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { 1927 !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
1779 list_del(&mcast->list); 1928 list_del(&mcast->list);
@@ -1781,8 +1930,8 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
1781 break; 1930 break;
1782 } 1931 }
1783 1932
1784out: 1933out_put:
1785 mutex_unlock(&ib_uverbs_idr_mutex); 1934 put_qp_read(qp);
1786 1935
1787 return ret ? ret : in_len; 1936 return ret ? ret : in_len;
1788} 1937}
@@ -1794,7 +1943,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
1794 struct ib_uverbs_create_srq cmd; 1943 struct ib_uverbs_create_srq cmd;
1795 struct ib_uverbs_create_srq_resp resp; 1944 struct ib_uverbs_create_srq_resp resp;
1796 struct ib_udata udata; 1945 struct ib_udata udata;
1797 struct ib_uevent_object *uobj; 1946 struct ib_uevent_object *obj;
1798 struct ib_pd *pd; 1947 struct ib_pd *pd;
1799 struct ib_srq *srq; 1948 struct ib_srq *srq;
1800 struct ib_srq_init_attr attr; 1949 struct ib_srq_init_attr attr;
@@ -1810,17 +1959,17 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
1810 (unsigned long) cmd.response + sizeof resp, 1959 (unsigned long) cmd.response + sizeof resp,
1811 in_len - sizeof cmd, out_len - sizeof resp); 1960 in_len - sizeof cmd, out_len - sizeof resp);
1812 1961
1813 uobj = kmalloc(sizeof *uobj, GFP_KERNEL); 1962 obj = kmalloc(sizeof *obj, GFP_KERNEL);
1814 if (!uobj) 1963 if (!obj)
1815 return -ENOMEM; 1964 return -ENOMEM;
1816 1965
1817 mutex_lock(&ib_uverbs_idr_mutex); 1966 init_uobj(&obj->uobject, 0, file->ucontext);
1967 down_write(&obj->uobject.mutex);
1818 1968
1819 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); 1969 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1820 1970 if (!pd) {
1821 if (!pd || pd->uobject->context != file->ucontext) {
1822 ret = -EINVAL; 1971 ret = -EINVAL;
1823 goto err_up; 1972 goto err;
1824 } 1973 }
1825 1974
1826 attr.event_handler = ib_uverbs_srq_event_handler; 1975 attr.event_handler = ib_uverbs_srq_event_handler;
@@ -1829,69 +1978,59 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
1829 attr.attr.max_sge = cmd.max_sge; 1978 attr.attr.max_sge = cmd.max_sge;
1830 attr.attr.srq_limit = cmd.srq_limit; 1979 attr.attr.srq_limit = cmd.srq_limit;
1831 1980
1832 uobj->uobject.user_handle = cmd.user_handle; 1981 obj->events_reported = 0;
1833 uobj->uobject.context = file->ucontext; 1982 INIT_LIST_HEAD(&obj->event_list);
1834 uobj->events_reported = 0;
1835 INIT_LIST_HEAD(&uobj->event_list);
1836 1983
1837 srq = pd->device->create_srq(pd, &attr, &udata); 1984 srq = pd->device->create_srq(pd, &attr, &udata);
1838 if (IS_ERR(srq)) { 1985 if (IS_ERR(srq)) {
1839 ret = PTR_ERR(srq); 1986 ret = PTR_ERR(srq);
1840 goto err_up; 1987 goto err;
1841 } 1988 }
1842 1989
1843 srq->device = pd->device; 1990 srq->device = pd->device;
1844 srq->pd = pd; 1991 srq->pd = pd;
1845 srq->uobject = &uobj->uobject; 1992 srq->uobject = &obj->uobject;
1846 srq->event_handler = attr.event_handler; 1993 srq->event_handler = attr.event_handler;
1847 srq->srq_context = attr.srq_context; 1994 srq->srq_context = attr.srq_context;
1848 atomic_inc(&pd->usecnt); 1995 atomic_inc(&pd->usecnt);
1849 atomic_set(&srq->usecnt, 0); 1996 atomic_set(&srq->usecnt, 0);
1850 1997
1851 memset(&resp, 0, sizeof resp); 1998 obj->uobject.object = srq;
1852 1999 ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
1853retry:
1854 if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) {
1855 ret = -ENOMEM;
1856 goto err_destroy;
1857 }
1858
1859 ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->uobject.id);
1860
1861 if (ret == -EAGAIN)
1862 goto retry;
1863 if (ret) 2000 if (ret)
1864 goto err_destroy; 2001 goto err_destroy;
1865 2002
1866 resp.srq_handle = uobj->uobject.id; 2003 memset(&resp, 0, sizeof resp);
2004 resp.srq_handle = obj->uobject.id;
1867 resp.max_wr = attr.attr.max_wr; 2005 resp.max_wr = attr.attr.max_wr;
1868 resp.max_sge = attr.attr.max_sge; 2006 resp.max_sge = attr.attr.max_sge;
1869 2007
1870 if (copy_to_user((void __user *) (unsigned long) cmd.response, 2008 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1871 &resp, sizeof resp)) { 2009 &resp, sizeof resp)) {
1872 ret = -EFAULT; 2010 ret = -EFAULT;
1873 goto err_idr; 2011 goto err_copy;
1874 } 2012 }
1875 2013
2014 put_pd_read(pd);
2015
1876 mutex_lock(&file->mutex); 2016 mutex_lock(&file->mutex);
1877 list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); 2017 list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
1878 mutex_unlock(&file->mutex); 2018 mutex_unlock(&file->mutex);
1879 2019
1880 mutex_unlock(&ib_uverbs_idr_mutex); 2020 obj->uobject.live = 1;
2021
2022 up_write(&obj->uobject.mutex);
1881 2023
1882 return in_len; 2024 return in_len;
1883 2025
1884err_idr: 2026err_copy:
1885 idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); 2027 idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
1886 2028
1887err_destroy: 2029err_destroy:
1888 ib_destroy_srq(srq); 2030 ib_destroy_srq(srq);
1889 atomic_dec(&pd->usecnt);
1890
1891err_up:
1892 mutex_unlock(&ib_uverbs_idr_mutex);
1893 2031
1894 kfree(uobj); 2032err:
2033 put_uobj_write(&obj->uobject);
1895 return ret; 2034 return ret;
1896} 2035}
1897 2036
@@ -1907,21 +2046,16 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
1907 if (copy_from_user(&cmd, buf, sizeof cmd)) 2046 if (copy_from_user(&cmd, buf, sizeof cmd))
1908 return -EFAULT; 2047 return -EFAULT;
1909 2048
1910 mutex_lock(&ib_uverbs_idr_mutex); 2049 srq = idr_read_srq(cmd.srq_handle, file->ucontext);
1911 2050 if (!srq)
1912 srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); 2051 return -EINVAL;
1913 if (!srq || srq->uobject->context != file->ucontext) {
1914 ret = -EINVAL;
1915 goto out;
1916 }
1917 2052
1918 attr.max_wr = cmd.max_wr; 2053 attr.max_wr = cmd.max_wr;
1919 attr.srq_limit = cmd.srq_limit; 2054 attr.srq_limit = cmd.srq_limit;
1920 2055
1921 ret = ib_modify_srq(srq, &attr, cmd.attr_mask); 2056 ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
1922 2057
1923out: 2058 put_srq_read(srq);
1924 mutex_unlock(&ib_uverbs_idr_mutex);
1925 2059
1926 return ret ? ret : in_len; 2060 return ret ? ret : in_len;
1927} 2061}
@@ -1942,18 +2076,16 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
1942 if (copy_from_user(&cmd, buf, sizeof cmd)) 2076 if (copy_from_user(&cmd, buf, sizeof cmd))
1943 return -EFAULT; 2077 return -EFAULT;
1944 2078
1945 mutex_lock(&ib_uverbs_idr_mutex); 2079 srq = idr_read_srq(cmd.srq_handle, file->ucontext);
2080 if (!srq)
2081 return -EINVAL;
1946 2082
1947 srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); 2083 ret = ib_query_srq(srq, &attr);
1948 if (srq && srq->uobject->context == file->ucontext)
1949 ret = ib_query_srq(srq, &attr);
1950 else
1951 ret = -EINVAL;
1952 2084
1953 mutex_unlock(&ib_uverbs_idr_mutex); 2085 put_srq_read(srq);
1954 2086
1955 if (ret) 2087 if (ret)
1956 goto out; 2088 return ret;
1957 2089
1958 memset(&resp, 0, sizeof resp); 2090 memset(&resp, 0, sizeof resp);
1959 2091
@@ -1963,10 +2095,9 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
1963 2095
1964 if (copy_to_user((void __user *) (unsigned long) cmd.response, 2096 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1965 &resp, sizeof resp)) 2097 &resp, sizeof resp))
1966 ret = -EFAULT; 2098 return -EFAULT;
1967 2099
1968out: 2100 return in_len;
1969 return ret ? ret : in_len;
1970} 2101}
1971 2102
1972ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, 2103ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
@@ -1975,45 +2106,45 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
1975{ 2106{
1976 struct ib_uverbs_destroy_srq cmd; 2107 struct ib_uverbs_destroy_srq cmd;
1977 struct ib_uverbs_destroy_srq_resp resp; 2108 struct ib_uverbs_destroy_srq_resp resp;
2109 struct ib_uobject *uobj;
1978 struct ib_srq *srq; 2110 struct ib_srq *srq;
1979 struct ib_uevent_object *uobj; 2111 struct ib_uevent_object *obj;
1980 int ret = -EINVAL; 2112 int ret = -EINVAL;
1981 2113
1982 if (copy_from_user(&cmd, buf, sizeof cmd)) 2114 if (copy_from_user(&cmd, buf, sizeof cmd))
1983 return -EFAULT; 2115 return -EFAULT;
1984 2116
1985 mutex_lock(&ib_uverbs_idr_mutex); 2117 uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
1986 2118 if (!uobj)
1987 memset(&resp, 0, sizeof resp); 2119 return -EINVAL;
2120 srq = uobj->object;
2121 obj = container_of(uobj, struct ib_uevent_object, uobject);
1988 2122
1989 srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); 2123 ret = ib_destroy_srq(srq);
1990 if (!srq || srq->uobject->context != file->ucontext) 2124 if (!ret)
1991 goto out; 2125 uobj->live = 0;
1992 2126
1993 uobj = container_of(srq->uobject, struct ib_uevent_object, uobject); 2127 put_uobj_write(uobj);
1994 2128
1995 ret = ib_destroy_srq(srq);
1996 if (ret) 2129 if (ret)
1997 goto out; 2130 return ret;
1998 2131
1999 idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); 2132 idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
2000 2133
2001 mutex_lock(&file->mutex); 2134 mutex_lock(&file->mutex);
2002 list_del(&uobj->uobject.list); 2135 list_del(&uobj->list);
2003 mutex_unlock(&file->mutex); 2136 mutex_unlock(&file->mutex);
2004 2137
2005 ib_uverbs_release_uevent(file, uobj); 2138 ib_uverbs_release_uevent(file, obj);
2006 2139
2007 resp.events_reported = uobj->events_reported; 2140 memset(&resp, 0, sizeof resp);
2141 resp.events_reported = obj->events_reported;
2008 2142
2009 kfree(uobj); 2143 put_uobj(uobj);
2010 2144
2011 if (copy_to_user((void __user *) (unsigned long) cmd.response, 2145 if (copy_to_user((void __user *) (unsigned long) cmd.response,
2012 &resp, sizeof resp)) 2146 &resp, sizeof resp))
2013 ret = -EFAULT; 2147 ret = -EFAULT;
2014 2148
2015out:
2016 mutex_unlock(&ib_uverbs_idr_mutex);
2017
2018 return ret ? ret : in_len; 2149 return ret ? ret : in_len;
2019} 2150}
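
Note on the locking scheme in the SRQ handlers above: commands no longer serialize on the global ib_uverbs_idr_mutex. Each userspace object now carries its own reference count and read/write semaphore, the idr trees are protected only by the ib_uverbs_idr_lock spinlock (see uverbs_main.c below), and helpers such as idr_read_srq()/put_srq_read() and idr_write_uobj()/put_uobj_write() lock just the object a command touches. The following is only an illustrative sketch of what such a read-side lookup can look like, written as if it lived in uverbs_cmd.c; the real helpers are defined earlier in that file and may differ in detail. It assumes struct ib_uobject carries a kref named "ref" in addition to the "mutex" rw-semaphore and "live" flag visible in the hunks above.

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/rwsem.h>
#include <linux/slab.h>

#include "uverbs.h"

/* Illustrative only -- not the actual uverbs_cmd.c helper. */
static void example_release_uobj(struct kref *ref)
{
	kfree(container_of(ref, struct ib_uobject, ref));
}

static struct ib_uobject *example_idr_read_uobj(struct idr *idr, int id,
						struct ib_ucontext *context)
{
	struct ib_uobject *uobj;

	spin_lock(&ib_uverbs_idr_lock);
	uobj = idr_find(idr, id);
	if (uobj)
		kref_get(&uobj->ref);		/* pin it before dropping the lock */
	spin_unlock(&ib_uverbs_idr_lock);

	if (!uobj)
		return NULL;

	down_read(&uobj->mutex);		/* excludes a concurrent destroy */
	if (!uobj->live || uobj->context != context) {
		up_read(&uobj->mutex);
		kref_put(&uobj->ref, example_release_uobj);
		return NULL;
	}

	return uobj;
}

The destroy side takes down_write() instead, clears "live" so later lookups fail, and only then removes the handle from the idr and drops the final reference, which is the sequence visible in ib_uverbs_destroy_srq() above.
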
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index ff092a0a94da..5ec2d49e9bb6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -66,7 +66,7 @@ enum {
66 66
67static struct class *uverbs_class; 67static struct class *uverbs_class;
68 68
69DEFINE_MUTEX(ib_uverbs_idr_mutex); 69DEFINE_SPINLOCK(ib_uverbs_idr_lock);
70DEFINE_IDR(ib_uverbs_pd_idr); 70DEFINE_IDR(ib_uverbs_pd_idr);
71DEFINE_IDR(ib_uverbs_mr_idr); 71DEFINE_IDR(ib_uverbs_mr_idr);
72DEFINE_IDR(ib_uverbs_mw_idr); 72DEFINE_IDR(ib_uverbs_mw_idr);
@@ -183,21 +183,21 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
183 if (!context) 183 if (!context)
184 return 0; 184 return 0;
185 185
186 mutex_lock(&ib_uverbs_idr_mutex);
187
188 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { 186 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
189 struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); 187 struct ib_ah *ah = uobj->object;
190 idr_remove(&ib_uverbs_ah_idr, uobj->id); 188
189 idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
191 ib_destroy_ah(ah); 190 ib_destroy_ah(ah);
192 list_del(&uobj->list); 191 list_del(&uobj->list);
193 kfree(uobj); 192 kfree(uobj);
194 } 193 }
195 194
196 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { 195 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
197 struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); 196 struct ib_qp *qp = uobj->object;
198 struct ib_uqp_object *uqp = 197 struct ib_uqp_object *uqp =
199 container_of(uobj, struct ib_uqp_object, uevent.uobject); 198 container_of(uobj, struct ib_uqp_object, uevent.uobject);
200 idr_remove(&ib_uverbs_qp_idr, uobj->id); 199
200 idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
201 ib_uverbs_detach_umcast(qp, uqp); 201 ib_uverbs_detach_umcast(qp, uqp);
202 ib_destroy_qp(qp); 202 ib_destroy_qp(qp);
203 list_del(&uobj->list); 203 list_del(&uobj->list);
@@ -206,11 +206,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
206 } 206 }
207 207
208 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { 208 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
209 struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); 209 struct ib_cq *cq = uobj->object;
210 struct ib_uverbs_event_file *ev_file = cq->cq_context; 210 struct ib_uverbs_event_file *ev_file = cq->cq_context;
211 struct ib_ucq_object *ucq = 211 struct ib_ucq_object *ucq =
212 container_of(uobj, struct ib_ucq_object, uobject); 212 container_of(uobj, struct ib_ucq_object, uobject);
213 idr_remove(&ib_uverbs_cq_idr, uobj->id); 213
214 idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
214 ib_destroy_cq(cq); 215 ib_destroy_cq(cq);
215 list_del(&uobj->list); 216 list_del(&uobj->list);
216 ib_uverbs_release_ucq(file, ev_file, ucq); 217 ib_uverbs_release_ucq(file, ev_file, ucq);
@@ -218,10 +219,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
218 } 219 }
219 220
220 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { 221 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
221 struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); 222 struct ib_srq *srq = uobj->object;
222 struct ib_uevent_object *uevent = 223 struct ib_uevent_object *uevent =
223 container_of(uobj, struct ib_uevent_object, uobject); 224 container_of(uobj, struct ib_uevent_object, uobject);
224 idr_remove(&ib_uverbs_srq_idr, uobj->id); 225
226 idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
225 ib_destroy_srq(srq); 227 ib_destroy_srq(srq);
226 list_del(&uobj->list); 228 list_del(&uobj->list);
227 ib_uverbs_release_uevent(file, uevent); 229 ib_uverbs_release_uevent(file, uevent);
@@ -231,11 +233,11 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
231 /* XXX Free MWs */ 233 /* XXX Free MWs */
232 234
233 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { 235 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
234 struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); 236 struct ib_mr *mr = uobj->object;
235 struct ib_device *mrdev = mr->device; 237 struct ib_device *mrdev = mr->device;
236 struct ib_umem_object *memobj; 238 struct ib_umem_object *memobj;
237 239
238 idr_remove(&ib_uverbs_mr_idr, uobj->id); 240 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
239 ib_dereg_mr(mr); 241 ib_dereg_mr(mr);
240 242
241 memobj = container_of(uobj, struct ib_umem_object, uobject); 243 memobj = container_of(uobj, struct ib_umem_object, uobject);
@@ -246,15 +248,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
246 } 248 }
247 249
248 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { 250 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
249 struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id); 251 struct ib_pd *pd = uobj->object;
250 idr_remove(&ib_uverbs_pd_idr, uobj->id); 252
253 idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
251 ib_dealloc_pd(pd); 254 ib_dealloc_pd(pd);
252 list_del(&uobj->list); 255 list_del(&uobj->list);
253 kfree(uobj); 256 kfree(uobj);
254 } 257 }
255 258
256 mutex_unlock(&ib_uverbs_idr_mutex);
257
258 return context->device->dealloc_ucontext(context); 259 return context->device->dealloc_ucontext(context);
259} 260}
260 261
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
new file mode 100644
index 000000000000..ce46b13ae02b
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -0,0 +1,138 @@
1/*
2 * Copyright (c) 2005 Intel Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_marshall.h>
34
35static void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
36 struct ib_ah_attr *src)
37{
38 memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
39 dst->grh.flow_label = src->grh.flow_label;
40 dst->grh.sgid_index = src->grh.sgid_index;
41 dst->grh.hop_limit = src->grh.hop_limit;
42 dst->grh.traffic_class = src->grh.traffic_class;
43 dst->dlid = src->dlid;
44 dst->sl = src->sl;
45 dst->src_path_bits = src->src_path_bits;
46 dst->static_rate = src->static_rate;
47 dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
48 dst->port_num = src->port_num;
49}
50
51void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
52 struct ib_qp_attr *src)
53{
54 dst->cur_qp_state = src->cur_qp_state;
55 dst->path_mtu = src->path_mtu;
56 dst->path_mig_state = src->path_mig_state;
57 dst->qkey = src->qkey;
58 dst->rq_psn = src->rq_psn;
59 dst->sq_psn = src->sq_psn;
60 dst->dest_qp_num = src->dest_qp_num;
61 dst->qp_access_flags = src->qp_access_flags;
62
63 dst->max_send_wr = src->cap.max_send_wr;
64 dst->max_recv_wr = src->cap.max_recv_wr;
65 dst->max_send_sge = src->cap.max_send_sge;
66 dst->max_recv_sge = src->cap.max_recv_sge;
67 dst->max_inline_data = src->cap.max_inline_data;
68
69 ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
70 ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
71
72 dst->pkey_index = src->pkey_index;
73 dst->alt_pkey_index = src->alt_pkey_index;
74 dst->en_sqd_async_notify = src->en_sqd_async_notify;
75 dst->sq_draining = src->sq_draining;
76 dst->max_rd_atomic = src->max_rd_atomic;
77 dst->max_dest_rd_atomic = src->max_dest_rd_atomic;
78 dst->min_rnr_timer = src->min_rnr_timer;
79 dst->port_num = src->port_num;
80 dst->timeout = src->timeout;
81 dst->retry_cnt = src->retry_cnt;
82 dst->rnr_retry = src->rnr_retry;
83 dst->alt_port_num = src->alt_port_num;
84 dst->alt_timeout = src->alt_timeout;
85}
86EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
87
88void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
89 struct ib_sa_path_rec *src)
90{
91 memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
92 memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
93
94 dst->dlid = src->dlid;
95 dst->slid = src->slid;
96 dst->raw_traffic = src->raw_traffic;
97 dst->flow_label = src->flow_label;
98 dst->hop_limit = src->hop_limit;
99 dst->traffic_class = src->traffic_class;
100 dst->reversible = src->reversible;
101 dst->numb_path = src->numb_path;
102 dst->pkey = src->pkey;
103 dst->sl = src->sl;
104 dst->mtu_selector = src->mtu_selector;
105 dst->mtu = src->mtu;
106 dst->rate_selector = src->rate_selector;
107 dst->rate = src->rate;
108 dst->packet_life_time = src->packet_life_time;
109 dst->preference = src->preference;
110 dst->packet_life_time_selector = src->packet_life_time_selector;
111}
112EXPORT_SYMBOL(ib_copy_path_rec_to_user);
113
114void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
115 struct ib_user_path_rec *src)
116{
117 memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
118 memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
119
120 dst->dlid = src->dlid;
121 dst->slid = src->slid;
122 dst->raw_traffic = src->raw_traffic;
123 dst->flow_label = src->flow_label;
124 dst->hop_limit = src->hop_limit;
125 dst->traffic_class = src->traffic_class;
126 dst->reversible = src->reversible;
127 dst->numb_path = src->numb_path;
128 dst->pkey = src->pkey;
129 dst->sl = src->sl;
130 dst->mtu_selector = src->mtu_selector;
131 dst->mtu = src->mtu;
132 dst->rate_selector = src->rate_selector;
133 dst->rate = src->rate;
134 dst->packet_life_time = src->packet_life_time;
135 dst->preference = src->preference;
136 dst->packet_life_time_selector = src->packet_life_time_selector;
137}
138EXPORT_SYMBOL(ib_copy_path_rec_from_user);
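
The marshalling helpers above exist so that in-kernel users (the RDMA CM and its userspace interface, for example) can convert kernel ib_ah_attr, ib_qp_attr and ib_sa_path_rec structures into the fixed-layout structures shared with userspace. A hypothetical caller that returns a resolved path record to a process might look like the sketch below; the function name and destination pointer are invented for illustration.

#include <linux/errno.h>
#include <linux/string.h>
#include <asm/uaccess.h>

#include <rdma/ib_marshall.h>

/* Hypothetical: copy a resolved path record out to user space. */
static int example_copy_path_to_user(struct ib_sa_path_rec *rec,
				     void __user *dest)
{
	struct ib_user_path_rec resp;

	memset(&resp, 0, sizeof resp);
	ib_copy_path_rec_to_user(&resp, rec);

	return copy_to_user(dest, &resp, sizeof resp) ? -EFAULT : 0;
}
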
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b78e7dc69330..468999c38803 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -125,35 +125,47 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
125} 125}
126EXPORT_SYMBOL(ib_create_ah); 126EXPORT_SYMBOL(ib_create_ah);
127 127
128struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, 128int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
129 struct ib_grh *grh, u8 port_num) 129 struct ib_grh *grh, struct ib_ah_attr *ah_attr)
130{ 130{
131 struct ib_ah_attr ah_attr;
132 u32 flow_class; 131 u32 flow_class;
133 u16 gid_index; 132 u16 gid_index;
134 int ret; 133 int ret;
135 134
136 memset(&ah_attr, 0, sizeof ah_attr); 135 memset(ah_attr, 0, sizeof *ah_attr);
137 ah_attr.dlid = wc->slid; 136 ah_attr->dlid = wc->slid;
138 ah_attr.sl = wc->sl; 137 ah_attr->sl = wc->sl;
139 ah_attr.src_path_bits = wc->dlid_path_bits; 138 ah_attr->src_path_bits = wc->dlid_path_bits;
140 ah_attr.port_num = port_num; 139 ah_attr->port_num = port_num;
141 140
142 if (wc->wc_flags & IB_WC_GRH) { 141 if (wc->wc_flags & IB_WC_GRH) {
143 ah_attr.ah_flags = IB_AH_GRH; 142 ah_attr->ah_flags = IB_AH_GRH;
144 ah_attr.grh.dgid = grh->sgid; 143 ah_attr->grh.dgid = grh->sgid;
145 144
146 ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num, 145 ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
147 &gid_index); 146 &gid_index);
148 if (ret) 147 if (ret)
149 return ERR_PTR(ret); 148 return ret;
150 149
151 ah_attr.grh.sgid_index = (u8) gid_index; 150 ah_attr->grh.sgid_index = (u8) gid_index;
152 flow_class = be32_to_cpu(grh->version_tclass_flow); 151 flow_class = be32_to_cpu(grh->version_tclass_flow);
153 ah_attr.grh.flow_label = flow_class & 0xFFFFF; 152 ah_attr->grh.flow_label = flow_class & 0xFFFFF;
154 ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; 153 ah_attr->grh.hop_limit = grh->hop_limit;
155 ah_attr.grh.hop_limit = grh->hop_limit; 154 ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
156 } 155 }
156 return 0;
157}
158EXPORT_SYMBOL(ib_init_ah_from_wc);
159
160struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
161 struct ib_grh *grh, u8 port_num)
162{
163 struct ib_ah_attr ah_attr;
164 int ret;
165
166 ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
167 if (ret)
168 return ERR_PTR(ret);
157 169
158 return ib_create_ah(pd, &ah_attr); 170 return ib_create_ah(pd, &ah_attr);
159} 171}
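
With the refactoring above, ib_create_ah_from_wc() becomes a thin wrapper around the new ib_init_ah_from_wc(), so a consumer that only needs the address attributes of a received datagram (to cache them, compare them, or marshal them to userspace) can obtain them without allocating an AH. A hedged sketch of such a consumer follows; the function name is invented, and when an AH is in fact wanted it ends up doing exactly what the wrapper does.

#include <linux/err.h>
#include <rdma/ib_verbs.h>

/* Hypothetical consumer: inspect the sender's address before building an AH. */
static struct ib_ah *example_ah_for_reply(struct ib_pd *pd, u8 port_num,
					  struct ib_wc *wc, struct ib_grh *grh)
{
	struct ib_ah_attr ah_attr;
	int ret;

	ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
	if (ret)
		return ERR_PTR(ret);

	/* ah_attr could be logged, cached or handed to userspace here */

	return ib_create_ah(pd, &ah_attr);
}
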
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index f7f8391fe43f..1a9d0a2c33c3 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -137,47 +137,11 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
137 return reply(smp); 137 return reply(smp);
138} 138}
139 139
140struct port_info {
141 __be64 mkey;
142 __be64 gid_prefix;
143 __be16 lid;
144 __be16 sm_lid;
145 __be32 cap_mask;
146 __be16 diag_code;
147 __be16 mkey_lease_period;
148 u8 local_port_num;
149 u8 link_width_enabled;
150 u8 link_width_supported;
151 u8 link_width_active;
152 u8 linkspeed_portstate; /* 4 bits, 4 bits */
153 u8 portphysstate_linkdown; /* 4 bits, 4 bits */
154 u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */
155 u8 linkspeedactive_enabled; /* 4 bits, 4 bits */
156 u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */
157 u8 vlcap_inittype; /* 4 bits, 4 bits */
158 u8 vl_high_limit;
159 u8 vl_arb_high_cap;
160 u8 vl_arb_low_cap;
161 u8 inittypereply_mtucap; /* 4 bits, 4 bits */
162 u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */
163 u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */
164 __be16 mkey_violations;
165 __be16 pkey_violations;
166 __be16 qkey_violations;
167 u8 guid_cap;
168 u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */
169 u8 resv_resptimevalue; /* 3 bits, 5 bits */
170 u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */
171 __be16 max_credit_hint;
172 u8 resv;
173 u8 link_roundtrip_latency[3];
174} __attribute__ ((packed));
175
176static int recv_subn_get_portinfo(struct ib_smp *smp, 140static int recv_subn_get_portinfo(struct ib_smp *smp,
177 struct ib_device *ibdev, u8 port) 141 struct ib_device *ibdev, u8 port)
178{ 142{
179 struct ipath_ibdev *dev; 143 struct ipath_ibdev *dev;
180 struct port_info *pip = (struct port_info *)smp->data; 144 struct ib_port_info *pip = (struct ib_port_info *)smp->data;
181 u16 lid; 145 u16 lid;
182 u8 ibcstat; 146 u8 ibcstat;
183 u8 mtu; 147 u8 mtu;
@@ -312,7 +276,7 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
312static int recv_subn_set_portinfo(struct ib_smp *smp, 276static int recv_subn_set_portinfo(struct ib_smp *smp,
313 struct ib_device *ibdev, u8 port) 277 struct ib_device *ibdev, u8 port)
314{ 278{
315 struct port_info *pip = (struct port_info *)smp->data; 279 struct ib_port_info *pip = (struct ib_port_info *)smp->data;
316 struct ib_event event; 280 struct ib_event event;
317 struct ipath_ibdev *dev; 281 struct ipath_ibdev *dev;
318 u32 flags; 282 u32 flags;
@@ -445,7 +409,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
445 409
446 if (pip->clientrereg_resv_subnetto & 0x80) { 410 if (pip->clientrereg_resv_subnetto & 0x80) {
447 clientrereg = 1; 411 clientrereg = 1;
448 event.event = IB_EVENT_LID_CHANGE; 412 event.event = IB_EVENT_CLIENT_REREGISTER;
449 ib_dispatch_event(&event); 413 ib_dispatch_event(&event);
450 } 414 }
451 415
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 798e13e14faf..d0f7731802c9 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -174,7 +174,6 @@ enum {
174 174
175struct mthca_cmd_context { 175struct mthca_cmd_context {
176 struct completion done; 176 struct completion done;
177 struct timer_list timer;
178 int result; 177 int result;
179 int next; 178 int next;
180 u64 out_param; 179 u64 out_param;
@@ -362,15 +361,6 @@ void mthca_cmd_event(struct mthca_dev *dev,
362 complete(&context->done); 361 complete(&context->done);
363} 362}
364 363
365static void event_timeout(unsigned long context_ptr)
366{
367 struct mthca_cmd_context *context =
368 (struct mthca_cmd_context *) context_ptr;
369
370 context->result = -EBUSY;
371 complete(&context->done);
372}
373
374static int mthca_cmd_wait(struct mthca_dev *dev, 364static int mthca_cmd_wait(struct mthca_dev *dev,
375 u64 in_param, 365 u64 in_param,
376 u64 *out_param, 366 u64 *out_param,
@@ -401,11 +391,10 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
401 if (err) 391 if (err)
402 goto out; 392 goto out;
403 393
404 context->timer.expires = jiffies + timeout; 394 if (!wait_for_completion_timeout(&context->done, timeout)) {
405 add_timer(&context->timer); 395 err = -EBUSY;
406 396 goto out;
407 wait_for_completion(&context->done); 397 }
408 del_timer_sync(&context->timer);
409 398
410 err = context->result; 399 err = context->result;
411 if (err) 400 if (err)
@@ -535,10 +524,6 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
535 for (i = 0; i < dev->cmd.max_cmds; ++i) { 524 for (i = 0; i < dev->cmd.max_cmds; ++i) {
536 dev->cmd.context[i].token = i; 525 dev->cmd.context[i].token = i;
537 dev->cmd.context[i].next = i + 1; 526 dev->cmd.context[i].next = i + 1;
538 init_timer(&dev->cmd.context[i].timer);
539 dev->cmd.context[i].timer.data =
540 (unsigned long) &dev->cmd.context[i];
541 dev->cmd.context[i].timer.function = event_timeout;
542 } 527 }
543 528
544 dev->cmd.context[dev->cmd.max_cmds - 1].next = -1; 529 dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
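
The mthca command interface above no longer arms a per-command timer just to break out of an unbounded wait_for_completion(); wait_for_completion_timeout() bounds the sleep directly and returns 0 if the timeout expires before mthca_cmd_event() signals the completion. A minimal, mthca-independent sketch of the pattern:

#include <linux/completion.h>
#include <linux/errno.h>

/* Wait up to "timeout" jiffies for complete(done); -EBUSY on timeout. */
static int example_wait_for_event(struct completion *done,
				  unsigned long timeout)
{
	if (!wait_for_completion_timeout(done, timeout))
		return -EBUSY;

	return 0;
}
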
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 205854e9c662..3e27a084257e 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -540,8 +540,17 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
540 entry->wr_id = srq->wrid[wqe_index]; 540 entry->wr_id = srq->wrid[wqe_index];
541 mthca_free_srq_wqe(srq, wqe); 541 mthca_free_srq_wqe(srq, wqe);
542 } else { 542 } else {
543 s32 wqe;
543 wq = &(*cur_qp)->rq; 544 wq = &(*cur_qp)->rq;
544 wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; 545 wqe = be32_to_cpu(cqe->wqe);
546 wqe_index = wqe >> wq->wqe_shift;
547 /*
548 * WQE addr == base - 1 might be reported in receive completion
549 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
550 * Arbel FW 5.1.400. This bug should be fixed in later FW revs.
551 */
552 if (unlikely(wqe_index < 0))
553 wqe_index = wq->max - 1;
545 entry->wr_id = (*cur_qp)->wrid[wqe_index]; 554 entry->wr_id = (*cur_qp)->wrid[wqe_index];
546 } 555 }
547 556
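
Worked example for the receive-completion fix above: when the affected firmware reports a WQE address just below the queue start (base - 1), the 32-bit value wraps to 0xffffffff; read into the signed s32 wqe it becomes -1, the right shift keeps it negative (an arithmetic shift on the compilers the kernel supports), the wqe_index < 0 test fires, and the index is clamped to wq->max - 1, the last receive entry, which is what the firmware meant to report.
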
@@ -813,6 +822,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
813 spin_lock_init(&cq->lock); 822 spin_lock_init(&cq->lock);
814 cq->refcount = 1; 823 cq->refcount = 1;
815 init_waitqueue_head(&cq->wait); 824 init_waitqueue_head(&cq->wait);
825 mutex_init(&cq->mutex);
816 826
817 memset(cq_context, 0, sizeof *cq_context); 827 memset(cq_context, 0, sizeof *cq_context);
818 cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | 828 cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK |
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 99f109c3815d..d536217e700e 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -695,10 +695,6 @@ static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
695 695
696static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) 696static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
697{ 697{
698 unsigned long mthca_base;
699
700 mthca_base = pci_resource_start(dev->pdev, 0);
701
702 if (mthca_is_memfree(dev)) { 698 if (mthca_is_memfree(dev)) {
703 /* 699 /*
704 * We assume that the EQ arm and EQ set CI registers 700 * We assume that the EQ arm and EQ set CI registers
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 4730863ece9a..d9bc030bcccc 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -114,14 +114,22 @@ static void smp_snoop(struct ib_device *ibdev,
114 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && 114 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
115 mad->mad_hdr.method == IB_MGMT_METHOD_SET) { 115 mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
116 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { 116 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
117 struct ib_port_info *pinfo =
118 (struct ib_port_info *) ((struct ib_smp *) mad)->data;
119
117 mthca_update_rate(to_mdev(ibdev), port_num); 120 mthca_update_rate(to_mdev(ibdev), port_num);
118 update_sm_ah(to_mdev(ibdev), port_num, 121 update_sm_ah(to_mdev(ibdev), port_num,
119 be16_to_cpup((__be16 *) (mad->data + 58)), 122 be16_to_cpu(pinfo->lid),
120 (*(u8 *) (mad->data + 76)) & 0xf); 123 pinfo->neighbormtu_mastersmsl & 0xf);
121 124
122 event.device = ibdev; 125 event.device = ibdev;
123 event.event = IB_EVENT_LID_CHANGE;
124 event.element.port_num = port_num; 126 event.element.port_num = port_num;
127
128 if(pinfo->clientrereg_resv_subnetto & 0x80)
129 event.event = IB_EVENT_CLIENT_REREGISTER;
130 else
131 event.event = IB_EVENT_LID_CHANGE;
132
125 ib_dispatch_event(&event); 133 ib_dispatch_event(&event);
126 } 134 }
127 135
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index a2eae8a30167..230ae21db8fd 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -115,6 +115,16 @@ static int mthca_query_device(struct ib_device *ibdev,
115 props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; 115 props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
116 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 116 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
117 props->max_mcast_grp; 117 props->max_mcast_grp;
118 /*
119 * If Sinai memory key optimization is being used, then only
120 * the 8-bit key portion will change. For other HCAs, the
121 * unused index bits will also be used for FMR remapping.
122 */
123 if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
124 props->max_map_per_fmr = 255;
125 else
126 props->max_map_per_fmr =
127 (1 << (32 - long_log2(mdev->limits.num_mpts))) - 1;
118 128
119 err = 0; 129 err = 0;
120 out: 130 out:
@@ -783,18 +793,24 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
783 if (entries < 1 || entries > dev->limits.max_cqes) 793 if (entries < 1 || entries > dev->limits.max_cqes)
784 return -EINVAL; 794 return -EINVAL;
785 795
796 mutex_lock(&cq->mutex);
797
786 entries = roundup_pow_of_two(entries + 1); 798 entries = roundup_pow_of_two(entries + 1);
787 if (entries == ibcq->cqe + 1) 799 if (entries == ibcq->cqe + 1) {
788 return 0; 800 ret = 0;
801 goto out;
802 }
789 803
790 if (cq->is_kernel) { 804 if (cq->is_kernel) {
791 ret = mthca_alloc_resize_buf(dev, cq, entries); 805 ret = mthca_alloc_resize_buf(dev, cq, entries);
792 if (ret) 806 if (ret)
793 return ret; 807 goto out;
794 lkey = cq->resize_buf->buf.mr.ibmr.lkey; 808 lkey = cq->resize_buf->buf.mr.ibmr.lkey;
795 } else { 809 } else {
796 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) 810 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
797 return -EFAULT; 811 ret = -EFAULT;
812 goto out;
813 }
798 lkey = ucmd.lkey; 814 lkey = ucmd.lkey;
799 } 815 }
800 816
@@ -811,7 +827,7 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
811 cq->resize_buf = NULL; 827 cq->resize_buf = NULL;
812 spin_unlock_irq(&cq->lock); 828 spin_unlock_irq(&cq->lock);
813 } 829 }
814 return ret; 830 goto out;
815 } 831 }
816 832
817 if (cq->is_kernel) { 833 if (cq->is_kernel) {
@@ -838,7 +854,10 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
838 } else 854 } else
839 ibcq->cqe = entries - 1; 855 ibcq->cqe = entries - 1;
840 856
841 return 0; 857out:
858 mutex_unlock(&cq->mutex);
859
860 return ret;
842} 861}
843 862
844static int mthca_destroy_cq(struct ib_cq *cq) 863static int mthca_destroy_cq(struct ib_cq *cq)
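
Worked example for the max_map_per_fmr calculation above: on a hypothetical non-Sinai HCA with 2^17 MPT entries, long_log2() returns 17, leaving 32 - 17 = 15 index bits unused, so an FMR can be remapped (1 << 15) - 1 = 32767 times before it must be unmapped and flushed; with the Sinai memory key optimization only the low 8 key bits ever change, hence the fixed value of 255.
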
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 179a8f610d0f..8de2887ba15c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -214,6 +214,7 @@ struct mthca_cq {
214 int arm_sn; 214 int arm_sn;
215 215
216 wait_queue_head_t wait; 216 wait_queue_head_t wait;
217 struct mutex mutex;
217}; 218};
218 219
219struct mthca_srq { 220struct mthca_srq {
@@ -237,6 +238,7 @@ struct mthca_srq {
237 struct mthca_mr mr; 238 struct mthca_mr mr;
238 239
239 wait_queue_head_t wait; 240 wait_queue_head_t wait;
241 struct mutex mutex;
240}; 242};
241 243
242struct mthca_wq { 244struct mthca_wq {
@@ -278,6 +280,7 @@ struct mthca_qp {
278 union mthca_buf queue; 280 union mthca_buf queue;
279 281
280 wait_queue_head_t wait; 282 wait_queue_head_t wait;
283 struct mutex mutex;
281}; 284};
282 285
283struct mthca_sqp { 286struct mthca_sqp {
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 07c13be07a4a..16c387d8170c 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -534,7 +534,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
534 struct mthca_qp_context *qp_context; 534 struct mthca_qp_context *qp_context;
535 u32 sqd_event = 0; 535 u32 sqd_event = 0;
536 u8 status; 536 u8 status;
537 int err; 537 int err = -EINVAL;
538
539 mutex_lock(&qp->mutex);
538 540
539 if (attr_mask & IB_QP_CUR_STATE) { 541 if (attr_mask & IB_QP_CUR_STATE) {
540 cur_state = attr->cur_qp_state; 542 cur_state = attr->cur_qp_state;
@@ -553,39 +555,41 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
553 "%d->%d with attr 0x%08x\n", 555 "%d->%d with attr 0x%08x\n",
554 qp->transport, cur_state, new_state, 556 qp->transport, cur_state, new_state,
555 attr_mask); 557 attr_mask);
556 return -EINVAL; 558 goto out;
557 } 559 }
558 560
559 if ((attr_mask & IB_QP_PKEY_INDEX) && 561 if ((attr_mask & IB_QP_PKEY_INDEX) &&
560 attr->pkey_index >= dev->limits.pkey_table_len) { 562 attr->pkey_index >= dev->limits.pkey_table_len) {
561 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n", 563 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
562 attr->pkey_index, dev->limits.pkey_table_len-1); 564 attr->pkey_index, dev->limits.pkey_table_len-1);
563 return -EINVAL; 565 goto out;
564 } 566 }
565 567
566 if ((attr_mask & IB_QP_PORT) && 568 if ((attr_mask & IB_QP_PORT) &&
567 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { 569 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
568 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num); 570 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
569 return -EINVAL; 571 goto out;
570 } 572 }
571 573
572 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && 574 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
573 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { 575 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
574 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n", 576 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
575 attr->max_rd_atomic, dev->limits.max_qp_init_rdma); 577 attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
576 return -EINVAL; 578 goto out;
577 } 579 }
578 580
579 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && 581 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
580 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { 582 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
581 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n", 583 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
582 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift); 584 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
583 return -EINVAL; 585 goto out;
584 } 586 }
585 587
586 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 588 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
587 if (IS_ERR(mailbox)) 589 if (IS_ERR(mailbox)) {
588 return PTR_ERR(mailbox); 590 err = PTR_ERR(mailbox);
591 goto out;
592 }
589 qp_param = mailbox->buf; 593 qp_param = mailbox->buf;
590 qp_context = &qp_param->context; 594 qp_context = &qp_param->context;
591 memset(qp_param, 0, sizeof *qp_param); 595 memset(qp_param, 0, sizeof *qp_param);
@@ -618,7 +622,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
618 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) { 622 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
619 mthca_dbg(dev, "path MTU (%u) is invalid\n", 623 mthca_dbg(dev, "path MTU (%u) is invalid\n",
620 attr->path_mtu); 624 attr->path_mtu);
621 return -EINVAL; 625 goto out_mailbox;
622 } 626 }
623 qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31; 627 qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
624 } 628 }
@@ -672,7 +676,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
672 if (attr_mask & IB_QP_AV) { 676 if (attr_mask & IB_QP_AV) {
673 if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path, 677 if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
674 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) 678 attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
675 return -EINVAL; 679 goto out_mailbox;
676 680
677 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); 681 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
678 } 682 }
@@ -686,18 +690,18 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
686 if (attr->alt_pkey_index >= dev->limits.pkey_table_len) { 690 if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
687 mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n", 691 mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
688 attr->alt_pkey_index, dev->limits.pkey_table_len-1); 692 attr->alt_pkey_index, dev->limits.pkey_table_len-1);
689 return -EINVAL; 693 goto out_mailbox;
690 } 694 }
691 695
692 if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) { 696 if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
693 mthca_dbg(dev, "Alternate port number (%u) is invalid\n", 697 mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
694 attr->alt_port_num); 698 attr->alt_port_num);
695 return -EINVAL; 699 goto out_mailbox;
696 } 700 }
697 701
698 if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path, 702 if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
699 attr->alt_ah_attr.port_num)) 703 attr->alt_ah_attr.port_num))
700 return -EINVAL; 704 goto out_mailbox;
701 705
702 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | 706 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
703 attr->alt_port_num << 24); 707 attr->alt_port_num << 24);
@@ -793,12 +797,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
793 err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0, 797 err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
794 mailbox, sqd_event, &status); 798 mailbox, sqd_event, &status);
795 if (err) 799 if (err)
796 goto out; 800 goto out_mailbox;
797 if (status) { 801 if (status) {
798 mthca_warn(dev, "modify QP %d->%d returned status %02x.\n", 802 mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
799 cur_state, new_state, status); 803 cur_state, new_state, status);
800 err = -EINVAL; 804 err = -EINVAL;
801 goto out; 805 goto out_mailbox;
802 } 806 }
803 807
804 qp->state = new_state; 808 qp->state = new_state;
@@ -853,8 +857,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
853 } 857 }
854 } 858 }
855 859
856out: 860out_mailbox:
857 mthca_free_mailbox(dev, mailbox); 861 mthca_free_mailbox(dev, mailbox);
862
863out:
864 mutex_unlock(&qp->mutex);
858 return err; 865 return err;
859} 866}
860 867
@@ -1100,6 +1107,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
1100 1107
1101 qp->refcount = 1; 1108 qp->refcount = 1;
1102 init_waitqueue_head(&qp->wait); 1109 init_waitqueue_head(&qp->wait);
1110 mutex_init(&qp->mutex);
1103 qp->state = IB_QPS_RESET; 1111 qp->state = IB_QPS_RESET;
1104 qp->atomic_rd_en = 0; 1112 qp->atomic_rd_en = 0;
1105 qp->resp_depth = 0; 1113 qp->resp_depth = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index df5e494a9d38..f4fddd5327f5 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -49,6 +49,12 @@ int mthca_reset(struct mthca_dev *mdev)
49 u32 *hca_header = NULL; 49 u32 *hca_header = NULL;
50 u32 *bridge_header = NULL; 50 u32 *bridge_header = NULL;
51 struct pci_dev *bridge = NULL; 51 struct pci_dev *bridge = NULL;
52 int bridge_pcix_cap = 0;
53 int hca_pcie_cap = 0;
54 int hca_pcix_cap = 0;
55
56 u16 devctl;
57 u16 linkctl;
52 58
53#define MTHCA_RESET_OFFSET 0xf0010 59#define MTHCA_RESET_OFFSET 0xf0010
54#define MTHCA_RESET_VALUE swab32(1) 60#define MTHCA_RESET_VALUE swab32(1)
@@ -110,6 +116,9 @@ int mthca_reset(struct mthca_dev *mdev)
110 } 116 }
111 } 117 }
112 118
119 hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
120 hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
121
113 if (bridge) { 122 if (bridge) {
114 bridge_header = kmalloc(256, GFP_KERNEL); 123 bridge_header = kmalloc(256, GFP_KERNEL);
115 if (!bridge_header) { 124 if (!bridge_header) {
@@ -129,6 +138,13 @@ int mthca_reset(struct mthca_dev *mdev)
129 goto out; 138 goto out;
130 } 139 }
131 } 140 }
141 bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
142 if (!bridge_pcix_cap) {
143 err = -ENODEV;
144 mthca_err(mdev, "Couldn't locate HCA bridge "
145 "PCI-X capability, aborting.\n");
146 goto out;
147 }
132 } 148 }
133 149
134 /* actually hit reset */ 150 /* actually hit reset */
@@ -178,6 +194,20 @@ int mthca_reset(struct mthca_dev *mdev)
178good: 194good:
179 /* Now restore the PCI headers */ 195 /* Now restore the PCI headers */
180 if (bridge) { 196 if (bridge) {
197 if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
198 bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
199 err = -ENODEV;
200 mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
201 "split transaction control, aborting.\n");
202 goto out;
203 }
204 if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
205 bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
206 err = -ENODEV;
207 mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
208 "split transaction control, aborting.\n");
209 goto out;
210 }
181 /* 211 /*
182 * Bridge control register is at 0x3e, so we'll 212 * Bridge control register is at 0x3e, so we'll
183 * naturally restore it last in this loop. 213 * naturally restore it last in this loop.
@@ -203,6 +233,35 @@ good:
203 } 233 }
204 } 234 }
205 235
236 if (hca_pcix_cap) {
237 if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
238 hca_header[hca_pcix_cap / 4])) {
239 err = -ENODEV;
240 mthca_err(mdev, "Couldn't restore HCA PCI-X "
241 "command register, aborting.\n");
242 goto out;
243 }
244 }
245
246 if (hca_pcie_cap) {
247 devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
248 if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
249 devctl)) {
250 err = -ENODEV;
251 mthca_err(mdev, "Couldn't restore HCA PCI Express "
252 "Device Control register, aborting.\n");
253 goto out;
254 }
255 linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
256 if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
257 linkctl)) {
258 err = -ENODEV;
259 mthca_err(mdev, "Couldn't restore HCA PCI Express "
260 "Link control register, aborting.\n");
261 goto out;
262 }
263 }
264
206 for (i = 0; i < 16; ++i) { 265 for (i = 0; i < 16; ++i) {
207 if (i * 4 == PCI_COMMAND) 266 if (i * 4 == PCI_COMMAND)
208 continue; 267 continue;
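
The mthca_reset() additions above locate the PCI-X and PCI Express capabilities before hitting reset and afterwards restore the PCI-X command register and the PCI Express Device Control and Link Control registers, all of which a device reset clears back to defaults, from the configuration header the function already saves. A condensed sketch of the PCI Express half of that restore, assuming (as the existing code does) that the first 256 bytes of config space were saved dword-by-dword into hdr[] before the reset:

#include <linux/errno.h>
#include <linux/pci.h>

static int example_restore_pcie_regs(struct pci_dev *pdev, u32 *hdr)
{
	int cap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
	u16 devctl, linkctl;

	if (!cap)
		return 0;			/* not a PCI Express device */

	devctl  = hdr[(cap + PCI_EXP_DEVCTL) / 4];
	linkctl = hdr[(cap + PCI_EXP_LNKCTL) / 4];

	if (pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, devctl) ||
	    pci_write_config_word(pdev, cap + PCI_EXP_LNKCTL, linkctl))
		return -ENODEV;

	return 0;
}
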
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b292fefa3b41..fab417c5cf43 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -243,6 +243,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
243 spin_lock_init(&srq->lock); 243 spin_lock_init(&srq->lock);
244 srq->refcount = 1; 244 srq->refcount = 1;
245 init_waitqueue_head(&srq->wait); 245 init_waitqueue_head(&srq->wait);
246 mutex_init(&srq->mutex);
246 247
247 if (mthca_is_memfree(dev)) 248 if (mthca_is_memfree(dev))
248 mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); 249 mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
@@ -371,7 +372,11 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
371 if (attr_mask & IB_SRQ_LIMIT) { 372 if (attr_mask & IB_SRQ_LIMIT) {
372 if (attr->srq_limit > srq->max) 373 if (attr->srq_limit > srq->max)
373 return -EINVAL; 374 return -EINVAL;
375
376 mutex_lock(&srq->mutex);
374 ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); 377 ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
378 mutex_unlock(&srq->mutex);
379
375 if (ret) 380 if (ret)
376 return ret; 381 return ret;
377 if (status) 382 if (status)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 12a1e0572ef2..491d2afaf5b4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -272,8 +272,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
272void ipoib_dev_cleanup(struct net_device *dev); 272void ipoib_dev_cleanup(struct net_device *dev);
273 273
274void ipoib_mcast_join_task(void *dev_ptr); 274void ipoib_mcast_join_task(void *dev_ptr);
275void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, 275void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
276 struct sk_buff *skb);
277 276
278void ipoib_mcast_restart_task(void *dev_ptr); 277void ipoib_mcast_restart_task(void *dev_ptr);
279int ipoib_mcast_start_thread(struct net_device *dev); 278int ipoib_mcast_start_thread(struct net_device *dev);
@@ -369,15 +368,26 @@ extern int ipoib_debug_level;
369#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */ 368#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG_DATA */
370 369
371 370
372#define IPOIB_GID_FMT "%x:%x:%x:%x:%x:%x:%x:%x" 371#define IPOIB_GID_FMT "%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:" \
373 372 "%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x:%2.2x%2.2x"
374#define IPOIB_GID_ARG(gid) be16_to_cpup((__be16 *) ((gid).raw + 0)), \ 373
375 be16_to_cpup((__be16 *) ((gid).raw + 2)), \ 374#define IPOIB_GID_RAW_ARG(gid) ((u8 *)(gid))[0], \
376 be16_to_cpup((__be16 *) ((gid).raw + 4)), \ 375 ((u8 *)(gid))[1], \
377 be16_to_cpup((__be16 *) ((gid).raw + 6)), \ 376 ((u8 *)(gid))[2], \
378 be16_to_cpup((__be16 *) ((gid).raw + 8)), \ 377 ((u8 *)(gid))[3], \
379 be16_to_cpup((__be16 *) ((gid).raw + 10)), \ 378 ((u8 *)(gid))[4], \
380 be16_to_cpup((__be16 *) ((gid).raw + 12)), \ 379 ((u8 *)(gid))[5], \
381 be16_to_cpup((__be16 *) ((gid).raw + 14)) 380 ((u8 *)(gid))[6], \
381 ((u8 *)(gid))[7], \
382 ((u8 *)(gid))[8], \
383 ((u8 *)(gid))[9], \
384 ((u8 *)(gid))[10],\
385 ((u8 *)(gid))[11],\
386 ((u8 *)(gid))[12],\
387 ((u8 *)(gid))[13],\
388 ((u8 *)(gid))[14],\
389 ((u8 *)(gid))[15]
390
391#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw)
382 392
383#endif /* _IPOIB_H */ 393#endif /* _IPOIB_H */
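
The reworked macros above print a GID as all sixteen raw bytes in the usual colon-separated grouping, and IPOIB_GID_RAW_ARG() takes a plain byte pointer so callers such as ipoib_start_xmit() no longer have to cast a hardware address to union ib_gid. A small, hypothetical usage example (the function is invented; ipoib_dbg() is the driver's existing debug macro):

#include "ipoib.h"

static void example_print_dgid(struct ipoib_dev_priv *priv, u8 *raw_gid)
{
	ipoib_dbg(priv, "destination GID " IPOIB_GID_FMT "\n",
		  IPOIB_GID_RAW_ARG(raw_gid));
}
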
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8406839b91cf..5033666b1481 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -84,15 +84,9 @@ void ipoib_free_ah(struct kref *kref)
84 84
85 unsigned long flags; 85 unsigned long flags;
86 86
87 if ((int) priv->tx_tail - (int) ah->last_send >= 0) { 87 spin_lock_irqsave(&priv->lock, flags);
88 ipoib_dbg(priv, "Freeing ah %p\n", ah->ah); 88 list_add_tail(&ah->list, &priv->dead_ahs);
89 ib_destroy_ah(ah->ah); 89 spin_unlock_irqrestore(&priv->lock, flags);
90 kfree(ah);
91 } else {
92 spin_lock_irqsave(&priv->lock, flags);
93 list_add_tail(&ah->list, &priv->dead_ahs);
94 spin_unlock_irqrestore(&priv->lock, flags);
95 }
96} 90}
97 91
98static int ipoib_ib_post_receive(struct net_device *dev, int id) 92static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -377,19 +371,16 @@ static void __ipoib_reap_ah(struct net_device *dev)
377 struct ipoib_ah *ah, *tah; 371 struct ipoib_ah *ah, *tah;
378 LIST_HEAD(remove_list); 372 LIST_HEAD(remove_list);
379 373
380 spin_lock_irq(&priv->lock); 374 spin_lock_irq(&priv->tx_lock);
375 spin_lock(&priv->lock);
381 list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) 376 list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
382 if ((int) priv->tx_tail - (int) ah->last_send >= 0) { 377 if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
383 list_del(&ah->list); 378 list_del(&ah->list);
384 list_add_tail(&ah->list, &remove_list); 379 ib_destroy_ah(ah->ah);
380 kfree(ah);
385 } 381 }
386 spin_unlock_irq(&priv->lock); 382 spin_unlock(&priv->lock);
387 383 spin_unlock_irq(&priv->tx_lock);
388 list_for_each_entry_safe(ah, tah, &remove_list, list) {
389 ipoib_dbg(priv, "Reaping ah %p\n", ah->ah);
390 ib_destroy_ah(ah->ah);
391 kfree(ah);
392 }
393} 384}
394 385
395void ipoib_reap_ah(void *dev_ptr) 386void ipoib_reap_ah(void *dev_ptr)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cb078a7d0bf5..1c6ea1c682a5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -185,8 +185,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
185 return 0; 185 return 0;
186} 186}
187 187
188static struct ipoib_path *__path_find(struct net_device *dev, 188static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
189 union ib_gid *gid)
190{ 189{
191 struct ipoib_dev_priv *priv = netdev_priv(dev); 190 struct ipoib_dev_priv *priv = netdev_priv(dev);
192 struct rb_node *n = priv->path_tree.rb_node; 191 struct rb_node *n = priv->path_tree.rb_node;
@@ -196,7 +195,7 @@ static struct ipoib_path *__path_find(struct net_device *dev,
196 while (n) { 195 while (n) {
197 path = rb_entry(n, struct ipoib_path, rb_node); 196 path = rb_entry(n, struct ipoib_path, rb_node);
198 197
199 ret = memcmp(gid->raw, path->pathrec.dgid.raw, 198 ret = memcmp(gid, path->pathrec.dgid.raw,
200 sizeof (union ib_gid)); 199 sizeof (union ib_gid));
201 200
202 if (ret < 0) 201 if (ret < 0)
@@ -424,8 +423,7 @@ static void path_rec_completion(int status,
424 } 423 }
425} 424}
426 425
427static struct ipoib_path *path_rec_create(struct net_device *dev, 426static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
428 union ib_gid *gid)
429{ 427{
430 struct ipoib_dev_priv *priv = netdev_priv(dev); 428 struct ipoib_dev_priv *priv = netdev_priv(dev);
431 struct ipoib_path *path; 429 struct ipoib_path *path;
@@ -440,7 +438,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev,
440 438
441 INIT_LIST_HEAD(&path->neigh_list); 439 INIT_LIST_HEAD(&path->neigh_list);
442 440
443 memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid)); 441 memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
444 path->pathrec.sgid = priv->local_gid; 442 path->pathrec.sgid = priv->local_gid;
445 path->pathrec.pkey = cpu_to_be16(priv->pkey); 443 path->pathrec.pkey = cpu_to_be16(priv->pkey);
446 path->pathrec.numb_path = 1; 444 path->pathrec.numb_path = 1;
@@ -498,10 +496,9 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
498 */ 496 */
499 spin_lock(&priv->lock); 497 spin_lock(&priv->lock);
500 498
501 path = __path_find(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4)); 499 path = __path_find(dev, skb->dst->neighbour->ha + 4);
502 if (!path) { 500 if (!path) {
503 path = path_rec_create(dev, 501 path = path_rec_create(dev, skb->dst->neighbour->ha + 4);
504 (union ib_gid *) (skb->dst->neighbour->ha + 4));
505 if (!path) 502 if (!path)
506 goto err_path; 503 goto err_path;
507 504
@@ -551,7 +548,7 @@ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
551 /* Add in the P_Key for multicasts */ 548 /* Add in the P_Key for multicasts */
552 skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff; 549 skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
553 skb->dst->neighbour->ha[9] = priv->pkey & 0xff; 550 skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
554 ipoib_mcast_send(dev, (union ib_gid *) (skb->dst->neighbour->ha + 4), skb); 551 ipoib_mcast_send(dev, skb->dst->neighbour->ha + 4, skb);
555} 552}
556 553
557static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, 554static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -566,10 +563,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
566 */ 563 */
567 spin_lock(&priv->lock); 564 spin_lock(&priv->lock);
568 565
569 path = __path_find(dev, (union ib_gid *) (phdr->hwaddr + 4)); 566 path = __path_find(dev, phdr->hwaddr + 4);
570 if (!path) { 567 if (!path) {
571 path = path_rec_create(dev, 568 path = path_rec_create(dev, phdr->hwaddr + 4);
572 (union ib_gid *) (phdr->hwaddr + 4));
573 if (path) { 569 if (path) {
574 /* put pseudoheader back on for next time */ 570 /* put pseudoheader back on for next time */
575 skb_push(skb, sizeof *phdr); 571 skb_push(skb, sizeof *phdr);
@@ -660,7 +656,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
660 phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; 656 phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
661 phdr->hwaddr[9] = priv->pkey & 0xff; 657 phdr->hwaddr[9] = priv->pkey & 0xff;
662 658
663 ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb); 659 ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
664 } else { 660 } else {
665 /* unicast GID -- should be ARP or RARP reply */ 661 /* unicast GID -- should be ARP or RARP reply */
666 662
@@ -671,7 +667,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
671 skb->dst ? "neigh" : "dst", 667 skb->dst ? "neigh" : "dst",
672 be16_to_cpup((__be16 *) skb->data), 668 be16_to_cpup((__be16 *) skb->data),
673 be32_to_cpup((__be32 *) phdr->hwaddr), 669 be32_to_cpup((__be32 *) phdr->hwaddr),
674 IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4))); 670 IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
675 dev_kfree_skb_any(skb); 671 dev_kfree_skb_any(skb);
676 ++priv->stats.tx_dropped; 672 ++priv->stats.tx_dropped;
677 goto out; 673 goto out;
@@ -754,7 +750,7 @@ static void ipoib_neigh_destructor(struct neighbour *n)
754 ipoib_dbg(priv, 750 ipoib_dbg(priv,
755 "neigh_destructor for %06x " IPOIB_GID_FMT "\n", 751 "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
756 be32_to_cpup((__be32 *) n->ha), 752 be32_to_cpup((__be32 *) n->ha),
757 IPOIB_GID_ARG(*((union ib_gid *) (n->ha + 4)))); 753 IPOIB_GID_RAW_ARG(n->ha + 4));
758 754
759 spin_lock_irqsave(&priv->lock, flags); 755 spin_lock_irqsave(&priv->lock, flags);
760 756
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 1d917edcf9ba..216471fa01cc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -154,7 +154,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
154 return mcast; 154 return mcast;
155} 155}
156 156
157static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid) 157static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
158{ 158{
159 struct ipoib_dev_priv *priv = netdev_priv(dev); 159 struct ipoib_dev_priv *priv = netdev_priv(dev);
160 struct rb_node *n = priv->multicast_tree.rb_node; 160 struct rb_node *n = priv->multicast_tree.rb_node;
@@ -165,7 +165,7 @@ static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_g
165 165
166 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 166 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
167 167
168 ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw, 168 ret = memcmp(mgid, mcast->mcmember.mgid.raw,
169 sizeof (union ib_gid)); 169 sizeof (union ib_gid));
170 if (ret < 0) 170 if (ret < 0)
171 n = n->rb_left; 171 n = n->rb_left;
@@ -694,8 +694,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
694 return 0; 694 return 0;
695} 695}
696 696
697void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, 697void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
698 struct sk_buff *skb)
699{ 698{
700 struct ipoib_dev_priv *priv = netdev_priv(dev); 699 struct ipoib_dev_priv *priv = netdev_priv(dev);
701 struct ipoib_mcast *mcast; 700 struct ipoib_mcast *mcast;
@@ -718,7 +717,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
718 if (!mcast) { 717 if (!mcast) {
719 /* Let's create a new send only group now */ 718 /* Let's create a new send only group now */
720 ipoib_dbg_mcast(priv, "setting up send only multicast group for " 719 ipoib_dbg_mcast(priv, "setting up send only multicast group for "
721 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); 720 IPOIB_GID_FMT "\n", IPOIB_GID_RAW_ARG(mgid));
722 721
723 mcast = ipoib_mcast_alloc(dev, 0); 722 mcast = ipoib_mcast_alloc(dev, 0);
724 if (!mcast) { 723 if (!mcast) {
@@ -730,7 +729,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
730 } 729 }
731 730
732 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 731 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
733 mcast->mcmember.mgid = *mgid; 732 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
734 __ipoib_mcast_add(dev, mcast); 733 __ipoib_mcast_add(dev, mcast);
735 list_add_tail(&mcast->list, &priv->multicast_list); 734 list_add_tail(&mcast->list, &priv->multicast_list);
736 } 735 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 1d49d1643c59..7b717c648f72 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -255,7 +255,8 @@ void ipoib_event(struct ib_event_handler *handler,
255 record->event == IB_EVENT_PKEY_CHANGE || 255 record->event == IB_EVENT_PKEY_CHANGE ||
256 record->event == IB_EVENT_PORT_ACTIVE || 256 record->event == IB_EVENT_PORT_ACTIVE ||
257 record->event == IB_EVENT_LID_CHANGE || 257 record->event == IB_EVENT_LID_CHANGE ||
258 record->event == IB_EVENT_SM_CHANGE) { 258 record->event == IB_EVENT_SM_CHANGE ||
259 record->event == IB_EVENT_CLIENT_REREGISTER) {
259 ipoib_dbg(priv, "Port state change event\n"); 260 ipoib_dbg(priv, "Port state change event\n");
260 queue_work(ipoib_workqueue, &priv->flush_task); 261 queue_work(ipoib_workqueue, &priv->flush_task);
261 } 262 }
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9cbdffa08dc2..4e22afef7206 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -62,6 +62,13 @@ MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
62 "v" DRV_VERSION " (" DRV_RELDATE ")"); 62 "v" DRV_VERSION " (" DRV_RELDATE ")");
63MODULE_LICENSE("Dual BSD/GPL"); 63MODULE_LICENSE("Dual BSD/GPL");
64 64
65static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE;
66static int srp_max_iu_len;
67
68module_param(srp_sg_tablesize, int, 0444);
69MODULE_PARM_DESC(srp_sg_tablesize,
70 "Max number of gather/scatter entries per I/O (default is 12)");
71
65static int topspin_workarounds = 1; 72static int topspin_workarounds = 1;
66 73
67module_param(topspin_workarounds, int, 0444); 74module_param(topspin_workarounds, int, 0444);
@@ -105,7 +112,8 @@ static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
105 if (!iu->buf) 112 if (!iu->buf)
106 goto out_free_iu; 113 goto out_free_iu;
107 114
108 iu->dma = dma_map_single(host->dev->dma_device, iu->buf, size, direction); 115 iu->dma = dma_map_single(host->dev->dev->dma_device,
116 iu->buf, size, direction);
109 if (dma_mapping_error(iu->dma)) 117 if (dma_mapping_error(iu->dma))
110 goto out_free_buf; 118 goto out_free_buf;
111 119
@@ -127,7 +135,8 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
127 if (!iu) 135 if (!iu)
128 return; 136 return;
129 137
130 dma_unmap_single(host->dev->dma_device, iu->dma, iu->size, iu->direction); 138 dma_unmap_single(host->dev->dev->dma_device,
139 iu->dma, iu->size, iu->direction);
131 kfree(iu->buf); 140 kfree(iu->buf);
132 kfree(iu); 141 kfree(iu);
133} 142}
@@ -147,7 +156,7 @@ static int srp_init_qp(struct srp_target_port *target,
147 if (!attr) 156 if (!attr)
148 return -ENOMEM; 157 return -ENOMEM;
149 158
150 ret = ib_find_cached_pkey(target->srp_host->dev, 159 ret = ib_find_cached_pkey(target->srp_host->dev->dev,
151 target->srp_host->port, 160 target->srp_host->port,
152 be16_to_cpu(target->path.pkey), 161 be16_to_cpu(target->path.pkey),
153 &attr->pkey_index); 162 &attr->pkey_index);
@@ -179,7 +188,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
179 if (!init_attr) 188 if (!init_attr)
180 return -ENOMEM; 189 return -ENOMEM;
181 190
182 target->cq = ib_create_cq(target->srp_host->dev, srp_completion, 191 target->cq = ib_create_cq(target->srp_host->dev->dev, srp_completion,
183 NULL, target, SRP_CQ_SIZE); 192 NULL, target, SRP_CQ_SIZE);
184 if (IS_ERR(target->cq)) { 193 if (IS_ERR(target->cq)) {
185 ret = PTR_ERR(target->cq); 194 ret = PTR_ERR(target->cq);
@@ -198,7 +207,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
198 init_attr->send_cq = target->cq; 207 init_attr->send_cq = target->cq;
199 init_attr->recv_cq = target->cq; 208 init_attr->recv_cq = target->cq;
200 209
201 target->qp = ib_create_qp(target->srp_host->pd, init_attr); 210 target->qp = ib_create_qp(target->srp_host->dev->pd, init_attr);
202 if (IS_ERR(target->qp)) { 211 if (IS_ERR(target->qp)) {
203 ret = PTR_ERR(target->qp); 212 ret = PTR_ERR(target->qp);
204 ib_destroy_cq(target->cq); 213 ib_destroy_cq(target->cq);
@@ -250,7 +259,7 @@ static int srp_lookup_path(struct srp_target_port *target)
250 259
251 init_completion(&target->done); 260 init_completion(&target->done);
252 261
253 target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev, 262 target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
254 target->srp_host->port, 263 target->srp_host->port,
255 &target->path, 264 &target->path,
256 IB_SA_PATH_REC_DGID | 265 IB_SA_PATH_REC_DGID |
@@ -309,10 +318,32 @@ static int srp_send_req(struct srp_target_port *target)
309 318
310 req->priv.opcode = SRP_LOGIN_REQ; 319 req->priv.opcode = SRP_LOGIN_REQ;
311 req->priv.tag = 0; 320 req->priv.tag = 0;
312 req->priv.req_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); 321 req->priv.req_it_iu_len = cpu_to_be32(srp_max_iu_len);
313 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 322 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
314 SRP_BUF_FORMAT_INDIRECT); 323 SRP_BUF_FORMAT_INDIRECT);
315 memcpy(req->priv.initiator_port_id, target->srp_host->initiator_port_id, 16); 324 /*
325 * In the published SRP specification (draft rev. 16a), the
326 * port identifier format is 8 bytes of ID extension followed
327 * by 8 bytes of GUID. Older drafts put the two halves in the
328 * opposite order, so that the GUID comes first.
329 *
330 * Targets conforming to these obsolete drafts can be
331 * recognized by the I/O Class they report.
332 */
333 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
334 memcpy(req->priv.initiator_port_id,
335 target->srp_host->initiator_port_id + 8, 8);
336 memcpy(req->priv.initiator_port_id + 8,
337 target->srp_host->initiator_port_id, 8);
338 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
339 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
340 } else {
341 memcpy(req->priv.initiator_port_id,
342 target->srp_host->initiator_port_id, 16);
343 memcpy(req->priv.target_port_id, &target->id_ext, 8);
344 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
345 }
346
316 /* 347 /*
317 * Topspin/Cisco SRP targets will reject our login unless we 348 * Topspin/Cisco SRP targets will reject our login unless we
318 * zero out the first 8 bytes of our initiator port ID. The 349 * zero out the first 8 bytes of our initiator port ID. The
@@ -325,8 +356,6 @@ static int srp_send_req(struct srp_target_port *target)
325 (unsigned long long) be64_to_cpu(target->ioc_guid)); 356 (unsigned long long) be64_to_cpu(target->ioc_guid));
326 memset(req->priv.initiator_port_id, 0, 8); 357 memset(req->priv.initiator_port_id, 0, 8);
327 } 358 }
328 memcpy(req->priv.target_port_id, &target->id_ext, 8);
329 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
330 359
331 status = ib_send_cm_req(target->cm_id, &req->param); 360 status = ib_send_cm_req(target->cm_id, &req->param);
332 361
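To make the byte-order juggling above concrete, here is a small hedged sketch (not part of the patch) that lays out the two target port-ID formats side by side; the helper name is invented for illustration, while SRP_REV10_IB_IO_CLASS and the id_ext/ioc_guid fields are the ones used in the hunk:

/* Illustration only: 16-byte SRP target port ID layouts.
 *
 *   SRP rev. 16a (SRP_REV16A_IB_IO_CLASS):  bytes 0..7 = ID extension,
 *                                           bytes 8..15 = GUID
 *   Obsolete rev. 10 drafts:                bytes 0..7 = GUID,
 *                                           bytes 8..15 = ID extension
 */
static void sketch_build_target_port_id(u8 *dst, __be64 id_ext,
                                        __be64 ioc_guid, u16 io_class)
{
        if (io_class == SRP_REV10_IB_IO_CLASS) {
                memcpy(dst,     &ioc_guid, 8);
                memcpy(dst + 8, &id_ext,   8);
        } else {
                memcpy(dst,     &id_ext,   8);
                memcpy(dst + 8, &ioc_guid, 8);
        }
}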
@@ -359,9 +388,9 @@ static void srp_remove_work(void *target_ptr)
359 target->state = SRP_TARGET_REMOVED; 388 target->state = SRP_TARGET_REMOVED;
360 spin_unlock_irq(target->scsi_host->host_lock); 389 spin_unlock_irq(target->scsi_host->host_lock);
361 390
362 mutex_lock(&target->srp_host->target_mutex); 391 spin_lock(&target->srp_host->target_lock);
363 list_del(&target->list); 392 list_del(&target->list);
364 mutex_unlock(&target->srp_host->target_mutex); 393 spin_unlock(&target->srp_host->target_lock);
365 394
366 scsi_remove_host(target->scsi_host); 395 scsi_remove_host(target->scsi_host);
367 ib_destroy_cm_id(target->cm_id); 396 ib_destroy_cm_id(target->cm_id);
@@ -421,6 +450,11 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
421 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 450 scmnd->sc_data_direction != DMA_FROM_DEVICE))
422 return; 451 return;
423 452
453 if (req->fmr) {
454 ib_fmr_pool_unmap(req->fmr);
455 req->fmr = NULL;
456 }
457
424 /* 458 /*
425 * This handling of non-SG commands can be killed when the 459 * This handling of non-SG commands can be killed when the
426 * SCSI midlayer no longer generates non-SG commands. 460 * SCSI midlayer no longer generates non-SG commands.
@@ -433,18 +467,30 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
433 scat = &req->fake_sg; 467 scat = &req->fake_sg;
434 } 468 }
435 469
436 dma_unmap_sg(target->srp_host->dev->dma_device, scat, nents, 470 dma_unmap_sg(target->srp_host->dev->dev->dma_device, scat, nents,
437 scmnd->sc_data_direction); 471 scmnd->sc_data_direction);
438} 472}
439 473
474static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
475{
476 srp_unmap_data(req->scmnd, target, req);
477 list_move_tail(&req->list, &target->free_reqs);
478}
479
480static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
481{
482 req->scmnd->result = DID_RESET << 16;
483 req->scmnd->scsi_done(req->scmnd);
484 srp_remove_req(target, req);
485}
486
440static int srp_reconnect_target(struct srp_target_port *target) 487static int srp_reconnect_target(struct srp_target_port *target)
441{ 488{
442 struct ib_cm_id *new_cm_id; 489 struct ib_cm_id *new_cm_id;
443 struct ib_qp_attr qp_attr; 490 struct ib_qp_attr qp_attr;
444 struct srp_request *req; 491 struct srp_request *req, *tmp;
445 struct ib_wc wc; 492 struct ib_wc wc;
446 int ret; 493 int ret;
447 int i;
448 494
449 spin_lock_irq(target->scsi_host->host_lock); 495 spin_lock_irq(target->scsi_host->host_lock);
450 if (target->state != SRP_TARGET_LIVE) { 496 if (target->state != SRP_TARGET_LIVE) {
@@ -459,7 +505,7 @@ static int srp_reconnect_target(struct srp_target_port *target)
459 * Now get a new local CM ID so that we avoid confusing the 505 * Now get a new local CM ID so that we avoid confusing the
460 * target in case things are really fouled up. 506 * target in case things are really fouled up.
461 */ 507 */
462 new_cm_id = ib_create_cm_id(target->srp_host->dev, 508 new_cm_id = ib_create_cm_id(target->srp_host->dev->dev,
463 srp_cm_handler, target); 509 srp_cm_handler, target);
464 if (IS_ERR(new_cm_id)) { 510 if (IS_ERR(new_cm_id)) {
465 ret = PTR_ERR(new_cm_id); 511 ret = PTR_ERR(new_cm_id);
@@ -480,19 +526,12 @@ static int srp_reconnect_target(struct srp_target_port *target)
480 while (ib_poll_cq(target->cq, 1, &wc) > 0) 526 while (ib_poll_cq(target->cq, 1, &wc) > 0)
481 ; /* nothing */ 527 ; /* nothing */
482 528
483 list_for_each_entry(req, &target->req_queue, list) { 529 list_for_each_entry_safe(req, tmp, &target->req_queue, list)
484 req->scmnd->result = DID_RESET << 16; 530 srp_reset_req(target, req);
485 req->scmnd->scsi_done(req->scmnd);
486 srp_unmap_data(req->scmnd, target, req);
487 }
488 531
489 target->rx_head = 0; 532 target->rx_head = 0;
490 target->tx_head = 0; 533 target->tx_head = 0;
491 target->tx_tail = 0; 534 target->tx_tail = 0;
492 INIT_LIST_HEAD(&target->free_reqs);
493 INIT_LIST_HEAD(&target->req_queue);
494 for (i = 0; i < SRP_SQ_SIZE; ++i)
495 list_add_tail(&target->req_ring[i].list, &target->free_reqs);
496 535
497 ret = srp_connect_target(target); 536 ret = srp_connect_target(target);
498 if (ret) 537 if (ret)
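One detail worth spelling out in the loop change above: srp_reset_req() ends in srp_remove_req(), which list_move_tail()s the request off req_queue and onto free_reqs, so a plain list_for_each_entry() would follow a pointer that has already been spliced away; hence the switch to the _safe iterator. A minimal, generic sketch of the idiom (names are illustrative, not from ib_srp.c; only <linux/list.h> is assumed):

#include <linux/list.h>

struct item {
        struct list_head list;
};

/* "tmp" caches the next entry before "it" is moved, so the walk
 * survives the list_move_tail() in the loop body. */
static void drain(struct list_head *pending, struct list_head *done)
{
        struct item *it, *tmp;

        list_for_each_entry_safe(it, tmp, pending, list)
                list_move_tail(&it->list, done);
}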
@@ -528,14 +567,79 @@ err:
528 return ret; 567 return ret;
529} 568}
530 569
570static int srp_map_fmr(struct srp_device *dev, struct scatterlist *scat,
571 int sg_cnt, struct srp_request *req,
572 struct srp_direct_buf *buf)
573{
574 u64 io_addr = 0;
575 u64 *dma_pages;
576 u32 len;
577 int page_cnt;
578 int i, j;
579 int ret;
580
581 if (!dev->fmr_pool)
582 return -ENODEV;
583
584 len = page_cnt = 0;
585 for (i = 0; i < sg_cnt; ++i) {
586 if (sg_dma_address(&scat[i]) & ~dev->fmr_page_mask) {
587 if (i > 0)
588 return -EINVAL;
589 else
590 ++page_cnt;
591 }
592 if ((sg_dma_address(&scat[i]) + sg_dma_len(&scat[i])) &
593 ~dev->fmr_page_mask) {
594 if (i < sg_cnt - 1)
595 return -EINVAL;
596 else
597 ++page_cnt;
598 }
599
600 len += sg_dma_len(&scat[i]);
601 }
602
603 page_cnt += len >> dev->fmr_page_shift;
604 if (page_cnt > SRP_FMR_SIZE)
605 return -ENOMEM;
606
607 dma_pages = kmalloc(sizeof (u64) * page_cnt, GFP_ATOMIC);
608 if (!dma_pages)
609 return -ENOMEM;
610
611 page_cnt = 0;
612 for (i = 0; i < sg_cnt; ++i)
613 for (j = 0; j < sg_dma_len(&scat[i]); j += dev->fmr_page_size)
614 dma_pages[page_cnt++] =
615 (sg_dma_address(&scat[i]) & dev->fmr_page_mask) + j;
616
617 req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool,
618 dma_pages, page_cnt, &io_addr);
619 if (IS_ERR(req->fmr)) {
620 ret = PTR_ERR(req->fmr);
621 goto out;
622 }
623
624 buf->va = cpu_to_be64(sg_dma_address(&scat[0]) & ~dev->fmr_page_mask);
625 buf->key = cpu_to_be32(req->fmr->fmr->rkey);
626 buf->len = cpu_to_be32(len);
627
628 ret = 0;
629
630out:
631 kfree(dma_pages);
632
633 return ret;
634}
635
531static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, 636static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
532 struct srp_request *req) 637 struct srp_request *req)
533{ 638{
534 struct scatterlist *scat; 639 struct scatterlist *scat;
535 struct srp_cmd *cmd = req->cmd->buf; 640 struct srp_cmd *cmd = req->cmd->buf;
536 int len, nents, count; 641 int len, nents, count;
537 int i; 642 u8 fmt = SRP_DATA_DESC_DIRECT;
538 u8 fmt;
539 643
540 if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) 644 if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
541 return sizeof (struct srp_cmd); 645 return sizeof (struct srp_cmd);
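A worked example of the page accounting in srp_map_fmr() above; all addresses and lengths are invented and assume dev->fmr_page_shift == 12 (4 KiB FMR pages), which is only one possible outcome of the capability probe added later in srp_add_one():

/* Illustration only, assuming dev->fmr_page_shift == 12:
 *
 *   scat[0]: dma 0x10000200, len 0x0e00  (starts mid-page: first entry,
 *                                         allowed, page_cnt = 1)
 *   scat[1]: dma 0x20000000, len 0x2000  (aligned at both ends)
 *   scat[2]: dma 0x30000000, len 0x0300  (ends mid-page: last entry,
 *                                         allowed, page_cnt = 2)
 *
 *   len      = 0x0e00 + 0x2000 + 0x0300 = 0x3100
 *   page_cnt = 2 + (0x3100 >> 12)       = 2 + 3 = 5   (upper bound used
 *                                                      to size dma_pages[])
 *
 * Any *middle* entry that starts or ends off a page boundary cannot be
 * covered by a single virtually contiguous FMR, so the function returns
 * -EINVAL and srp_map_data() falls back to an indirect descriptor.
 */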
@@ -560,53 +664,63 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
560 sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen); 664 sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
561 } 665 }
562 666
563 count = dma_map_sg(target->srp_host->dev->dma_device, scat, nents, 667 count = dma_map_sg(target->srp_host->dev->dev->dma_device,
564 scmnd->sc_data_direction); 668 scat, nents, scmnd->sc_data_direction);
669
670 fmt = SRP_DATA_DESC_DIRECT;
671 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
565 672
566 if (count == 1) { 673 if (count == 1) {
674 /*
675 * The midlayer only generated a single gather/scatter
676 * entry, or DMA mapping coalesced everything to a
677 * single entry. So a direct descriptor along with
678 * the DMA MR suffices.
679 */
567 struct srp_direct_buf *buf = (void *) cmd->add_data; 680 struct srp_direct_buf *buf = (void *) cmd->add_data;
568 681
569 fmt = SRP_DATA_DESC_DIRECT;
570
571 buf->va = cpu_to_be64(sg_dma_address(scat)); 682 buf->va = cpu_to_be64(sg_dma_address(scat));
572 buf->key = cpu_to_be32(target->srp_host->mr->rkey); 683 buf->key = cpu_to_be32(target->srp_host->dev->mr->rkey);
573 buf->len = cpu_to_be32(sg_dma_len(scat)); 684 buf->len = cpu_to_be32(sg_dma_len(scat));
574 685 } else if (srp_map_fmr(target->srp_host->dev, scat, count, req,
575 len = sizeof (struct srp_cmd) + 686 (void *) cmd->add_data)) {
576 sizeof (struct srp_direct_buf); 687 /*
577 } else { 688 * FMR mapping failed, and the scatterlist has more
689 * than one entry. Generate an indirect memory
690 * descriptor.
691 */
578 struct srp_indirect_buf *buf = (void *) cmd->add_data; 692 struct srp_indirect_buf *buf = (void *) cmd->add_data;
579 u32 datalen = 0; 693 u32 datalen = 0;
694 int i;
580 695
581 fmt = SRP_DATA_DESC_INDIRECT; 696 fmt = SRP_DATA_DESC_INDIRECT;
697 len = sizeof (struct srp_cmd) +
698 sizeof (struct srp_indirect_buf) +
699 count * sizeof (struct srp_direct_buf);
700
701 for (i = 0; i < count; ++i) {
702 buf->desc_list[i].va =
703 cpu_to_be64(sg_dma_address(&scat[i]));
704 buf->desc_list[i].key =
705 cpu_to_be32(target->srp_host->dev->mr->rkey);
706 buf->desc_list[i].len =
707 cpu_to_be32(sg_dma_len(&scat[i]));
708 datalen += sg_dma_len(&scat[i]);
709 }
582 710
583 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 711 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
584 cmd->data_out_desc_cnt = count; 712 cmd->data_out_desc_cnt = count;
585 else 713 else
586 cmd->data_in_desc_cnt = count; 714 cmd->data_in_desc_cnt = count;
587 715
588 buf->table_desc.va = cpu_to_be64(req->cmd->dma + 716 buf->table_desc.va =
589 sizeof *cmd + 717 cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf);
590 sizeof *buf);
591 buf->table_desc.key = 718 buf->table_desc.key =
592 cpu_to_be32(target->srp_host->mr->rkey); 719 cpu_to_be32(target->srp_host->dev->mr->rkey);
593 buf->table_desc.len = 720 buf->table_desc.len =
594 cpu_to_be32(count * sizeof (struct srp_direct_buf)); 721 cpu_to_be32(count * sizeof (struct srp_direct_buf));
595 722
596 for (i = 0; i < count; ++i) {
597 buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i]));
598 buf->desc_list[i].key =
599 cpu_to_be32(target->srp_host->mr->rkey);
600 buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
601
602 datalen += sg_dma_len(&scat[i]);
603 }
604
605 buf->len = cpu_to_be32(datalen); 723 buf->len = cpu_to_be32(datalen);
606
607 len = sizeof (struct srp_cmd) +
608 sizeof (struct srp_indirect_buf) +
609 count * sizeof (struct srp_direct_buf);
610 } 724 }
611 725
612 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 726 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
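To summarize the restructured srp_map_data() above, the three possible outcomes and the IU length each produces (struct names as used in the hunk; this is a reading aid, not new code):

/*   count == 1              -> SRP_DATA_DESC_DIRECT
 *       len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf)
 *
 *   count > 1, FMR mapped   -> still SRP_DATA_DESC_DIRECT: the FMR
 *       presents the whole scatterlist as one virtually contiguous
 *       region, so the same single direct descriptor (filled in by
 *       srp_map_fmr()) is enough.
 *
 *   count > 1, no FMR       -> SRP_DATA_DESC_INDIRECT
 *       len = sizeof (struct srp_cmd) + sizeof (struct srp_indirect_buf)
 *             + count * sizeof (struct srp_direct_buf)
 */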
@@ -617,12 +731,6 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
617 return len; 731 return len;
618} 732}
619 733
620static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
621{
622 srp_unmap_data(req->scmnd, target, req);
623 list_move_tail(&req->list, &target->free_reqs);
624}
625
626static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) 734static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
627{ 735{
628 struct srp_request *req; 736 struct srp_request *req;
@@ -689,7 +797,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
689 797
690 iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV]; 798 iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV];
691 799
692 dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, 800 dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma,
693 target->max_ti_iu_len, DMA_FROM_DEVICE); 801 target->max_ti_iu_len, DMA_FROM_DEVICE);
694 802
695 opcode = *(u8 *) iu->buf; 803 opcode = *(u8 *) iu->buf;
@@ -726,7 +834,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
726 break; 834 break;
727 } 835 }
728 836
729 dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma, 837 dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma,
730 target->max_ti_iu_len, DMA_FROM_DEVICE); 838 target->max_ti_iu_len, DMA_FROM_DEVICE);
731} 839}
732 840
@@ -770,7 +878,7 @@ static int __srp_post_recv(struct srp_target_port *target)
770 878
771 list.addr = iu->dma; 879 list.addr = iu->dma;
772 list.length = iu->size; 880 list.length = iu->size;
773 list.lkey = target->srp_host->mr->lkey; 881 list.lkey = target->srp_host->dev->mr->lkey;
774 882
775 wr.next = NULL; 883 wr.next = NULL;
776 wr.sg_list = &list; 884 wr.sg_list = &list;
@@ -805,12 +913,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target)
805 if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) 913 if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
806 return NULL; 914 return NULL;
807 915
808 if (unlikely(target->req_lim < 1)) { 916 if (unlikely(target->req_lim < 1))
809 if (printk_ratelimit()) 917 ++target->zero_req_lim;
810 printk(KERN_DEBUG PFX "Target has req_lim %d\n",
811 target->req_lim);
812 return NULL;
813 }
814 918
815 return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; 919 return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
816} 920}
@@ -828,7 +932,7 @@ static int __srp_post_send(struct srp_target_port *target,
828 932
829 list.addr = iu->dma; 933 list.addr = iu->dma;
830 list.length = len; 934 list.length = len;
831 list.lkey = target->srp_host->mr->lkey; 935 list.lkey = target->srp_host->dev->mr->lkey;
832 936
833 wr.next = NULL; 937 wr.next = NULL;
834 wr.wr_id = target->tx_head & SRP_SQ_SIZE; 938 wr.wr_id = target->tx_head & SRP_SQ_SIZE;
@@ -870,8 +974,8 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
870 if (!iu) 974 if (!iu)
871 goto err; 975 goto err;
872 976
873 dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, 977 dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma,
874 SRP_MAX_IU_LEN, DMA_TO_DEVICE); 978 srp_max_iu_len, DMA_TO_DEVICE);
875 979
876 req = list_entry(target->free_reqs.next, struct srp_request, list); 980 req = list_entry(target->free_reqs.next, struct srp_request, list);
877 981
@@ -903,8 +1007,8 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,
903 goto err_unmap; 1007 goto err_unmap;
904 } 1008 }
905 1009
906 dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma, 1010 dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma,
907 SRP_MAX_IU_LEN, DMA_TO_DEVICE); 1011 srp_max_iu_len, DMA_TO_DEVICE);
908 1012
909 if (__srp_post_send(target, iu, len)) { 1013 if (__srp_post_send(target, iu, len)) {
910 printk(KERN_ERR PFX "Send failed\n"); 1014 printk(KERN_ERR PFX "Send failed\n");
@@ -936,7 +1040,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
936 1040
937 for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { 1041 for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
938 target->tx_ring[i] = srp_alloc_iu(target->srp_host, 1042 target->tx_ring[i] = srp_alloc_iu(target->srp_host,
939 SRP_MAX_IU_LEN, 1043 srp_max_iu_len,
940 GFP_KERNEL, DMA_TO_DEVICE); 1044 GFP_KERNEL, DMA_TO_DEVICE);
941 if (!target->tx_ring[i]) 1045 if (!target->tx_ring[i])
942 goto err; 1046 goto err;
@@ -1107,11 +1211,10 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1107 srp_cm_rej_handler(cm_id, event, target); 1211 srp_cm_rej_handler(cm_id, event, target);
1108 break; 1212 break;
1109 1213
1110 case IB_CM_MRA_RECEIVED: 1214 case IB_CM_DREQ_RECEIVED:
1111 printk(KERN_ERR PFX "MRA received\n"); 1215 printk(KERN_WARNING PFX "DREQ received - connection closed\n");
1112 break; 1216 if (ib_send_cm_drep(cm_id, NULL, 0))
1113 1217 printk(KERN_ERR PFX "Sending CM DREP failed\n");
1114 case IB_CM_DREP_RECEIVED:
1115 break; 1218 break;
1116 1219
1117 case IB_CM_TIMEWAIT_EXIT: 1220 case IB_CM_TIMEWAIT_EXIT:
@@ -1121,6 +1224,11 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1121 target->status = 0; 1224 target->status = 0;
1122 break; 1225 break;
1123 1226
1227 case IB_CM_MRA_RECEIVED:
1228 case IB_CM_DREQ_ERROR:
1229 case IB_CM_DREP_RECEIVED:
1230 break;
1231
1124 default: 1232 default:
1125 printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event); 1233 printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event);
1126 break; 1234 break;
@@ -1239,11 +1347,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
1239 spin_lock_irq(target->scsi_host->host_lock); 1347 spin_lock_irq(target->scsi_host->host_lock);
1240 1348
1241 list_for_each_entry_safe(req, tmp, &target->req_queue, list) 1349 list_for_each_entry_safe(req, tmp, &target->req_queue, list)
1242 if (req->scmnd->device == scmnd->device) { 1350 if (req->scmnd->device == scmnd->device)
1243 req->scmnd->result = DID_RESET << 16; 1351 srp_reset_req(target, req);
1244 req->scmnd->scsi_done(req->scmnd);
1245 srp_remove_req(target, req);
1246 }
1247 1352
1248 spin_unlock_irq(target->scsi_host->host_lock); 1353 spin_unlock_irq(target->scsi_host->host_lock);
1249 1354
@@ -1329,11 +1434,23 @@ static ssize_t show_dgid(struct class_device *cdev, char *buf)
1329 be16_to_cpu(((__be16 *) target->path.dgid.raw)[7])); 1434 be16_to_cpu(((__be16 *) target->path.dgid.raw)[7]));
1330} 1435}
1331 1436
1437static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf)
1438{
1439 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1440
1441 if (target->state == SRP_TARGET_DEAD ||
1442 target->state == SRP_TARGET_REMOVED)
1443 return -ENODEV;
1444
1445 return sprintf(buf, "%d\n", target->zero_req_lim);
1446}
1447
1332static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 1448static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
1333static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 1449static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
1334static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 1450static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
1335static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 1451static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
1336static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 1452static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
1453static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
1337 1454
1338static struct class_device_attribute *srp_host_attrs[] = { 1455static struct class_device_attribute *srp_host_attrs[] = {
1339 &class_device_attr_id_ext, 1456 &class_device_attr_id_ext,
@@ -1341,6 +1458,7 @@ static struct class_device_attribute *srp_host_attrs[] = {
1341 &class_device_attr_service_id, 1458 &class_device_attr_service_id,
1342 &class_device_attr_pkey, 1459 &class_device_attr_pkey,
1343 &class_device_attr_dgid, 1460 &class_device_attr_dgid,
1461 &class_device_attr_zero_req_lim,
1344 NULL 1462 NULL
1345}; 1463};
1346 1464
@@ -1354,7 +1472,6 @@ static struct scsi_host_template srp_template = {
1354 .eh_host_reset_handler = srp_reset_host, 1472 .eh_host_reset_handler = srp_reset_host,
1355 .can_queue = SRP_SQ_SIZE, 1473 .can_queue = SRP_SQ_SIZE,
1356 .this_id = -1, 1474 .this_id = -1,
1357 .sg_tablesize = SRP_MAX_INDIRECT,
1358 .cmd_per_lun = SRP_SQ_SIZE, 1475 .cmd_per_lun = SRP_SQ_SIZE,
1359 .use_clustering = ENABLE_CLUSTERING, 1476 .use_clustering = ENABLE_CLUSTERING,
1360 .shost_attrs = srp_host_attrs 1477 .shost_attrs = srp_host_attrs
@@ -1365,18 +1482,17 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
1365 sprintf(target->target_name, "SRP.T10:%016llX", 1482 sprintf(target->target_name, "SRP.T10:%016llX",
1366 (unsigned long long) be64_to_cpu(target->id_ext)); 1483 (unsigned long long) be64_to_cpu(target->id_ext));
1367 1484
1368 if (scsi_add_host(target->scsi_host, host->dev->dma_device)) 1485 if (scsi_add_host(target->scsi_host, host->dev->dev->dma_device))
1369 return -ENODEV; 1486 return -ENODEV;
1370 1487
1371 mutex_lock(&host->target_mutex); 1488 spin_lock(&host->target_lock);
1372 list_add_tail(&target->list, &host->target_list); 1489 list_add_tail(&target->list, &host->target_list);
1373 mutex_unlock(&host->target_mutex); 1490 spin_unlock(&host->target_lock);
1374 1491
1375 target->state = SRP_TARGET_LIVE; 1492 target->state = SRP_TARGET_LIVE;
1376 1493
1377 /* XXX: are we supposed to have a definition of SCAN_WILD_CARD ?? */
1378 scsi_scan_target(&target->scsi_host->shost_gendev, 1494 scsi_scan_target(&target->scsi_host->shost_gendev,
1379 0, target->scsi_id, ~0, 0); 1495 0, target->scsi_id, SCAN_WILD_CARD, 0);
1380 1496
1381 return 0; 1497 return 0;
1382} 1498}
@@ -1410,6 +1526,8 @@ enum {
1410 SRP_OPT_PKEY = 1 << 3, 1526 SRP_OPT_PKEY = 1 << 3,
1411 SRP_OPT_SERVICE_ID = 1 << 4, 1527 SRP_OPT_SERVICE_ID = 1 << 4,
1412 SRP_OPT_MAX_SECT = 1 << 5, 1528 SRP_OPT_MAX_SECT = 1 << 5,
1529 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
1530 SRP_OPT_IO_CLASS = 1 << 7,
1413 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 1531 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
1414 SRP_OPT_IOC_GUID | 1532 SRP_OPT_IOC_GUID |
1415 SRP_OPT_DGID | 1533 SRP_OPT_DGID |
@@ -1418,13 +1536,15 @@ enum {
1418}; 1536};
1419 1537
1420static match_table_t srp_opt_tokens = { 1538static match_table_t srp_opt_tokens = {
1421 { SRP_OPT_ID_EXT, "id_ext=%s" }, 1539 { SRP_OPT_ID_EXT, "id_ext=%s" },
1422 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 1540 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
1423 { SRP_OPT_DGID, "dgid=%s" }, 1541 { SRP_OPT_DGID, "dgid=%s" },
1424 { SRP_OPT_PKEY, "pkey=%x" }, 1542 { SRP_OPT_PKEY, "pkey=%x" },
1425 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 1543 { SRP_OPT_SERVICE_ID, "service_id=%s" },
1426 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 1544 { SRP_OPT_MAX_SECT, "max_sect=%d" },
1427 { SRP_OPT_ERR, NULL } 1545 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
1546 { SRP_OPT_IO_CLASS, "io_class=%x" },
1547 { SRP_OPT_ERR, NULL }
1428}; 1548};
1429 1549
1430static int srp_parse_options(const char *buf, struct srp_target_port *target) 1550static int srp_parse_options(const char *buf, struct srp_target_port *target)
@@ -1500,6 +1620,29 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
1500 target->scsi_host->max_sectors = token; 1620 target->scsi_host->max_sectors = token;
1501 break; 1621 break;
1502 1622
1623 case SRP_OPT_MAX_CMD_PER_LUN:
1624 if (match_int(args, &token)) {
1625 printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p);
1626 goto out;
1627 }
1628 target->scsi_host->cmd_per_lun = min(token, SRP_SQ_SIZE);
1629 break;
1630
1631 case SRP_OPT_IO_CLASS:
1632 if (match_hex(args, &token)) {
1633 printk(KERN_WARNING PFX "bad IO class parameter '%s' \n", p);
1634 goto out;
1635 }
1636 if (token != SRP_REV10_IB_IO_CLASS &&
1637 token != SRP_REV16A_IB_IO_CLASS) {
1638 printk(KERN_WARNING PFX "unknown IO class parameter value"
1639 " %x specified (use %x or %x).\n",
1640 token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS);
1641 goto out;
1642 }
1643 target->io_class = token;
1644 break;
1645
1503 default: 1646 default:
1504 printk(KERN_WARNING PFX "unknown parameter or missing value " 1647 printk(KERN_WARNING PFX "unknown parameter or missing value "
1505 "'%s' in target creation request\n", p); 1648 "'%s' in target creation request\n", p);
@@ -1542,6 +1685,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
1542 target = host_to_target(target_host); 1685 target = host_to_target(target_host);
1543 memset(target, 0, sizeof *target); 1686 memset(target, 0, sizeof *target);
1544 1687
1688 target->io_class = SRP_REV16A_IB_IO_CLASS;
1545 target->scsi_host = target_host; 1689 target->scsi_host = target_host;
1546 target->srp_host = host; 1690 target->srp_host = host;
1547 1691
@@ -1558,7 +1702,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
1558 if (ret) 1702 if (ret)
1559 goto err; 1703 goto err;
1560 1704
1561 ib_get_cached_gid(host->dev, host->port, 0, &target->path.sgid); 1705 ib_get_cached_gid(host->dev->dev, host->port, 0, &target->path.sgid);
1562 1706
1563 printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " 1707 printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
1564 "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 1708 "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
@@ -1579,7 +1723,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
1579 if (ret) 1723 if (ret)
1580 goto err; 1724 goto err;
1581 1725
1582 target->cm_id = ib_create_cm_id(host->dev, srp_cm_handler, target); 1726 target->cm_id = ib_create_cm_id(host->dev->dev, srp_cm_handler, target);
1583 if (IS_ERR(target->cm_id)) { 1727 if (IS_ERR(target->cm_id)) {
1584 ret = PTR_ERR(target->cm_id); 1728 ret = PTR_ERR(target->cm_id);
1585 goto err_free; 1729 goto err_free;
@@ -1619,7 +1763,7 @@ static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
1619 struct srp_host *host = 1763 struct srp_host *host =
1620 container_of(class_dev, struct srp_host, class_dev); 1764 container_of(class_dev, struct srp_host, class_dev);
1621 1765
1622 return sprintf(buf, "%s\n", host->dev->name); 1766 return sprintf(buf, "%s\n", host->dev->dev->name);
1623} 1767}
1624 1768
1625static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 1769static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -1634,7 +1778,7 @@ static ssize_t show_port(struct class_device *class_dev, char *buf)
1634 1778
1635static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); 1779static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
1636 1780
1637static struct srp_host *srp_add_port(struct ib_device *device, u8 port) 1781static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
1638{ 1782{
1639 struct srp_host *host; 1783 struct srp_host *host;
1640 1784
@@ -1643,32 +1787,21 @@ static struct srp_host *srp_add_port(struct ib_device *device, u8 port)
1643 return NULL; 1787 return NULL;
1644 1788
1645 INIT_LIST_HEAD(&host->target_list); 1789 INIT_LIST_HEAD(&host->target_list);
1646 mutex_init(&host->target_mutex); 1790 spin_lock_init(&host->target_lock);
1647 init_completion(&host->released); 1791 init_completion(&host->released);
1648 host->dev = device; 1792 host->dev = device;
1649 host->port = port; 1793 host->port = port;
1650 1794
1651 host->initiator_port_id[7] = port; 1795 host->initiator_port_id[7] = port;
1652 memcpy(host->initiator_port_id + 8, &device->node_guid, 8); 1796 memcpy(host->initiator_port_id + 8, &device->dev->node_guid, 8);
1653
1654 host->pd = ib_alloc_pd(device);
1655 if (IS_ERR(host->pd))
1656 goto err_free;
1657
1658 host->mr = ib_get_dma_mr(host->pd,
1659 IB_ACCESS_LOCAL_WRITE |
1660 IB_ACCESS_REMOTE_READ |
1661 IB_ACCESS_REMOTE_WRITE);
1662 if (IS_ERR(host->mr))
1663 goto err_pd;
1664 1797
1665 host->class_dev.class = &srp_class; 1798 host->class_dev.class = &srp_class;
1666 host->class_dev.dev = device->dma_device; 1799 host->class_dev.dev = device->dev->dma_device;
1667 snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d", 1800 snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d",
1668 device->name, port); 1801 device->dev->name, port);
1669 1802
1670 if (class_device_register(&host->class_dev)) 1803 if (class_device_register(&host->class_dev))
1671 goto err_mr; 1804 goto free_host;
1672 if (class_device_create_file(&host->class_dev, &class_device_attr_add_target)) 1805 if (class_device_create_file(&host->class_dev, &class_device_attr_add_target))
1673 goto err_class; 1806 goto err_class;
1674 if (class_device_create_file(&host->class_dev, &class_device_attr_ibdev)) 1807 if (class_device_create_file(&host->class_dev, &class_device_attr_ibdev))
@@ -1681,13 +1814,7 @@ static struct srp_host *srp_add_port(struct ib_device *device, u8 port)
1681err_class: 1814err_class:
1682 class_device_unregister(&host->class_dev); 1815 class_device_unregister(&host->class_dev);
1683 1816
1684err_mr: 1817free_host:
1685 ib_dereg_mr(host->mr);
1686
1687err_pd:
1688 ib_dealloc_pd(host->pd);
1689
1690err_free:
1691 kfree(host); 1818 kfree(host);
1692 1819
1693 return NULL; 1820 return NULL;
@@ -1695,15 +1822,62 @@ err_free:
1695 1822
1696static void srp_add_one(struct ib_device *device) 1823static void srp_add_one(struct ib_device *device)
1697{ 1824{
1698 struct list_head *dev_list; 1825 struct srp_device *srp_dev;
1826 struct ib_device_attr *dev_attr;
1827 struct ib_fmr_pool_param fmr_param;
1699 struct srp_host *host; 1828 struct srp_host *host;
1700 int s, e, p; 1829 int s, e, p;
1701 1830
1702 dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 1831 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
1703 if (!dev_list) 1832 if (!dev_attr)
1704 return; 1833 return;
1705 1834
1706 INIT_LIST_HEAD(dev_list); 1835 if (ib_query_device(device, dev_attr)) {
1836 printk(KERN_WARNING PFX "Query device failed for %s\n",
1837 device->name);
1838 goto free_attr;
1839 }
1840
1841 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
1842 if (!srp_dev)
1843 goto free_attr;
1844
1845 /*
1846 * Use the smallest page size supported by the HCA, down to a
1847 * minimum of 512 bytes (which is the smallest sector that a
1848 * SCSI command will ever carry).
1849 */
1850 srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1);
1851 srp_dev->fmr_page_size = 1 << srp_dev->fmr_page_shift;
1852 srp_dev->fmr_page_mask = ~((unsigned long) srp_dev->fmr_page_size - 1);
1853
1854 INIT_LIST_HEAD(&srp_dev->dev_list);
1855
1856 srp_dev->dev = device;
1857 srp_dev->pd = ib_alloc_pd(device);
1858 if (IS_ERR(srp_dev->pd))
1859 goto free_dev;
1860
1861 srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
1862 IB_ACCESS_LOCAL_WRITE |
1863 IB_ACCESS_REMOTE_READ |
1864 IB_ACCESS_REMOTE_WRITE);
1865 if (IS_ERR(srp_dev->mr))
1866 goto err_pd;
1867
1868 memset(&fmr_param, 0, sizeof fmr_param);
1869 fmr_param.pool_size = SRP_FMR_POOL_SIZE;
1870 fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
1871 fmr_param.cache = 1;
1872 fmr_param.max_pages_per_fmr = SRP_FMR_SIZE;
1873 fmr_param.page_shift = srp_dev->fmr_page_shift;
1874 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
1875 IB_ACCESS_REMOTE_WRITE |
1876 IB_ACCESS_REMOTE_READ);
1877
1878 srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
1879 if (IS_ERR(srp_dev->fmr_pool))
1880 srp_dev->fmr_pool = NULL;
1707 1881
1708 if (device->node_type == IB_NODE_SWITCH) { 1882 if (device->node_type == IB_NODE_SWITCH) {
1709 s = 0; 1883 s = 0;
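A quick worked example of the fmr_page_shift computation added above; the page_size_cap value is hypothetical and stands in for whatever ib_query_device() reported for the HCA:

/* Illustration only.  ffs() returns the 1-based index of the lowest set
 * bit, so for a hypothetical HCA whose smallest supported page is 4 KiB:
 *
 *   page_size_cap  = 0xfffff000
 *   ffs(...) - 1   = 12
 *   fmr_page_shift = max(9, 12) = 12   ->  fmr_page_size = 4096
 *
 * The max(9, ...) clamp keeps the FMR page size at or above a 512-byte
 * SCSI sector even if the capability mask reported something smaller.
 */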
@@ -1714,25 +1888,35 @@ static void srp_add_one(struct ib_device *device)
1714 } 1888 }
1715 1889
1716 for (p = s; p <= e; ++p) { 1890 for (p = s; p <= e; ++p) {
1717 host = srp_add_port(device, p); 1891 host = srp_add_port(srp_dev, p);
1718 if (host) 1892 if (host)
1719 list_add_tail(&host->list, dev_list); 1893 list_add_tail(&host->list, &srp_dev->dev_list);
1720 } 1894 }
1721 1895
1722 ib_set_client_data(device, &srp_client, dev_list); 1896 ib_set_client_data(device, &srp_client, srp_dev);
1897
1898 goto free_attr;
1899
1900err_pd:
1901 ib_dealloc_pd(srp_dev->pd);
1902
1903free_dev:
1904 kfree(srp_dev);
1905
1906free_attr:
1907 kfree(dev_attr);
1723} 1908}
1724 1909
1725static void srp_remove_one(struct ib_device *device) 1910static void srp_remove_one(struct ib_device *device)
1726{ 1911{
1727 struct list_head *dev_list; 1912 struct srp_device *srp_dev;
1728 struct srp_host *host, *tmp_host; 1913 struct srp_host *host, *tmp_host;
1729 LIST_HEAD(target_list); 1914 LIST_HEAD(target_list);
1730 struct srp_target_port *target, *tmp_target; 1915 struct srp_target_port *target, *tmp_target;
1731 unsigned long flags;
1732 1916
1733 dev_list = ib_get_client_data(device, &srp_client); 1917 srp_dev = ib_get_client_data(device, &srp_client);
1734 1918
1735 list_for_each_entry_safe(host, tmp_host, dev_list, list) { 1919 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
1736 class_device_unregister(&host->class_dev); 1920 class_device_unregister(&host->class_dev);
1737 /* 1921 /*
1738 * Wait for the sysfs entry to go away, so that no new 1922 * Wait for the sysfs entry to go away, so that no new
@@ -1744,15 +1928,13 @@ static void srp_remove_one(struct ib_device *device)
1744 * Mark all target ports as removed, so we stop queueing 1928 * Mark all target ports as removed, so we stop queueing
1745 * commands and don't try to reconnect. 1929 * commands and don't try to reconnect.
1746 */ 1930 */
1747 mutex_lock(&host->target_mutex); 1931 spin_lock(&host->target_lock);
1748 list_for_each_entry_safe(target, tmp_target, 1932 list_for_each_entry(target, &host->target_list, list) {
1749 &host->target_list, list) { 1933 spin_lock_irq(target->scsi_host->host_lock);
1750 spin_lock_irqsave(target->scsi_host->host_lock, flags); 1934 target->state = SRP_TARGET_REMOVED;
1751 if (target->state != SRP_TARGET_REMOVED) 1935 spin_unlock_irq(target->scsi_host->host_lock);
1752 target->state = SRP_TARGET_REMOVED;
1753 spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
1754 } 1936 }
1755 mutex_unlock(&host->target_mutex); 1937 spin_unlock(&host->target_lock);
1756 1938
1757 /* 1939 /*
1758 * Wait for any reconnection tasks that may have 1940 * Wait for any reconnection tasks that may have
@@ -1770,18 +1952,26 @@ static void srp_remove_one(struct ib_device *device)
1770 scsi_host_put(target->scsi_host); 1952 scsi_host_put(target->scsi_host);
1771 } 1953 }
1772 1954
1773 ib_dereg_mr(host->mr);
1774 ib_dealloc_pd(host->pd);
1775 kfree(host); 1955 kfree(host);
1776 } 1956 }
1777 1957
1778 kfree(dev_list); 1958 if (srp_dev->fmr_pool)
1959 ib_destroy_fmr_pool(srp_dev->fmr_pool);
1960 ib_dereg_mr(srp_dev->mr);
1961 ib_dealloc_pd(srp_dev->pd);
1962
1963 kfree(srp_dev);
1779} 1964}
1780 1965
1781static int __init srp_init_module(void) 1966static int __init srp_init_module(void)
1782{ 1967{
1783 int ret; 1968 int ret;
1784 1969
1970 srp_template.sg_tablesize = srp_sg_tablesize;
1971 srp_max_iu_len = (sizeof (struct srp_cmd) +
1972 sizeof (struct srp_indirect_buf) +
1973 srp_sg_tablesize * 16);
1974
1785 ret = class_register(&srp_class); 1975 ret = class_register(&srp_class);
1786 if (ret) { 1976 if (ret) {
1787 printk(KERN_ERR PFX "couldn't register class infiniband_srp\n"); 1977 printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
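A back-of-the-envelope check of the srp_max_iu_len formula above, with the default srp_sg_tablesize of 12 and the 16-byte srp_direct_buf descriptor that the "* 16" term stands for:

/*   srp_max_iu_len = sizeof (struct srp_cmd)
 *                  + sizeof (struct srp_indirect_buf)
 *                  + 12 * sizeof (struct srp_direct_buf)
 *
 * i.e. just enough room for a command whose data is described by a
 * fully populated indirect descriptor table; raising srp_sg_tablesize
 * grows every transmit IU by 16 bytes per additional S/G entry.
 */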
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index c5cd43aae860..5b581fb8eb0d 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -46,6 +46,7 @@
46#include <rdma/ib_verbs.h> 46#include <rdma/ib_verbs.h>
47#include <rdma/ib_sa.h> 47#include <rdma/ib_sa.h>
48#include <rdma/ib_cm.h> 48#include <rdma/ib_cm.h>
49#include <rdma/ib_fmr_pool.h>
49 50
50enum { 51enum {
51 SRP_PATH_REC_TIMEOUT_MS = 1000, 52 SRP_PATH_REC_TIMEOUT_MS = 1000,
@@ -55,20 +56,21 @@ enum {
55 SRP_DLID_REDIRECT = 2, 56 SRP_DLID_REDIRECT = 2,
56 57
57 SRP_MAX_LUN = 512, 58 SRP_MAX_LUN = 512,
58 SRP_MAX_IU_LEN = 256, 59 SRP_DEF_SG_TABLESIZE = 12,
59 60
60 SRP_RQ_SHIFT = 6, 61 SRP_RQ_SHIFT = 6,
61 SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT, 62 SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
62 SRP_SQ_SIZE = SRP_RQ_SIZE - 1, 63 SRP_SQ_SIZE = SRP_RQ_SIZE - 1,
63 SRP_CQ_SIZE = SRP_SQ_SIZE + SRP_RQ_SIZE, 64 SRP_CQ_SIZE = SRP_SQ_SIZE + SRP_RQ_SIZE,
64 65
65 SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1) 66 SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1),
67
68 SRP_FMR_SIZE = 256,
69 SRP_FMR_POOL_SIZE = 1024,
70 SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4
66}; 71};
67 72
68#define SRP_OP_RECV (1 << 31) 73#define SRP_OP_RECV (1 << 31)
69#define SRP_MAX_INDIRECT ((SRP_MAX_IU_LEN - \
70 sizeof (struct srp_cmd) - \
71 sizeof (struct srp_indirect_buf)) / 16)
72 74
73enum srp_target_state { 75enum srp_target_state {
74 SRP_TARGET_LIVE, 76 SRP_TARGET_LIVE,
@@ -77,15 +79,24 @@ enum srp_target_state {
77 SRP_TARGET_REMOVED 79 SRP_TARGET_REMOVED
78}; 80};
79 81
80struct srp_host { 82struct srp_device {
81 u8 initiator_port_id[16]; 83 struct list_head dev_list;
82 struct ib_device *dev; 84 struct ib_device *dev;
83 u8 port;
84 struct ib_pd *pd; 85 struct ib_pd *pd;
85 struct ib_mr *mr; 86 struct ib_mr *mr;
87 struct ib_fmr_pool *fmr_pool;
88 int fmr_page_shift;
89 int fmr_page_size;
90 unsigned long fmr_page_mask;
91};
92
93struct srp_host {
94 u8 initiator_port_id[16];
95 struct srp_device *dev;
96 u8 port;
86 struct class_device class_dev; 97 struct class_device class_dev;
87 struct list_head target_list; 98 struct list_head target_list;
88 struct mutex target_mutex; 99 spinlock_t target_lock;
89 struct completion released; 100 struct completion released;
90 struct list_head list; 101 struct list_head list;
91}; 102};
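This split is what drives the many host->dev->dev changes earlier in the patch: the per-port srp_host objects now share one per-HCA srp_device that owns the PD, the DMA MR and the FMR pool. A hedged sketch of the resulting pointer chain (the helper is illustrative, not part of the patch):

/* Illustration only: how a per-port host reaches shared per-HCA
 * resources after this change.
 *   host->dev      : shared struct srp_device (PD, MR, FMR pool)
 *   host->dev->dev : the underlying struct ib_device               */
static struct ib_device *sketch_hca(struct srp_host *host)
{
        return host->dev->dev;
}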
@@ -95,6 +106,7 @@ struct srp_request {
95 struct scsi_cmnd *scmnd; 106 struct scsi_cmnd *scmnd;
96 struct srp_iu *cmd; 107 struct srp_iu *cmd;
97 struct srp_iu *tsk_mgmt; 108 struct srp_iu *tsk_mgmt;
109 struct ib_pool_fmr *fmr;
98 /* 110 /*
99 * Fake scatterlist used when scmnd->use_sg==0. Can be killed 111 * Fake scatterlist used when scmnd->use_sg==0. Can be killed
100 * when the SCSI midlayer no longer generates non-SG commands. 112 * when the SCSI midlayer no longer generates non-SG commands.
@@ -110,6 +122,7 @@ struct srp_target_port {
110 __be64 id_ext; 122 __be64 id_ext;
111 __be64 ioc_guid; 123 __be64 ioc_guid;
112 __be64 service_id; 124 __be64 service_id;
125 u16 io_class;
113 struct srp_host *srp_host; 126 struct srp_host *srp_host;
114 struct Scsi_Host *scsi_host; 127 struct Scsi_Host *scsi_host;
115 char target_name[32]; 128 char target_name[32];
@@ -126,6 +139,8 @@ struct srp_target_port {
126 int max_ti_iu_len; 139 int max_ti_iu_len;
127 s32 req_lim; 140 s32 req_lim;
128 141
142 int zero_req_lim;
143
129 unsigned rx_head; 144 unsigned rx_head;
130 struct srp_iu *rx_ring[SRP_RQ_SIZE]; 145 struct srp_iu *rx_ring[SRP_RQ_SIZE];
131 146