aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/agent.c293
-rw-r--r--drivers/infiniband/core/agent.h13
-rw-r--r--drivers/infiniband/core/agent_priv.h62
-rw-r--r--drivers/infiniband/core/cm.c217
-rw-r--r--drivers/infiniband/core/cm_msgs.h1
-rw-r--r--drivers/infiniband/core/device.c12
-rw-r--r--drivers/infiniband/core/mad.c335
-rw-r--r--drivers/infiniband/core/mad_priv.h8
-rw-r--r--drivers/infiniband/core/mad_rmpp.c114
-rw-r--r--drivers/infiniband/core/mad_rmpp.h2
-rw-r--r--drivers/infiniband/core/sa_query.c270
-rw-r--r--drivers/infiniband/core/smi.h2
-rw-r--r--drivers/infiniband/core/sysfs.c16
-rw-r--r--drivers/infiniband/core/ucm.c267
-rw-r--r--drivers/infiniband/core/ucm.h83
-rw-r--r--drivers/infiniband/core/user_mad.c399
-rw-r--r--drivers/infiniband/core/uverbs.h62
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c858
-rw-r--r--drivers/infiniband/core/uverbs_main.c502
-rw-r--r--drivers/infiniband/core/verbs.c18
-rw-r--r--drivers/infiniband/hw/mthca/Makefile3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c153
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h22
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c21
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c72
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c49
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c16
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c43
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c120
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c15
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c9
38 files changed, 2447 insertions, 1678 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 5ac86f566dc0..0c3c6952faae 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -37,58 +37,41 @@
37 * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ 37 * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
38 */ 38 */
39 39
40#include <linux/dma-mapping.h> 40#include "agent.h"
41 41#include "smi.h"
42#include <asm/bug.h>
43 42
44#include <rdma/ib_smi.h> 43#define SPFX "ib_agent: "
45 44
46#include "smi.h" 45struct ib_agent_port_private {
47#include "agent_priv.h" 46 struct list_head port_list;
48#include "mad_priv.h" 47 struct ib_mad_agent *agent[2];
49#include "agent.h" 48};
50 49
51spinlock_t ib_agent_port_list_lock; 50static DEFINE_SPINLOCK(ib_agent_port_list_lock);
52static LIST_HEAD(ib_agent_port_list); 51static LIST_HEAD(ib_agent_port_list);
53 52
54/* 53static struct ib_agent_port_private *
55 * Caller must hold ib_agent_port_list_lock 54__ib_get_agent_port(struct ib_device *device, int port_num)
56 */
57static inline struct ib_agent_port_private *
58__ib_get_agent_port(struct ib_device *device, int port_num,
59 struct ib_mad_agent *mad_agent)
60{ 55{
61 struct ib_agent_port_private *entry; 56 struct ib_agent_port_private *entry;
62 57
63 BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */ 58 list_for_each_entry(entry, &ib_agent_port_list, port_list) {
64 59 if (entry->agent[0]->device == device &&
65 if (device) { 60 entry->agent[0]->port_num == port_num)
66 list_for_each_entry(entry, &ib_agent_port_list, port_list) { 61 return entry;
67 if (entry->smp_agent->device == device &&
68 entry->port_num == port_num)
69 return entry;
70 }
71 } else {
72 list_for_each_entry(entry, &ib_agent_port_list, port_list) {
73 if ((entry->smp_agent == mad_agent) ||
74 (entry->perf_mgmt_agent == mad_agent))
75 return entry;
76 }
77 } 62 }
78 return NULL; 63 return NULL;
79} 64}
80 65
81static inline struct ib_agent_port_private * 66static struct ib_agent_port_private *
82ib_get_agent_port(struct ib_device *device, int port_num, 67ib_get_agent_port(struct ib_device *device, int port_num)
83 struct ib_mad_agent *mad_agent)
84{ 68{
85 struct ib_agent_port_private *entry; 69 struct ib_agent_port_private *entry;
86 unsigned long flags; 70 unsigned long flags;
87 71
88 spin_lock_irqsave(&ib_agent_port_list_lock, flags); 72 spin_lock_irqsave(&ib_agent_port_list_lock, flags);
89 entry = __ib_get_agent_port(device, port_num, mad_agent); 73 entry = __ib_get_agent_port(device, port_num);
90 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); 74 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
91
92 return entry; 75 return entry;
93} 76}
94 77
@@ -100,192 +83,76 @@ int smi_check_local_dr_smp(struct ib_smp *smp,
100 83
101 if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) 84 if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
102 return 1; 85 return 1;
103 port_priv = ib_get_agent_port(device, port_num, NULL); 86
87 port_priv = ib_get_agent_port(device, port_num);
104 if (!port_priv) { 88 if (!port_priv) {
105 printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d " 89 printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
106 "not open\n", 90 "not open\n", device->name, port_num);
107 device->name, port_num);
108 return 1; 91 return 1;
109 } 92 }
110 93
111 return smi_check_local_smp(port_priv->smp_agent, smp); 94 return smi_check_local_smp(port_priv->agent[0], smp);
112} 95}
113 96
114static int agent_mad_send(struct ib_mad_agent *mad_agent, 97int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
115 struct ib_agent_port_private *port_priv, 98 struct ib_wc *wc, struct ib_device *device,
116 struct ib_mad_private *mad_priv, 99 int port_num, int qpn)
117 struct ib_grh *grh,
118 struct ib_wc *wc)
119{ 100{
120 struct ib_agent_send_wr *agent_send_wr; 101 struct ib_agent_port_private *port_priv;
121 struct ib_sge gather_list; 102 struct ib_mad_agent *agent;
122 struct ib_send_wr send_wr; 103 struct ib_mad_send_buf *send_buf;
123 struct ib_send_wr *bad_send_wr; 104 struct ib_ah *ah;
124 struct ib_ah_attr ah_attr; 105 int ret;
125 unsigned long flags;
126 int ret = 1;
127
128 agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
129 if (!agent_send_wr)
130 goto out;
131 agent_send_wr->mad = mad_priv;
132
133 gather_list.addr = dma_map_single(mad_agent->device->dma_device,
134 &mad_priv->mad,
135 sizeof(mad_priv->mad),
136 DMA_TO_DEVICE);
137 gather_list.length = sizeof(mad_priv->mad);
138 gather_list.lkey = mad_agent->mr->lkey;
139
140 send_wr.next = NULL;
141 send_wr.opcode = IB_WR_SEND;
142 send_wr.sg_list = &gather_list;
143 send_wr.num_sge = 1;
144 send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
145 send_wr.wr.ud.timeout_ms = 0;
146 send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
147 106
148 ah_attr.dlid = wc->slid; 107 port_priv = ib_get_agent_port(device, port_num);
149 ah_attr.port_num = mad_agent->port_num; 108 if (!port_priv) {
150 ah_attr.src_path_bits = wc->dlid_path_bits; 109 printk(KERN_ERR SPFX "Unable to find port agent\n");
151 ah_attr.sl = wc->sl; 110 return -ENODEV;
152 ah_attr.static_rate = 0;
153 ah_attr.ah_flags = 0; /* No GRH */
154 if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
155 if (wc->wc_flags & IB_WC_GRH) {
156 ah_attr.ah_flags = IB_AH_GRH;
157 /* Should sgid be looked up ? */
158 ah_attr.grh.sgid_index = 0;
159 ah_attr.grh.hop_limit = grh->hop_limit;
160 ah_attr.grh.flow_label = be32_to_cpu(
161 grh->version_tclass_flow) & 0xfffff;
162 ah_attr.grh.traffic_class = (be32_to_cpu(
163 grh->version_tclass_flow) >> 20) & 0xff;
164 memcpy(ah_attr.grh.dgid.raw,
165 grh->sgid.raw,
166 sizeof(ah_attr.grh.dgid));
167 }
168 } 111 }
169 112
170 agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr); 113 agent = port_priv->agent[qpn];
171 if (IS_ERR(agent_send_wr->ah)) { 114 ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
172 printk(KERN_ERR SPFX "No memory for address handle\n"); 115 if (IS_ERR(ah)) {
173 kfree(agent_send_wr); 116 ret = PTR_ERR(ah);
174 goto out; 117 printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret);
118 return ret;
175 } 119 }
176 120
177 send_wr.wr.ud.ah = agent_send_wr->ah; 121 send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
178 if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { 122 IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
179 send_wr.wr.ud.pkey_index = wc->pkey_index; 123 GFP_KERNEL);
180 send_wr.wr.ud.remote_qkey = IB_QP1_QKEY; 124 if (IS_ERR(send_buf)) {
181 } else { /* for SMPs */ 125 ret = PTR_ERR(send_buf);
182 send_wr.wr.ud.pkey_index = 0; 126 printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret);
183 send_wr.wr.ud.remote_qkey = 0; 127 goto err1;
184 } 128 }
185 send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
186 send_wr.wr_id = (unsigned long)agent_send_wr;
187 129
188 pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr); 130 memcpy(send_buf->mad, mad, sizeof *mad);
189 131 send_buf->ah = ah;
190 /* Send */ 132 if ((ret = ib_post_send_mad(send_buf, NULL))) {
191 spin_lock_irqsave(&port_priv->send_list_lock, flags); 133 printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
192 if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) { 134 goto err2;
193 spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
194 dma_unmap_single(mad_agent->device->dma_device,
195 pci_unmap_addr(agent_send_wr, mapping),
196 sizeof(mad_priv->mad),
197 DMA_TO_DEVICE);
198 ib_destroy_ah(agent_send_wr->ah);
199 kfree(agent_send_wr);
200 } else {
201 list_add_tail(&agent_send_wr->send_list,
202 &port_priv->send_posted_list);
203 spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
204 ret = 0;
205 } 135 }
206 136 return 0;
207out: 137err2:
138 ib_free_send_mad(send_buf);
139err1:
140 ib_destroy_ah(ah);
208 return ret; 141 return ret;
209} 142}
210 143
211int agent_send(struct ib_mad_private *mad,
212 struct ib_grh *grh,
213 struct ib_wc *wc,
214 struct ib_device *device,
215 int port_num)
216{
217 struct ib_agent_port_private *port_priv;
218 struct ib_mad_agent *mad_agent;
219
220 port_priv = ib_get_agent_port(device, port_num, NULL);
221 if (!port_priv) {
222 printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
223 device->name, port_num);
224 return 1;
225 }
226
227 /* Get mad agent based on mgmt_class in MAD */
228 switch (mad->mad.mad.mad_hdr.mgmt_class) {
229 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
230 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
231 mad_agent = port_priv->smp_agent;
232 break;
233 case IB_MGMT_CLASS_PERF_MGMT:
234 mad_agent = port_priv->perf_mgmt_agent;
235 break;
236 default:
237 return 1;
238 }
239
240 return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
241}
242
243static void agent_send_handler(struct ib_mad_agent *mad_agent, 144static void agent_send_handler(struct ib_mad_agent *mad_agent,
244 struct ib_mad_send_wc *mad_send_wc) 145 struct ib_mad_send_wc *mad_send_wc)
245{ 146{
246 struct ib_agent_port_private *port_priv; 147 ib_destroy_ah(mad_send_wc->send_buf->ah);
247 struct ib_agent_send_wr *agent_send_wr; 148 ib_free_send_mad(mad_send_wc->send_buf);
248 unsigned long flags;
249
250 /* Find matching MAD agent */
251 port_priv = ib_get_agent_port(NULL, 0, mad_agent);
252 if (!port_priv) {
253 printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
254 "agent %p\n", mad_agent);
255 return;
256 }
257
258 agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
259 spin_lock_irqsave(&port_priv->send_list_lock, flags);
260 /* Remove completed send from posted send MAD list */
261 list_del(&agent_send_wr->send_list);
262 spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
263
264 dma_unmap_single(mad_agent->device->dma_device,
265 pci_unmap_addr(agent_send_wr, mapping),
266 sizeof(agent_send_wr->mad->mad),
267 DMA_TO_DEVICE);
268
269 ib_destroy_ah(agent_send_wr->ah);
270
271 /* Release allocated memory */
272 kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
273 kfree(agent_send_wr);
274} 149}
275 150
276int ib_agent_port_open(struct ib_device *device, int port_num) 151int ib_agent_port_open(struct ib_device *device, int port_num)
277{ 152{
278 int ret;
279 struct ib_agent_port_private *port_priv; 153 struct ib_agent_port_private *port_priv;
280 unsigned long flags; 154 unsigned long flags;
281 155 int ret;
282 /* First, check if port already open for SMI */
283 port_priv = ib_get_agent_port(device, port_num, NULL);
284 if (port_priv) {
285 printk(KERN_DEBUG SPFX "%s port %d already open\n",
286 device->name, port_num);
287 return 0;
288 }
289 156
290 /* Create new device info */ 157 /* Create new device info */
291 port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); 158 port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
@@ -294,32 +161,25 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
294 ret = -ENOMEM; 161 ret = -ENOMEM;
295 goto error1; 162 goto error1;
296 } 163 }
297
298 memset(port_priv, 0, sizeof *port_priv); 164 memset(port_priv, 0, sizeof *port_priv);
299 port_priv->port_num = port_num;
300 spin_lock_init(&port_priv->send_list_lock);
301 INIT_LIST_HEAD(&port_priv->send_posted_list);
302 165
303 /* Obtain send only MAD agent for SM class (SMI QP) */ 166 /* Obtain send only MAD agent for SMI QP */
304 port_priv->smp_agent = ib_register_mad_agent(device, port_num, 167 port_priv->agent[0] = ib_register_mad_agent(device, port_num,
305 IB_QPT_SMI, 168 IB_QPT_SMI, NULL, 0,
306 NULL, 0,
307 &agent_send_handler, 169 &agent_send_handler,
308 NULL, NULL); 170 NULL, NULL);
309 171 if (IS_ERR(port_priv->agent[0])) {
310 if (IS_ERR(port_priv->smp_agent)) { 172 ret = PTR_ERR(port_priv->agent[0]);
311 ret = PTR_ERR(port_priv->smp_agent);
312 goto error2; 173 goto error2;
313 } 174 }
314 175
315 /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */ 176 /* Obtain send only MAD agent for GSI QP */
316 port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num, 177 port_priv->agent[1] = ib_register_mad_agent(device, port_num,
317 IB_QPT_GSI, 178 IB_QPT_GSI, NULL, 0,
318 NULL, 0, 179 &agent_send_handler,
319 &agent_send_handler, 180 NULL, NULL);
320 NULL, NULL); 181 if (IS_ERR(port_priv->agent[1])) {
321 if (IS_ERR(port_priv->perf_mgmt_agent)) { 182 ret = PTR_ERR(port_priv->agent[1]);
322 ret = PTR_ERR(port_priv->perf_mgmt_agent);
323 goto error3; 183 goto error3;
324 } 184 }
325 185
@@ -330,7 +190,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
330 return 0; 190 return 0;
331 191
332error3: 192error3:
333 ib_unregister_mad_agent(port_priv->smp_agent); 193 ib_unregister_mad_agent(port_priv->agent[0]);
334error2: 194error2:
335 kfree(port_priv); 195 kfree(port_priv);
336error1: 196error1:
@@ -343,7 +203,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
343 unsigned long flags; 203 unsigned long flags;
344 204
345 spin_lock_irqsave(&ib_agent_port_list_lock, flags); 205 spin_lock_irqsave(&ib_agent_port_list_lock, flags);
346 port_priv = __ib_get_agent_port(device, port_num, NULL); 206 port_priv = __ib_get_agent_port(device, port_num);
347 if (port_priv == NULL) { 207 if (port_priv == NULL) {
348 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); 208 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
349 printk(KERN_ERR SPFX "Port %d not found\n", port_num); 209 printk(KERN_ERR SPFX "Port %d not found\n", port_num);
@@ -352,9 +212,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
352 list_del(&port_priv->port_list); 212 list_del(&port_priv->port_list);
353 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); 213 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
354 214
355 ib_unregister_mad_agent(port_priv->perf_mgmt_agent); 215 ib_unregister_mad_agent(port_priv->agent[1]);
356 ib_unregister_mad_agent(port_priv->smp_agent); 216 ib_unregister_mad_agent(port_priv->agent[0]);
357 kfree(port_priv); 217 kfree(port_priv);
358
359 return 0; 218 return 0;
360} 219}
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
index d9426842254a..c5f3cfec942a 100644
--- a/drivers/infiniband/core/agent.h
+++ b/drivers/infiniband/core/agent.h
@@ -39,17 +39,14 @@
39#ifndef __AGENT_H_ 39#ifndef __AGENT_H_
40#define __AGENT_H_ 40#define __AGENT_H_
41 41
42extern spinlock_t ib_agent_port_list_lock; 42#include <rdma/ib_mad.h>
43 43
44extern int ib_agent_port_open(struct ib_device *device, 44extern int ib_agent_port_open(struct ib_device *device, int port_num);
45 int port_num);
46 45
47extern int ib_agent_port_close(struct ib_device *device, int port_num); 46extern int ib_agent_port_close(struct ib_device *device, int port_num);
48 47
49extern int agent_send(struct ib_mad_private *mad, 48extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
50 struct ib_grh *grh, 49 struct ib_wc *wc, struct ib_device *device,
51 struct ib_wc *wc, 50 int port_num, int qpn);
52 struct ib_device *device,
53 int port_num);
54 51
55#endif /* __AGENT_H_ */ 52#endif /* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
deleted file mode 100644
index 2ec6d7f1b7d0..000000000000
--- a/drivers/infiniband/core/agent_priv.h
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
4 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
5 * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
6 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
37 */
38
39#ifndef __IB_AGENT_PRIV_H__
40#define __IB_AGENT_PRIV_H__
41
42#include <linux/pci.h>
43
44#define SPFX "ib_agent: "
45
46struct ib_agent_send_wr {
47 struct list_head send_list;
48 struct ib_ah *ah;
49 struct ib_mad_private *mad;
50 DECLARE_PCI_UNMAP_ADDR(mapping)
51};
52
53struct ib_agent_port_private {
54 struct list_head port_list;
55 struct list_head send_posted_list;
56 spinlock_t send_list_lock;
57 int port_num;
58 struct ib_mad_agent *smp_agent; /* SM class */
59 struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
60};
61
62#endif /* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 54db6d4831f1..580c3a2bb102 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -135,6 +135,7 @@ struct cm_id_private {
135 __be64 tid; 135 __be64 tid;
136 __be32 local_qpn; 136 __be32 local_qpn;
137 __be32 remote_qpn; 137 __be32 remote_qpn;
138 enum ib_qp_type qp_type;
138 __be32 sq_psn; 139 __be32 sq_psn;
139 __be32 rq_psn; 140 __be32 rq_psn;
140 int timeout_ms; 141 int timeout_ms;
@@ -175,8 +176,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
175 176
176 m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 177 m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
177 cm_id_priv->av.pkey_index, 178 cm_id_priv->av.pkey_index,
178 ah, 0, sizeof(struct ib_mad_hdr), 179 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
179 sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
180 GFP_ATOMIC); 180 GFP_ATOMIC);
181 if (IS_ERR(m)) { 181 if (IS_ERR(m)) {
182 ib_destroy_ah(ah); 182 ib_destroy_ah(ah);
@@ -184,7 +184,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
184 } 184 }
185 185
186 /* Timeout set by caller if response is expected. */ 186 /* Timeout set by caller if response is expected. */
187 m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; 187 m->ah = ah;
188 m->retries = cm_id_priv->max_cm_retries;
188 189
189 atomic_inc(&cm_id_priv->refcount); 190 atomic_inc(&cm_id_priv->refcount);
190 m->context[0] = cm_id_priv; 191 m->context[0] = cm_id_priv;
@@ -205,20 +206,20 @@ static int cm_alloc_response_msg(struct cm_port *port,
205 return PTR_ERR(ah); 206 return PTR_ERR(ah);
206 207
207 m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 208 m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
208 ah, 0, sizeof(struct ib_mad_hdr), 209 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
209 sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
210 GFP_ATOMIC); 210 GFP_ATOMIC);
211 if (IS_ERR(m)) { 211 if (IS_ERR(m)) {
212 ib_destroy_ah(ah); 212 ib_destroy_ah(ah);
213 return PTR_ERR(m); 213 return PTR_ERR(m);
214 } 214 }
215 m->ah = ah;
215 *msg = m; 216 *msg = m;
216 return 0; 217 return 0;
217} 218}
218 219
219static void cm_free_msg(struct ib_mad_send_buf *msg) 220static void cm_free_msg(struct ib_mad_send_buf *msg)
220{ 221{
221 ib_destroy_ah(msg->send_wr.wr.ud.ah); 222 ib_destroy_ah(msg->ah);
222 if (msg->context[0]) 223 if (msg->context[0])
223 cm_deref_id(msg->context[0]); 224 cm_deref_id(msg->context[0]);
224 ib_free_send_mad(msg); 225 ib_free_send_mad(msg);
@@ -366,9 +367,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
366 cur_cm_id_priv = rb_entry(parent, struct cm_id_private, 367 cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
367 service_node); 368 service_node);
368 if ((cur_cm_id_priv->id.service_mask & service_id) == 369 if ((cur_cm_id_priv->id.service_mask & service_id) ==
369 (service_mask & cur_cm_id_priv->id.service_id)) 370 (service_mask & cur_cm_id_priv->id.service_id) &&
370 return cm_id_priv; 371 (cm_id_priv->id.device == cur_cm_id_priv->id.device))
371 if (service_id < cur_cm_id_priv->id.service_id) 372 return cur_cm_id_priv;
373
374 if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
375 link = &(*link)->rb_left;
376 else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
377 link = &(*link)->rb_right;
378 else if (service_id < cur_cm_id_priv->id.service_id)
372 link = &(*link)->rb_left; 379 link = &(*link)->rb_left;
373 else 380 else
374 link = &(*link)->rb_right; 381 link = &(*link)->rb_right;
@@ -378,7 +385,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
378 return NULL; 385 return NULL;
379} 386}
380 387
381static struct cm_id_private * cm_find_listen(__be64 service_id) 388static struct cm_id_private * cm_find_listen(struct ib_device *device,
389 __be64 service_id)
382{ 390{
383 struct rb_node *node = cm.listen_service_table.rb_node; 391 struct rb_node *node = cm.listen_service_table.rb_node;
384 struct cm_id_private *cm_id_priv; 392 struct cm_id_private *cm_id_priv;
@@ -386,9 +394,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id)
386 while (node) { 394 while (node) {
387 cm_id_priv = rb_entry(node, struct cm_id_private, service_node); 395 cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
388 if ((cm_id_priv->id.service_mask & service_id) == 396 if ((cm_id_priv->id.service_mask & service_id) ==
389 (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) 397 cm_id_priv->id.service_id &&
398 (cm_id_priv->id.device == device))
390 return cm_id_priv; 399 return cm_id_priv;
391 if (service_id < cm_id_priv->id.service_id) 400
401 if (device < cm_id_priv->id.device)
402 node = node->rb_left;
403 else if (device > cm_id_priv->id.device)
404 node = node->rb_right;
405 else if (service_id < cm_id_priv->id.service_id)
392 node = node->rb_left; 406 node = node->rb_left;
393 else 407 else
394 node = node->rb_right; 408 node = node->rb_right;
@@ -523,7 +537,8 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
523 ib_send_cm_sidr_rep(&cm_id_priv->id, &param); 537 ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
524} 538}
525 539
526struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, 540struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
541 ib_cm_handler cm_handler,
527 void *context) 542 void *context)
528{ 543{
529 struct cm_id_private *cm_id_priv; 544 struct cm_id_private *cm_id_priv;
@@ -535,6 +550,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
535 550
536 memset(cm_id_priv, 0, sizeof *cm_id_priv); 551 memset(cm_id_priv, 0, sizeof *cm_id_priv);
537 cm_id_priv->id.state = IB_CM_IDLE; 552 cm_id_priv->id.state = IB_CM_IDLE;
553 cm_id_priv->id.device = device;
538 cm_id_priv->id.cm_handler = cm_handler; 554 cm_id_priv->id.cm_handler = cm_handler;
539 cm_id_priv->id.context = context; 555 cm_id_priv->id.context = context;
540 cm_id_priv->id.remote_cm_qpn = 1; 556 cm_id_priv->id.remote_cm_qpn = 1;
@@ -662,8 +678,7 @@ retest:
662 break; 678 break;
663 case IB_CM_SIDR_REQ_SENT: 679 case IB_CM_SIDR_REQ_SENT:
664 cm_id->state = IB_CM_IDLE; 680 cm_id->state = IB_CM_IDLE;
665 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 681 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
666 (unsigned long) cm_id_priv->msg);
667 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 682 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
668 break; 683 break;
669 case IB_CM_SIDR_REQ_RCVD: 684 case IB_CM_SIDR_REQ_RCVD:
@@ -674,8 +689,7 @@ retest:
674 case IB_CM_MRA_REQ_RCVD: 689 case IB_CM_MRA_REQ_RCVD:
675 case IB_CM_REP_SENT: 690 case IB_CM_REP_SENT:
676 case IB_CM_MRA_REP_RCVD: 691 case IB_CM_MRA_REP_RCVD:
677 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 692 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
678 (unsigned long) cm_id_priv->msg);
679 /* Fall through */ 693 /* Fall through */
680 case IB_CM_REQ_RCVD: 694 case IB_CM_REQ_RCVD:
681 case IB_CM_MRA_REQ_SENT: 695 case IB_CM_MRA_REQ_SENT:
@@ -692,8 +706,7 @@ retest:
692 ib_send_cm_dreq(cm_id, NULL, 0); 706 ib_send_cm_dreq(cm_id, NULL, 0);
693 goto retest; 707 goto retest;
694 case IB_CM_DREQ_SENT: 708 case IB_CM_DREQ_SENT:
695 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 709 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
696 (unsigned long) cm_id_priv->msg);
697 cm_enter_timewait(cm_id_priv); 710 cm_enter_timewait(cm_id_priv);
698 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 711 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
699 break; 712 break;
@@ -867,7 +880,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
867 struct ib_cm_req_param *param) 880 struct ib_cm_req_param *param)
868{ 881{
869 struct cm_id_private *cm_id_priv; 882 struct cm_id_private *cm_id_priv;
870 struct ib_send_wr *bad_send_wr;
871 struct cm_req_msg *req_msg; 883 struct cm_req_msg *req_msg;
872 unsigned long flags; 884 unsigned long flags;
873 int ret; 885 int ret;
@@ -911,6 +923,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
911 cm_id_priv->responder_resources = param->responder_resources; 923 cm_id_priv->responder_resources = param->responder_resources;
912 cm_id_priv->retry_count = param->retry_count; 924 cm_id_priv->retry_count = param->retry_count;
913 cm_id_priv->path_mtu = param->primary_path->mtu; 925 cm_id_priv->path_mtu = param->primary_path->mtu;
926 cm_id_priv->qp_type = param->qp_type;
914 927
915 ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); 928 ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
916 if (ret) 929 if (ret)
@@ -919,7 +932,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
919 req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; 932 req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
920 cm_format_req(req_msg, cm_id_priv, param); 933 cm_format_req(req_msg, cm_id_priv, param);
921 cm_id_priv->tid = req_msg->hdr.tid; 934 cm_id_priv->tid = req_msg->hdr.tid;
922 cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; 935 cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
923 cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; 936 cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
924 937
925 cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); 938 cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
@@ -928,8 +941,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
928 cm_req_get_primary_local_ack_timeout(req_msg); 941 cm_req_get_primary_local_ack_timeout(req_msg);
929 942
930 spin_lock_irqsave(&cm_id_priv->lock, flags); 943 spin_lock_irqsave(&cm_id_priv->lock, flags);
931 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 944 ret = ib_post_send_mad(cm_id_priv->msg, NULL);
932 &cm_id_priv->msg->send_wr, &bad_send_wr);
933 if (ret) { 945 if (ret) {
934 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 946 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
935 goto error2; 947 goto error2;
@@ -952,7 +964,6 @@ static int cm_issue_rej(struct cm_port *port,
952 void *ari, u8 ari_length) 964 void *ari, u8 ari_length)
953{ 965{
954 struct ib_mad_send_buf *msg = NULL; 966 struct ib_mad_send_buf *msg = NULL;
955 struct ib_send_wr *bad_send_wr;
956 struct cm_rej_msg *rej_msg, *rcv_msg; 967 struct cm_rej_msg *rej_msg, *rcv_msg;
957 int ret; 968 int ret;
958 969
@@ -975,7 +986,7 @@ static int cm_issue_rej(struct cm_port *port,
975 memcpy(rej_msg->ari, ari, ari_length); 986 memcpy(rej_msg->ari, ari, ari_length);
976 } 987 }
977 988
978 ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); 989 ret = ib_post_send_mad(msg, NULL);
979 if (ret) 990 if (ret)
980 cm_free_msg(msg); 991 cm_free_msg(msg);
981 992
@@ -1047,7 +1058,6 @@ static void cm_format_req_event(struct cm_work *work,
1047 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; 1058 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1048 param = &work->cm_event.param.req_rcvd; 1059 param = &work->cm_event.param.req_rcvd;
1049 param->listen_id = listen_id; 1060 param->listen_id = listen_id;
1050 param->device = cm_id_priv->av.port->mad_agent->device;
1051 param->port = cm_id_priv->av.port->port_num; 1061 param->port = cm_id_priv->av.port->port_num;
1052 param->primary_path = &work->path[0]; 1062 param->primary_path = &work->path[0];
1053 if (req_msg->alt_local_lid) 1063 if (req_msg->alt_local_lid)
@@ -1156,7 +1166,6 @@ static void cm_dup_req_handler(struct cm_work *work,
1156 struct cm_id_private *cm_id_priv) 1166 struct cm_id_private *cm_id_priv)
1157{ 1167{
1158 struct ib_mad_send_buf *msg = NULL; 1168 struct ib_mad_send_buf *msg = NULL;
1159 struct ib_send_wr *bad_send_wr;
1160 unsigned long flags; 1169 unsigned long flags;
1161 int ret; 1170 int ret;
1162 1171
@@ -1185,8 +1194,7 @@ static void cm_dup_req_handler(struct cm_work *work,
1185 } 1194 }
1186 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1195 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1187 1196
1188 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, 1197 ret = ib_post_send_mad(msg, NULL);
1189 &bad_send_wr);
1190 if (ret) 1198 if (ret)
1191 goto free; 1199 goto free;
1192 return; 1200 return;
@@ -1226,7 +1234,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
1226 } 1234 }
1227 1235
1228 /* Find matching listen request. */ 1236 /* Find matching listen request. */
1229 listen_cm_id_priv = cm_find_listen(req_msg->service_id); 1237 listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1238 req_msg->service_id);
1230 if (!listen_cm_id_priv) { 1239 if (!listen_cm_id_priv) {
1231 spin_unlock_irqrestore(&cm.lock, flags); 1240 spin_unlock_irqrestore(&cm.lock, flags);
1232 cm_issue_rej(work->port, work->mad_recv_wc, 1241 cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1254,7 +1263,7 @@ static int cm_req_handler(struct cm_work *work)
1254 1263
1255 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; 1264 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1256 1265
1257 cm_id = ib_create_cm_id(NULL, NULL); 1266 cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
1258 if (IS_ERR(cm_id)) 1267 if (IS_ERR(cm_id))
1259 return PTR_ERR(cm_id); 1268 return PTR_ERR(cm_id);
1260 1269
@@ -1305,6 +1314,7 @@ static int cm_req_handler(struct cm_work *work)
1305 cm_req_get_primary_local_ack_timeout(req_msg); 1314 cm_req_get_primary_local_ack_timeout(req_msg);
1306 cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); 1315 cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1307 cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); 1316 cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1317 cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1308 1318
1309 cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); 1319 cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1310 cm_process_work(cm_id_priv, work); 1320 cm_process_work(cm_id_priv, work);
@@ -1349,7 +1359,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
1349 struct cm_id_private *cm_id_priv; 1359 struct cm_id_private *cm_id_priv;
1350 struct ib_mad_send_buf *msg; 1360 struct ib_mad_send_buf *msg;
1351 struct cm_rep_msg *rep_msg; 1361 struct cm_rep_msg *rep_msg;
1352 struct ib_send_wr *bad_send_wr;
1353 unsigned long flags; 1362 unsigned long flags;
1354 int ret; 1363 int ret;
1355 1364
@@ -1371,11 +1380,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
1371 1380
1372 rep_msg = (struct cm_rep_msg *) msg->mad; 1381 rep_msg = (struct cm_rep_msg *) msg->mad;
1373 cm_format_rep(rep_msg, cm_id_priv, param); 1382 cm_format_rep(rep_msg, cm_id_priv, param);
1374 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; 1383 msg->timeout_ms = cm_id_priv->timeout_ms;
1375 msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; 1384 msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1376 1385
1377 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 1386 ret = ib_post_send_mad(msg, NULL);
1378 &msg->send_wr, &bad_send_wr);
1379 if (ret) { 1387 if (ret) {
1380 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1388 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1381 cm_free_msg(msg); 1389 cm_free_msg(msg);
@@ -1413,7 +1421,6 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1413{ 1421{
1414 struct cm_id_private *cm_id_priv; 1422 struct cm_id_private *cm_id_priv;
1415 struct ib_mad_send_buf *msg; 1423 struct ib_mad_send_buf *msg;
1416 struct ib_send_wr *bad_send_wr;
1417 unsigned long flags; 1424 unsigned long flags;
1418 void *data; 1425 void *data;
1419 int ret; 1426 int ret;
@@ -1440,8 +1447,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1440 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, 1447 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1441 private_data, private_data_len); 1448 private_data, private_data_len);
1442 1449
1443 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 1450 ret = ib_post_send_mad(msg, NULL);
1444 &msg->send_wr, &bad_send_wr);
1445 if (ret) { 1451 if (ret) {
1446 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1452 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1447 cm_free_msg(msg); 1453 cm_free_msg(msg);
@@ -1486,7 +1492,6 @@ static void cm_dup_rep_handler(struct cm_work *work)
1486 struct cm_id_private *cm_id_priv; 1492 struct cm_id_private *cm_id_priv;
1487 struct cm_rep_msg *rep_msg; 1493 struct cm_rep_msg *rep_msg;
1488 struct ib_mad_send_buf *msg = NULL; 1494 struct ib_mad_send_buf *msg = NULL;
1489 struct ib_send_wr *bad_send_wr;
1490 unsigned long flags; 1495 unsigned long flags;
1491 int ret; 1496 int ret;
1492 1497
@@ -1514,8 +1519,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
1514 goto unlock; 1519 goto unlock;
1515 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1520 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1516 1521
1517 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, 1522 ret = ib_post_send_mad(msg, NULL);
1518 &bad_send_wr);
1519 if (ret) 1523 if (ret)
1520 goto free; 1524 goto free;
1521 goto deref; 1525 goto deref;
@@ -1583,8 +1587,7 @@ static int cm_rep_handler(struct cm_work *work)
1583 1587
1584 /* todo: handle peer_to_peer */ 1588 /* todo: handle peer_to_peer */
1585 1589
1586 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 1590 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1587 (unsigned long) cm_id_priv->msg);
1588 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1591 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1589 if (!ret) 1592 if (!ret)
1590 list_add_tail(&work->list, &cm_id_priv->work_list); 1593 list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1618,8 +1621,7 @@ static int cm_establish_handler(struct cm_work *work)
1618 goto out; 1621 goto out;
1619 } 1622 }
1620 1623
1621 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 1624 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1622 (unsigned long) cm_id_priv->msg);
1623 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1625 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1624 if (!ret) 1626 if (!ret)
1625 list_add_tail(&work->list, &cm_id_priv->work_list); 1627 list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1658,8 +1660,7 @@ static int cm_rtu_handler(struct cm_work *work)
1658 } 1660 }
1659 cm_id_priv->id.state = IB_CM_ESTABLISHED; 1661 cm_id_priv->id.state = IB_CM_ESTABLISHED;
1660 1662
1661 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 1663 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1662 (unsigned long) cm_id_priv->msg);
1663 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1664 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1664 if (!ret) 1665 if (!ret)
1665 list_add_tail(&work->list, &cm_id_priv->work_list); 1666 list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1696,7 +1697,6 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1696{ 1697{
1697 struct cm_id_private *cm_id_priv; 1698 struct cm_id_private *cm_id_priv;
1698 struct ib_mad_send_buf *msg; 1699 struct ib_mad_send_buf *msg;
1699 struct ib_send_wr *bad_send_wr;
1700 unsigned long flags; 1700 unsigned long flags;
1701 int ret; 1701 int ret;
1702 1702
@@ -1718,11 +1718,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1718 1718
1719 cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, 1719 cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
1720 private_data, private_data_len); 1720 private_data, private_data_len);
1721 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; 1721 msg->timeout_ms = cm_id_priv->timeout_ms;
1722 msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; 1722 msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
1723 1723
1724 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 1724 ret = ib_post_send_mad(msg, NULL);
1725 &msg->send_wr, &bad_send_wr);
1726 if (ret) { 1725 if (ret) {
1727 cm_enter_timewait(cm_id_priv); 1726 cm_enter_timewait(cm_id_priv);
1728 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1727 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1756,7 +1755,6 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
1756{ 1755{
1757 struct cm_id_private *cm_id_priv; 1756 struct cm_id_private *cm_id_priv;
1758 struct ib_mad_send_buf *msg; 1757 struct ib_mad_send_buf *msg;
1759 struct ib_send_wr *bad_send_wr;
1760 unsigned long flags; 1758 unsigned long flags;
1761 void *data; 1759 void *data;
1762 int ret; 1760 int ret;
@@ -1786,8 +1784,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
1786 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, 1784 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
1787 private_data, private_data_len); 1785 private_data, private_data_len);
1788 1786
1789 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, 1787 ret = ib_post_send_mad(msg, NULL);
1790 &bad_send_wr);
1791 if (ret) { 1788 if (ret) {
1792 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1789 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1793 cm_free_msg(msg); 1790 cm_free_msg(msg);
@@ -1804,7 +1801,6 @@ static int cm_dreq_handler(struct cm_work *work)
1804 struct cm_id_private *cm_id_priv; 1801 struct cm_id_private *cm_id_priv;
1805 struct cm_dreq_msg *dreq_msg; 1802 struct cm_dreq_msg *dreq_msg;
1806 struct ib_mad_send_buf *msg = NULL; 1803 struct ib_mad_send_buf *msg = NULL;
1807 struct ib_send_wr *bad_send_wr;
1808 unsigned long flags; 1804 unsigned long flags;
1809 int ret; 1805 int ret;
1810 1806
@@ -1823,8 +1819,7 @@ static int cm_dreq_handler(struct cm_work *work)
1823 switch (cm_id_priv->id.state) { 1819 switch (cm_id_priv->id.state) {
1824 case IB_CM_REP_SENT: 1820 case IB_CM_REP_SENT:
1825 case IB_CM_DREQ_SENT: 1821 case IB_CM_DREQ_SENT:
1826 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 1822 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1827 (unsigned long) cm_id_priv->msg);
1828 break; 1823 break;
1829 case IB_CM_ESTABLISHED: 1824 case IB_CM_ESTABLISHED:
1830 case IB_CM_MRA_REP_RCVD: 1825 case IB_CM_MRA_REP_RCVD:
@@ -1838,8 +1833,7 @@ static int cm_dreq_handler(struct cm_work *work)
1838 cm_id_priv->private_data_len); 1833 cm_id_priv->private_data_len);
1839 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1834 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1840 1835
1841 if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, 1836 if (ib_post_send_mad(msg, NULL))
1842 &msg->send_wr, &bad_send_wr))
1843 cm_free_msg(msg); 1837 cm_free_msg(msg);
1844 goto deref; 1838 goto deref;
1845 default: 1839 default:
@@ -1886,8 +1880,7 @@ static int cm_drep_handler(struct cm_work *work)
1886 } 1880 }
1887 cm_enter_timewait(cm_id_priv); 1881 cm_enter_timewait(cm_id_priv);
1888 1882
1889 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 1883 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1890 (unsigned long) cm_id_priv->msg);
1891 ret = atomic_inc_and_test(&cm_id_priv->work_count); 1884 ret = atomic_inc_and_test(&cm_id_priv->work_count);
1892 if (!ret) 1885 if (!ret)
1893 list_add_tail(&work->list, &cm_id_priv->work_list); 1886 list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1912,7 +1905,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
1912{ 1905{
1913 struct cm_id_private *cm_id_priv; 1906 struct cm_id_private *cm_id_priv;
1914 struct ib_mad_send_buf *msg; 1907 struct ib_mad_send_buf *msg;
1915 struct ib_send_wr *bad_send_wr;
1916 unsigned long flags; 1908 unsigned long flags;
1917 int ret; 1909 int ret;
1918 1910
@@ -1956,8 +1948,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
1956 if (ret) 1948 if (ret)
1957 goto out; 1949 goto out;
1958 1950
1959 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 1951 ret = ib_post_send_mad(msg, NULL);
1960 &msg->send_wr, &bad_send_wr);
1961 if (ret) 1952 if (ret)
1962 cm_free_msg(msg); 1953 cm_free_msg(msg);
1963 1954
@@ -2033,8 +2024,7 @@ static int cm_rej_handler(struct cm_work *work)
2033 case IB_CM_MRA_REQ_RCVD: 2024 case IB_CM_MRA_REQ_RCVD:
2034 case IB_CM_REP_SENT: 2025 case IB_CM_REP_SENT:
2035 case IB_CM_MRA_REP_RCVD: 2026 case IB_CM_MRA_REP_RCVD:
2036 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 2027 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2037 (unsigned long) cm_id_priv->msg);
2038 /* fall through */ 2028 /* fall through */
2039 case IB_CM_REQ_RCVD: 2029 case IB_CM_REQ_RCVD:
2040 case IB_CM_MRA_REQ_SENT: 2030 case IB_CM_MRA_REQ_SENT:
@@ -2044,8 +2034,7 @@ static int cm_rej_handler(struct cm_work *work)
2044 cm_reset_to_idle(cm_id_priv); 2034 cm_reset_to_idle(cm_id_priv);
2045 break; 2035 break;
2046 case IB_CM_DREQ_SENT: 2036 case IB_CM_DREQ_SENT:
2047 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 2037 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2048 (unsigned long) cm_id_priv->msg);
2049 /* fall through */ 2038 /* fall through */
2050 case IB_CM_REP_RCVD: 2039 case IB_CM_REP_RCVD:
2051 case IB_CM_MRA_REP_SENT: 2040 case IB_CM_MRA_REP_SENT:
@@ -2080,7 +2069,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2080{ 2069{
2081 struct cm_id_private *cm_id_priv; 2070 struct cm_id_private *cm_id_priv;
2082 struct ib_mad_send_buf *msg; 2071 struct ib_mad_send_buf *msg;
2083 struct ib_send_wr *bad_send_wr;
2084 void *data; 2072 void *data;
2085 unsigned long flags; 2073 unsigned long flags;
2086 int ret; 2074 int ret;
@@ -2104,8 +2092,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2104 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, 2092 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2105 CM_MSG_RESPONSE_REQ, service_timeout, 2093 CM_MSG_RESPONSE_REQ, service_timeout,
2106 private_data, private_data_len); 2094 private_data, private_data_len);
2107 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2095 ret = ib_post_send_mad(msg, NULL);
2108 &msg->send_wr, &bad_send_wr);
2109 if (ret) 2096 if (ret)
2110 goto error2; 2097 goto error2;
2111 cm_id->state = IB_CM_MRA_REQ_SENT; 2098 cm_id->state = IB_CM_MRA_REQ_SENT;
@@ -2118,8 +2105,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2118 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, 2105 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2119 CM_MSG_RESPONSE_REP, service_timeout, 2106 CM_MSG_RESPONSE_REP, service_timeout,
2120 private_data, private_data_len); 2107 private_data, private_data_len);
2121 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2108 ret = ib_post_send_mad(msg, NULL);
2122 &msg->send_wr, &bad_send_wr);
2123 if (ret) 2109 if (ret)
2124 goto error2; 2110 goto error2;
2125 cm_id->state = IB_CM_MRA_REP_SENT; 2111 cm_id->state = IB_CM_MRA_REP_SENT;
@@ -2132,8 +2118,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2132 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, 2118 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2133 CM_MSG_RESPONSE_OTHER, service_timeout, 2119 CM_MSG_RESPONSE_OTHER, service_timeout,
2134 private_data, private_data_len); 2120 private_data, private_data_len);
2135 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2121 ret = ib_post_send_mad(msg, NULL);
2136 &msg->send_wr, &bad_send_wr);
2137 if (ret) 2122 if (ret)
2138 goto error2; 2123 goto error2;
2139 cm_id->lap_state = IB_CM_MRA_LAP_SENT; 2124 cm_id->lap_state = IB_CM_MRA_LAP_SENT;
@@ -2195,14 +2180,14 @@ static int cm_mra_handler(struct cm_work *work)
2195 case IB_CM_REQ_SENT: 2180 case IB_CM_REQ_SENT:
2196 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || 2181 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2197 ib_modify_mad(cm_id_priv->av.port->mad_agent, 2182 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2198 (unsigned long) cm_id_priv->msg, timeout)) 2183 cm_id_priv->msg, timeout))
2199 goto out; 2184 goto out;
2200 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; 2185 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2201 break; 2186 break;
2202 case IB_CM_REP_SENT: 2187 case IB_CM_REP_SENT:
2203 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || 2188 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2204 ib_modify_mad(cm_id_priv->av.port->mad_agent, 2189 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2205 (unsigned long) cm_id_priv->msg, timeout)) 2190 cm_id_priv->msg, timeout))
2206 goto out; 2191 goto out;
2207 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; 2192 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2208 break; 2193 break;
@@ -2210,7 +2195,7 @@ static int cm_mra_handler(struct cm_work *work)
2210 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || 2195 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2211 cm_id_priv->id.lap_state != IB_CM_LAP_SENT || 2196 cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2212 ib_modify_mad(cm_id_priv->av.port->mad_agent, 2197 ib_modify_mad(cm_id_priv->av.port->mad_agent,
2213 (unsigned long) cm_id_priv->msg, timeout)) 2198 cm_id_priv->msg, timeout))
2214 goto out; 2199 goto out;
2215 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; 2200 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2216 break; 2201 break;
@@ -2273,7 +2258,6 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
2273{ 2258{
2274 struct cm_id_private *cm_id_priv; 2259 struct cm_id_private *cm_id_priv;
2275 struct ib_mad_send_buf *msg; 2260 struct ib_mad_send_buf *msg;
2276 struct ib_send_wr *bad_send_wr;
2277 unsigned long flags; 2261 unsigned long flags;
2278 int ret; 2262 int ret;
2279 2263
@@ -2294,11 +2278,10 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
2294 2278
2295 cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, 2279 cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2296 alternate_path, private_data, private_data_len); 2280 alternate_path, private_data, private_data_len);
2297 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; 2281 msg->timeout_ms = cm_id_priv->timeout_ms;
2298 msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; 2282 msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2299 2283
2300 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2284 ret = ib_post_send_mad(msg, NULL);
2301 &msg->send_wr, &bad_send_wr);
2302 if (ret) { 2285 if (ret) {
2303 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2286 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2304 cm_free_msg(msg); 2287 cm_free_msg(msg);
@@ -2342,7 +2325,6 @@ static int cm_lap_handler(struct cm_work *work)
2342 struct cm_lap_msg *lap_msg; 2325 struct cm_lap_msg *lap_msg;
2343 struct ib_cm_lap_event_param *param; 2326 struct ib_cm_lap_event_param *param;
2344 struct ib_mad_send_buf *msg = NULL; 2327 struct ib_mad_send_buf *msg = NULL;
2345 struct ib_send_wr *bad_send_wr;
2346 unsigned long flags; 2328 unsigned long flags;
2347 int ret; 2329 int ret;
2348 2330
@@ -2376,8 +2358,7 @@ static int cm_lap_handler(struct cm_work *work)
2376 cm_id_priv->private_data_len); 2358 cm_id_priv->private_data_len);
2377 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2359 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2378 2360
2379 if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2361 if (ib_post_send_mad(msg, NULL))
2380 &msg->send_wr, &bad_send_wr))
2381 cm_free_msg(msg); 2362 cm_free_msg(msg);
2382 goto deref; 2363 goto deref;
2383 default: 2364 default:
@@ -2433,7 +2414,6 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
2433{ 2414{
2434 struct cm_id_private *cm_id_priv; 2415 struct cm_id_private *cm_id_priv;
2435 struct ib_mad_send_buf *msg; 2416 struct ib_mad_send_buf *msg;
2436 struct ib_send_wr *bad_send_wr;
2437 unsigned long flags; 2417 unsigned long flags;
2438 int ret; 2418 int ret;
2439 2419
@@ -2456,8 +2436,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
2456 2436
2457 cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, 2437 cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2458 info, info_length, private_data, private_data_len); 2438 info, info_length, private_data, private_data_len);
2459 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2439 ret = ib_post_send_mad(msg, NULL);
2460 &msg->send_wr, &bad_send_wr);
2461 if (ret) { 2440 if (ret) {
2462 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2441 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2463 cm_free_msg(msg); 2442 cm_free_msg(msg);
@@ -2496,8 +2475,7 @@ static int cm_apr_handler(struct cm_work *work)
2496 goto out; 2475 goto out;
2497 } 2476 }
2498 cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; 2477 cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2499 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 2478 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2500 (unsigned long) cm_id_priv->msg);
2501 cm_id_priv->msg = NULL; 2479 cm_id_priv->msg = NULL;
2502 2480
2503 ret = atomic_inc_and_test(&cm_id_priv->work_count); 2481 ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -2572,7 +2550,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2572{ 2550{
2573 struct cm_id_private *cm_id_priv; 2551 struct cm_id_private *cm_id_priv;
2574 struct ib_mad_send_buf *msg; 2552 struct ib_mad_send_buf *msg;
2575 struct ib_send_wr *bad_send_wr;
2576 unsigned long flags; 2553 unsigned long flags;
2577 int ret; 2554 int ret;
2578 2555
@@ -2595,13 +2572,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2595 2572
2596 cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, 2573 cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2597 param); 2574 param);
2598 msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; 2575 msg->timeout_ms = cm_id_priv->timeout_ms;
2599 msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; 2576 msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2600 2577
2601 spin_lock_irqsave(&cm_id_priv->lock, flags); 2578 spin_lock_irqsave(&cm_id_priv->lock, flags);
2602 if (cm_id->state == IB_CM_IDLE) 2579 if (cm_id->state == IB_CM_IDLE)
2603 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2580 ret = ib_post_send_mad(msg, NULL);
2604 &msg->send_wr, &bad_send_wr);
2605 else 2581 else
2606 ret = -EINVAL; 2582 ret = -EINVAL;
2607 2583
@@ -2629,7 +2605,6 @@ static void cm_format_sidr_req_event(struct cm_work *work,
2629 param = &work->cm_event.param.sidr_req_rcvd; 2605 param = &work->cm_event.param.sidr_req_rcvd;
2630 param->pkey = __be16_to_cpu(sidr_req_msg->pkey); 2606 param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2631 param->listen_id = listen_id; 2607 param->listen_id = listen_id;
2632 param->device = work->port->mad_agent->device;
2633 param->port = work->port->port_num; 2608 param->port = work->port->port_num;
2634 work->cm_event.private_data = &sidr_req_msg->private_data; 2609 work->cm_event.private_data = &sidr_req_msg->private_data;
2635} 2610}
@@ -2642,7 +2617,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
2642 struct ib_wc *wc; 2617 struct ib_wc *wc;
2643 unsigned long flags; 2618 unsigned long flags;
2644 2619
2645 cm_id = ib_create_cm_id(NULL, NULL); 2620 cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
2646 if (IS_ERR(cm_id)) 2621 if (IS_ERR(cm_id))
2647 return PTR_ERR(cm_id); 2622 return PTR_ERR(cm_id);
2648 cm_id_priv = container_of(cm_id, struct cm_id_private, id); 2623 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -2666,7 +2641,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
2666 spin_unlock_irqrestore(&cm.lock, flags); 2641 spin_unlock_irqrestore(&cm.lock, flags);
2667 goto out; /* Duplicate message. */ 2642 goto out; /* Duplicate message. */
2668 } 2643 }
2669 cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); 2644 cur_cm_id_priv = cm_find_listen(cm_id->device,
2645 sidr_req_msg->service_id);
2670 if (!cur_cm_id_priv) { 2646 if (!cur_cm_id_priv) {
2671 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); 2647 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
2672 spin_unlock_irqrestore(&cm.lock, flags); 2648 spin_unlock_irqrestore(&cm.lock, flags);
@@ -2715,7 +2691,6 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
2715{ 2691{
2716 struct cm_id_private *cm_id_priv; 2692 struct cm_id_private *cm_id_priv;
2717 struct ib_mad_send_buf *msg; 2693 struct ib_mad_send_buf *msg;
2718 struct ib_send_wr *bad_send_wr;
2719 unsigned long flags; 2694 unsigned long flags;
2720 int ret; 2695 int ret;
2721 2696
@@ -2737,8 +2712,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
2737 2712
2738 cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, 2713 cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
2739 param); 2714 param);
2740 ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, 2715 ret = ib_post_send_mad(msg, NULL);
2741 &msg->send_wr, &bad_send_wr);
2742 if (ret) { 2716 if (ret) {
2743 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2717 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2744 cm_free_msg(msg); 2718 cm_free_msg(msg);
@@ -2791,8 +2765,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
2791 goto out; 2765 goto out;
2792 } 2766 }
2793 cm_id_priv->id.state = IB_CM_IDLE; 2767 cm_id_priv->id.state = IB_CM_IDLE;
2794 ib_cancel_mad(cm_id_priv->av.port->mad_agent, 2768 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2795 (unsigned long) cm_id_priv->msg);
2796 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2769 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2797 2770
2798 cm_format_sidr_rep_event(work); 2771 cm_format_sidr_rep_event(work);
@@ -2860,9 +2833,7 @@ discard:
2860static void cm_send_handler(struct ib_mad_agent *mad_agent, 2833static void cm_send_handler(struct ib_mad_agent *mad_agent,
2861 struct ib_mad_send_wc *mad_send_wc) 2834 struct ib_mad_send_wc *mad_send_wc)
2862{ 2835{
2863 struct ib_mad_send_buf *msg; 2836 struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
2864
2865 msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id;
2866 2837
2867 switch (mad_send_wc->status) { 2838 switch (mad_send_wc->status) {
2868 case IB_WC_SUCCESS: 2839 case IB_WC_SUCCESS:
@@ -3064,10 +3035,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3064 case IB_CM_ESTABLISHED: 3035 case IB_CM_ESTABLISHED:
3065 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | 3036 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3066 IB_QP_PKEY_INDEX | IB_QP_PORT; 3037 IB_QP_PKEY_INDEX | IB_QP_PORT;
3067 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; 3038 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
3039 IB_ACCESS_REMOTE_WRITE;
3068 if (cm_id_priv->responder_resources) 3040 if (cm_id_priv->responder_resources)
3069 qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | 3041 qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
3070 IB_ACCESS_REMOTE_READ;
3071 qp_attr->pkey_index = cm_id_priv->av.pkey_index; 3042 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3072 qp_attr->port_num = cm_id_priv->av.port->port_num; 3043 qp_attr->port_num = cm_id_priv->av.port->port_num;
3073 ret = 0; 3044 ret = 0;
@@ -3097,14 +3068,18 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3097 case IB_CM_MRA_REP_RCVD: 3068 case IB_CM_MRA_REP_RCVD:
3098 case IB_CM_ESTABLISHED: 3069 case IB_CM_ESTABLISHED:
3099 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | 3070 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3100 IB_QP_DEST_QPN | IB_QP_RQ_PSN | 3071 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3101 IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
3102 qp_attr->ah_attr = cm_id_priv->av.ah_attr; 3072 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3103 qp_attr->path_mtu = cm_id_priv->path_mtu; 3073 qp_attr->path_mtu = cm_id_priv->path_mtu;
3104 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); 3074 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3105 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); 3075 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3106 qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; 3076 if (cm_id_priv->qp_type == IB_QPT_RC) {
3107 qp_attr->min_rnr_timer = 0; 3077 *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3078 IB_QP_MIN_RNR_TIMER;
3079 qp_attr->max_dest_rd_atomic =
3080 cm_id_priv->responder_resources;
3081 qp_attr->min_rnr_timer = 0;
3082 }
3108 if (cm_id_priv->alt_av.ah_attr.dlid) { 3083 if (cm_id_priv->alt_av.ah_attr.dlid) {
3109 *qp_attr_mask |= IB_QP_ALT_PATH; 3084 *qp_attr_mask |= IB_QP_ALT_PATH;
3110 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; 3085 qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -3133,14 +3108,17 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3133 case IB_CM_REP_SENT: 3108 case IB_CM_REP_SENT:
3134 case IB_CM_MRA_REP_RCVD: 3109 case IB_CM_MRA_REP_RCVD:
3135 case IB_CM_ESTABLISHED: 3110 case IB_CM_ESTABLISHED:
3136 *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | 3111 *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3137 IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
3138 IB_QP_MAX_QP_RD_ATOMIC;
3139 qp_attr->timeout = cm_id_priv->local_ack_timeout;
3140 qp_attr->retry_cnt = cm_id_priv->retry_count;
3141 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3142 qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); 3112 qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3143 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; 3113 if (cm_id_priv->qp_type == IB_QPT_RC) {
3114 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3115 IB_QP_RNR_RETRY |
3116 IB_QP_MAX_QP_RD_ATOMIC;
3117 qp_attr->timeout = cm_id_priv->local_ack_timeout;
3118 qp_attr->retry_cnt = cm_id_priv->retry_count;
3119 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3120 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
3121 }
3144 if (cm_id_priv->alt_av.ah_attr.dlid) { 3122 if (cm_id_priv->alt_av.ah_attr.dlid) {
3145 *qp_attr_mask |= IB_QP_PATH_MIG_STATE; 3123 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3146 qp_attr->path_mig_state = IB_MIG_REARM; 3124 qp_attr->path_mig_state = IB_MIG_REARM;
@@ -3323,6 +3301,7 @@ static void __exit ib_cm_cleanup(void)
3323 flush_workqueue(cm.wq); 3301 flush_workqueue(cm.wq);
3324 destroy_workqueue(cm.wq); 3302 destroy_workqueue(cm.wq);
3325 ib_unregister_client(&cm_client); 3303 ib_unregister_client(&cm_client);
3304 idr_destroy(&cm.local_id_table);
3326} 3305}
3327 3306
3328module_init(ib_cm_init); 3307module_init(ib_cm_init);
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 813ab70bf6d5..4d3aee90c249 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
186 req_msg->offset40 = cpu_to_be32((be32_to_cpu( 186 req_msg->offset40 = cpu_to_be32((be32_to_cpu(
187 req_msg->offset40) & 187 req_msg->offset40) &
188 0xFFFFFFF9) | 0x2); 188 0xFFFFFFF9) | 0x2);
189 break;
189 default: 190 default:
190 req_msg->offset40 = cpu_to_be32(be32_to_cpu( 191 req_msg->offset40 = cpu_to_be32(be32_to_cpu(
191 req_msg->offset40) & 192 req_msg->offset40) &
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d3cf84e01587..5a6e44976405 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -514,6 +514,12 @@ int ib_query_port(struct ib_device *device,
514 u8 port_num, 514 u8 port_num,
515 struct ib_port_attr *port_attr) 515 struct ib_port_attr *port_attr)
516{ 516{
517 if (device->node_type == IB_NODE_SWITCH) {
518 if (port_num)
519 return -EINVAL;
520 } else if (port_num < 1 || port_num > device->phys_port_cnt)
521 return -EINVAL;
522
517 return device->query_port(device, port_num, port_attr); 523 return device->query_port(device, port_num, port_attr);
518} 524}
519EXPORT_SYMBOL(ib_query_port); 525EXPORT_SYMBOL(ib_query_port);
@@ -583,6 +589,12 @@ int ib_modify_port(struct ib_device *device,
583 u8 port_num, int port_modify_mask, 589 u8 port_num, int port_modify_mask,
584 struct ib_port_modify *port_modify) 590 struct ib_port_modify *port_modify)
585{ 591{
592 if (device->node_type == IB_NODE_SWITCH) {
593 if (port_num)
594 return -EINVAL;
595 } else if (port_num < 1 || port_num > device->phys_port_cnt)
596 return -EINVAL;
597
586 return device->modify_port(device, port_num, port_modify_mask, 598 return device->modify_port(device, port_num, port_modify_mask,
587 port_modify); 599 port_modify);
588} 600}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a14ca87fda18..88f9f8c9eacc 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -579,7 +579,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
579} 579}
580 580
581static void snoop_send(struct ib_mad_qp_info *qp_info, 581static void snoop_send(struct ib_mad_qp_info *qp_info,
582 struct ib_send_wr *send_wr, 582 struct ib_mad_send_buf *send_buf,
583 struct ib_mad_send_wc *mad_send_wc, 583 struct ib_mad_send_wc *mad_send_wc,
584 int mad_snoop_flags) 584 int mad_snoop_flags)
585{ 585{
@@ -597,7 +597,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info,
597 atomic_inc(&mad_snoop_priv->refcount); 597 atomic_inc(&mad_snoop_priv->refcount);
598 spin_unlock_irqrestore(&qp_info->snoop_lock, flags); 598 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
599 mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, 599 mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
600 send_wr, mad_send_wc); 600 send_buf, mad_send_wc);
601 if (atomic_dec_and_test(&mad_snoop_priv->refcount)) 601 if (atomic_dec_and_test(&mad_snoop_priv->refcount))
602 wake_up(&mad_snoop_priv->wait); 602 wake_up(&mad_snoop_priv->wait);
603 spin_lock_irqsave(&qp_info->snoop_lock, flags); 603 spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -654,10 +654,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
654 * Return < 0 if error 654 * Return < 0 if error
655 */ 655 */
656static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, 656static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
657 struct ib_smp *smp, 657 struct ib_mad_send_wr_private *mad_send_wr)
658 struct ib_send_wr *send_wr)
659{ 658{
660 int ret; 659 int ret;
660 struct ib_smp *smp = mad_send_wr->send_buf.mad;
661 unsigned long flags; 661 unsigned long flags;
662 struct ib_mad_local_private *local; 662 struct ib_mad_local_private *local;
663 struct ib_mad_private *mad_priv; 663 struct ib_mad_private *mad_priv;
@@ -666,6 +666,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
666 struct ib_device *device = mad_agent_priv->agent.device; 666 struct ib_device *device = mad_agent_priv->agent.device;
667 u8 port_num = mad_agent_priv->agent.port_num; 667 u8 port_num = mad_agent_priv->agent.port_num;
668 struct ib_wc mad_wc; 668 struct ib_wc mad_wc;
669 struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
669 670
670 if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { 671 if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
671 ret = -EINVAL; 672 ret = -EINVAL;
@@ -745,13 +746,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
745 goto out; 746 goto out;
746 } 747 }
747 748
748 local->send_wr = *send_wr; 749 local->mad_send_wr = mad_send_wr;
749 local->send_wr.sg_list = local->sg_list;
750 memcpy(local->sg_list, send_wr->sg_list,
751 sizeof *send_wr->sg_list * send_wr->num_sge);
752 local->send_wr.next = NULL;
753 local->tid = send_wr->wr.ud.mad_hdr->tid;
754 local->wr_id = send_wr->wr_id;
755 /* Reference MAD agent until send side of local completion handled */ 750 /* Reference MAD agent until send side of local completion handled */
756 atomic_inc(&mad_agent_priv->refcount); 751 atomic_inc(&mad_agent_priv->refcount);
757 /* Queue local completion to local list */ 752 /* Queue local completion to local list */
@@ -781,17 +776,17 @@ static int get_buf_length(int hdr_len, int data_len)
781 776
782struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, 777struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
783 u32 remote_qpn, u16 pkey_index, 778 u32 remote_qpn, u16 pkey_index,
784 struct ib_ah *ah, int rmpp_active, 779 int rmpp_active,
785 int hdr_len, int data_len, 780 int hdr_len, int data_len,
786 gfp_t gfp_mask) 781 gfp_t gfp_mask)
787{ 782{
788 struct ib_mad_agent_private *mad_agent_priv; 783 struct ib_mad_agent_private *mad_agent_priv;
789 struct ib_mad_send_buf *send_buf; 784 struct ib_mad_send_wr_private *mad_send_wr;
790 int buf_size; 785 int buf_size;
791 void *buf; 786 void *buf;
792 787
793 mad_agent_priv = container_of(mad_agent, 788 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
794 struct ib_mad_agent_private, agent); 789 agent);
795 buf_size = get_buf_length(hdr_len, data_len); 790 buf_size = get_buf_length(hdr_len, data_len);
796 791
797 if ((!mad_agent->rmpp_version && 792 if ((!mad_agent->rmpp_version &&
@@ -799,45 +794,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
799 (!rmpp_active && buf_size > sizeof(struct ib_mad))) 794 (!rmpp_active && buf_size > sizeof(struct ib_mad)))
800 return ERR_PTR(-EINVAL); 795 return ERR_PTR(-EINVAL);
801 796
802 buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); 797 buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
803 if (!buf) 798 if (!buf)
804 return ERR_PTR(-ENOMEM); 799 return ERR_PTR(-ENOMEM);
805 memset(buf, 0, sizeof *send_buf + buf_size); 800 memset(buf, 0, sizeof *mad_send_wr + buf_size);
806 801
807 send_buf = buf + buf_size; 802 mad_send_wr = buf + buf_size;
808 send_buf->mad = buf; 803 mad_send_wr->send_buf.mad = buf;
809 804
810 send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, 805 mad_send_wr->mad_agent_priv = mad_agent_priv;
811 buf, buf_size, DMA_TO_DEVICE); 806 mad_send_wr->sg_list[0].length = buf_size;
812 pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); 807 mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
813 send_buf->sge.length = buf_size; 808
814 send_buf->sge.lkey = mad_agent->mr->lkey; 809 mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
815 810 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
816 send_buf->send_wr.wr_id = (unsigned long) send_buf; 811 mad_send_wr->send_wr.num_sge = 1;
817 send_buf->send_wr.sg_list = &send_buf->sge; 812 mad_send_wr->send_wr.opcode = IB_WR_SEND;
818 send_buf->send_wr.num_sge = 1; 813 mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
819 send_buf->send_wr.opcode = IB_WR_SEND; 814 mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
820 send_buf->send_wr.send_flags = IB_SEND_SIGNALED; 815 mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
821 send_buf->send_wr.wr.ud.ah = ah; 816 mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
822 send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
823 send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
824 send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
825 send_buf->send_wr.wr.ud.pkey_index = pkey_index;
826 817
827 if (rmpp_active) { 818 if (rmpp_active) {
828 struct ib_rmpp_mad *rmpp_mad; 819 struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
829 rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
830 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - 820 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
831 offsetof(struct ib_rmpp_mad, data) + data_len); 821 IB_MGMT_RMPP_HDR + data_len);
832 rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; 822 rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
833 rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; 823 rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
834 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, 824 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
835 IB_MGMT_RMPP_FLAG_ACTIVE); 825 IB_MGMT_RMPP_FLAG_ACTIVE);
836 } 826 }
837 827
838 send_buf->mad_agent = mad_agent; 828 mad_send_wr->send_buf.mad_agent = mad_agent;
839 atomic_inc(&mad_agent_priv->refcount); 829 atomic_inc(&mad_agent_priv->refcount);
840 return send_buf; 830 return &mad_send_wr->send_buf;
841} 831}
842EXPORT_SYMBOL(ib_create_send_mad); 832EXPORT_SYMBOL(ib_create_send_mad);
843 833
@@ -847,10 +837,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
847 837
848 mad_agent_priv = container_of(send_buf->mad_agent, 838 mad_agent_priv = container_of(send_buf->mad_agent,
849 struct ib_mad_agent_private, agent); 839 struct ib_mad_agent_private, agent);
850
851 dma_unmap_single(send_buf->mad_agent->device->dma_device,
852 pci_unmap_addr(send_buf, mapping),
853 send_buf->sge.length, DMA_TO_DEVICE);
854 kfree(send_buf->mad); 840 kfree(send_buf->mad);
855 841
856 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 842 if (atomic_dec_and_test(&mad_agent_priv->refcount))
@@ -861,8 +847,10 @@ EXPORT_SYMBOL(ib_free_send_mad);
861int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) 847int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
862{ 848{
863 struct ib_mad_qp_info *qp_info; 849 struct ib_mad_qp_info *qp_info;
864 struct ib_send_wr *bad_send_wr;
865 struct list_head *list; 850 struct list_head *list;
851 struct ib_send_wr *bad_send_wr;
852 struct ib_mad_agent *mad_agent;
853 struct ib_sge *sge;
866 unsigned long flags; 854 unsigned long flags;
867 int ret; 855 int ret;
868 856
@@ -871,10 +859,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
871 mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; 859 mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
872 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; 860 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
873 861
862 mad_agent = mad_send_wr->send_buf.mad_agent;
863 sge = mad_send_wr->sg_list;
864 sge->addr = dma_map_single(mad_agent->device->dma_device,
865 mad_send_wr->send_buf.mad, sge->length,
866 DMA_TO_DEVICE);
867 pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
868
874 spin_lock_irqsave(&qp_info->send_queue.lock, flags); 869 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
875 if (qp_info->send_queue.count < qp_info->send_queue.max_active) { 870 if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
876 ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, 871 ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
877 &mad_send_wr->send_wr, &bad_send_wr); 872 &bad_send_wr);
878 list = &qp_info->send_queue.list; 873 list = &qp_info->send_queue.list;
879 } else { 874 } else {
880 ret = 0; 875 ret = 0;
@@ -886,6 +881,11 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
886 list_add_tail(&mad_send_wr->mad_list.list, list); 881 list_add_tail(&mad_send_wr->mad_list.list, list);
887 } 882 }
888 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); 883 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
884 if (ret)
885 dma_unmap_single(mad_agent->device->dma_device,
886 pci_unmap_addr(mad_send_wr, mapping),
887 sge->length, DMA_TO_DEVICE);
888
889 return ret; 889 return ret;
890} 890}
891 891
@@ -893,45 +893,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
893 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated 893 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
894 * with the registered client 894 * with the registered client
895 */ 895 */
896int ib_post_send_mad(struct ib_mad_agent *mad_agent, 896int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
897 struct ib_send_wr *send_wr, 897 struct ib_mad_send_buf **bad_send_buf)
898 struct ib_send_wr **bad_send_wr)
899{ 898{
900 int ret = -EINVAL;
901 struct ib_mad_agent_private *mad_agent_priv; 899 struct ib_mad_agent_private *mad_agent_priv;
902 900 struct ib_mad_send_buf *next_send_buf;
903 /* Validate supplied parameters */ 901 struct ib_mad_send_wr_private *mad_send_wr;
904 if (!bad_send_wr) 902 unsigned long flags;
905 goto error1; 903 int ret = -EINVAL;
906
907 if (!mad_agent || !send_wr)
908 goto error2;
909
910 if (!mad_agent->send_handler)
911 goto error2;
912
913 mad_agent_priv = container_of(mad_agent,
914 struct ib_mad_agent_private,
915 agent);
916 904
917 /* Walk list of send WRs and post each on send list */ 905 /* Walk list of send WRs and post each on send list */
918 while (send_wr) { 906 for (; send_buf; send_buf = next_send_buf) {
919 unsigned long flags;
920 struct ib_send_wr *next_send_wr;
921 struct ib_mad_send_wr_private *mad_send_wr;
922 struct ib_smp *smp;
923
924 /* Validate more parameters */
925 if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
926 goto error2;
927 907
928 if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler) 908 mad_send_wr = container_of(send_buf,
929 goto error2; 909 struct ib_mad_send_wr_private,
930 910 send_buf);
931 if (!send_wr->wr.ud.mad_hdr) { 911 mad_agent_priv = mad_send_wr->mad_agent_priv;
932 printk(KERN_ERR PFX "MAD header must be supplied " 912
933 "in WR %p\n", send_wr); 913 if (!send_buf->mad_agent->send_handler ||
934 goto error2; 914 (send_buf->timeout_ms &&
915 !send_buf->mad_agent->recv_handler)) {
916 ret = -EINVAL;
917 goto error;
935 } 918 }
936 919
937 /* 920 /*
@@ -939,40 +922,24 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
939 * current one completes, and the user modifies the work 922 * current one completes, and the user modifies the work
940 * request associated with the completion 923 * request associated with the completion
941 */ 924 */
942 next_send_wr = (struct ib_send_wr *)send_wr->next; 925 next_send_buf = send_buf->next;
926 mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
943 927
944 smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr; 928 if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
945 if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { 929 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
946 ret = handle_outgoing_dr_smp(mad_agent_priv, smp, 930 ret = handle_outgoing_dr_smp(mad_agent_priv,
947 send_wr); 931 mad_send_wr);
948 if (ret < 0) /* error */ 932 if (ret < 0) /* error */
949 goto error2; 933 goto error;
950 else if (ret == 1) /* locally consumed */ 934 else if (ret == 1) /* locally consumed */
951 goto next; 935 continue;
952 } 936 }
953 937
954 /* Allocate MAD send WR tracking structure */ 938 mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
955 mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
956 if (!mad_send_wr) {
957 printk(KERN_ERR PFX "No memory for "
958 "ib_mad_send_wr_private\n");
959 ret = -ENOMEM;
960 goto error2;
961 }
962 memset(mad_send_wr, 0, sizeof *mad_send_wr);
963
964 mad_send_wr->send_wr = *send_wr;
965 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
966 memcpy(mad_send_wr->sg_list, send_wr->sg_list,
967 sizeof *send_wr->sg_list * send_wr->num_sge);
968 mad_send_wr->wr_id = send_wr->wr_id;
969 mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
970 mad_send_wr->mad_agent_priv = mad_agent_priv;
971 /* Timeout will be updated after send completes */ 939 /* Timeout will be updated after send completes */
972 mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. 940 mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
973 ud.timeout_ms); 941 mad_send_wr->retries = send_buf->retries;
974 mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; 942 /* Reference for work request to QP + response */
975 /* One reference for each work request to QP + response */
976 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); 943 mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
977 mad_send_wr->status = IB_WC_SUCCESS; 944 mad_send_wr->status = IB_WC_SUCCESS;
978 945
@@ -995,16 +962,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
995 list_del(&mad_send_wr->agent_list); 962 list_del(&mad_send_wr->agent_list);
996 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 963 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
997 atomic_dec(&mad_agent_priv->refcount); 964 atomic_dec(&mad_agent_priv->refcount);
998 goto error2; 965 goto error;
999 } 966 }
1000next:
1001 send_wr = next_send_wr;
1002 } 967 }
1003 return 0; 968 return 0;
1004 969error:
1005error2: 970 if (bad_send_buf)
1006 *bad_send_wr = send_wr; 971 *bad_send_buf = send_buf;
1007error1:
1008 return ret; 972 return ret;
1009} 973}
1010EXPORT_SYMBOL(ib_post_send_mad); 974EXPORT_SYMBOL(ib_post_send_mad);
@@ -1447,8 +1411,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
1447 * of MAD. 1411 * of MAD.
1448 */ 1412 */
1449 hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; 1413 hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
1450 list_for_each_entry(entry, &port_priv->agent_list, 1414 list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
1451 agent_list) {
1452 if (entry->agent.hi_tid == hi_tid) { 1415 if (entry->agent.hi_tid == hi_tid) {
1453 mad_agent = entry; 1416 mad_agent = entry;
1454 break; 1417 break;
@@ -1571,8 +1534,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
1571 */ 1534 */
1572 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, 1535 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
1573 agent_list) { 1536 agent_list) {
1574 if (is_data_mad(mad_agent_priv, 1537 if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
1575 mad_send_wr->send_wr.wr.ud.mad_hdr) &&
1576 mad_send_wr->tid == tid && mad_send_wr->timeout) { 1538 mad_send_wr->tid == tid && mad_send_wr->timeout) {
1577 /* Verify request has not been canceled */ 1539 /* Verify request has not been canceled */
1578 return (mad_send_wr->status == IB_WC_SUCCESS) ? 1540 return (mad_send_wr->status == IB_WC_SUCCESS) ?
@@ -1628,14 +1590,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1628 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1590 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1629 1591
1630 /* Defined behavior is to complete response before request */ 1592 /* Defined behavior is to complete response before request */
1631 mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; 1593 mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
1632 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 1594 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1633 mad_recv_wc); 1595 mad_recv_wc);
1634 atomic_dec(&mad_agent_priv->refcount); 1596 atomic_dec(&mad_agent_priv->refcount);
1635 1597
1636 mad_send_wc.status = IB_WC_SUCCESS; 1598 mad_send_wc.status = IB_WC_SUCCESS;
1637 mad_send_wc.vendor_err = 0; 1599 mad_send_wc.vendor_err = 0;
1638 mad_send_wc.wr_id = mad_send_wr->wr_id; 1600 mad_send_wc.send_buf = &mad_send_wr->send_buf;
1639 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); 1601 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1640 } else { 1602 } else {
1641 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 1603 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
@@ -1728,11 +1690,11 @@ local:
1728 if (ret & IB_MAD_RESULT_CONSUMED) 1690 if (ret & IB_MAD_RESULT_CONSUMED)
1729 goto out; 1691 goto out;
1730 if (ret & IB_MAD_RESULT_REPLY) { 1692 if (ret & IB_MAD_RESULT_REPLY) {
1731 /* Send response */ 1693 agent_send_response(&response->mad.mad,
1732 if (!agent_send(response, &recv->grh, wc, 1694 &recv->grh, wc,
1733 port_priv->device, 1695 port_priv->device,
1734 port_priv->port_num)) 1696 port_priv->port_num,
1735 response = NULL; 1697 qp_info->qp->qp_num);
1736 goto out; 1698 goto out;
1737 } 1699 }
1738 } 1700 }
@@ -1866,15 +1828,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
1866 1828
1867 if (mad_send_wr->status != IB_WC_SUCCESS ) 1829 if (mad_send_wr->status != IB_WC_SUCCESS )
1868 mad_send_wc->status = mad_send_wr->status; 1830 mad_send_wc->status = mad_send_wr->status;
1869 if (ret != IB_RMPP_RESULT_INTERNAL) 1831 if (ret == IB_RMPP_RESULT_INTERNAL)
1832 ib_rmpp_send_handler(mad_send_wc);
1833 else
1870 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 1834 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
1871 mad_send_wc); 1835 mad_send_wc);
1872 1836
1873 /* Release reference on agent taken when sending */ 1837 /* Release reference on agent taken when sending */
1874 if (atomic_dec_and_test(&mad_agent_priv->refcount)) 1838 if (atomic_dec_and_test(&mad_agent_priv->refcount))
1875 wake_up(&mad_agent_priv->wait); 1839 wake_up(&mad_agent_priv->wait);
1876
1877 kfree(mad_send_wr);
1878 return; 1840 return;
1879done: 1841done:
1880 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1842 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -1888,6 +1850,7 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
1888 struct ib_mad_qp_info *qp_info; 1850 struct ib_mad_qp_info *qp_info;
1889 struct ib_mad_queue *send_queue; 1851 struct ib_mad_queue *send_queue;
1890 struct ib_send_wr *bad_send_wr; 1852 struct ib_send_wr *bad_send_wr;
1853 struct ib_mad_send_wc mad_send_wc;
1891 unsigned long flags; 1854 unsigned long flags;
1892 int ret; 1855 int ret;
1893 1856
@@ -1898,6 +1861,9 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
1898 qp_info = send_queue->qp_info; 1861 qp_info = send_queue->qp_info;
1899 1862
1900retry: 1863retry:
1864 dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
1865 pci_unmap_addr(mad_send_wr, mapping),
1866 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
1901 queued_send_wr = NULL; 1867 queued_send_wr = NULL;
1902 spin_lock_irqsave(&send_queue->lock, flags); 1868 spin_lock_irqsave(&send_queue->lock, flags);
1903 list_del(&mad_list->list); 1869 list_del(&mad_list->list);
@@ -1914,17 +1880,17 @@ retry:
1914 } 1880 }
1915 spin_unlock_irqrestore(&send_queue->lock, flags); 1881 spin_unlock_irqrestore(&send_queue->lock, flags);
1916 1882
1917 /* Restore client wr_id in WC and complete send */ 1883 mad_send_wc.send_buf = &mad_send_wr->send_buf;
1918 wc->wr_id = mad_send_wr->wr_id; 1884 mad_send_wc.status = wc->status;
1885 mad_send_wc.vendor_err = wc->vendor_err;
1919 if (atomic_read(&qp_info->snoop_count)) 1886 if (atomic_read(&qp_info->snoop_count))
1920 snoop_send(qp_info, &mad_send_wr->send_wr, 1887 snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
1921 (struct ib_mad_send_wc *)wc,
1922 IB_MAD_SNOOP_SEND_COMPLETIONS); 1888 IB_MAD_SNOOP_SEND_COMPLETIONS);
1923 ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc); 1889 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1924 1890
1925 if (queued_send_wr) { 1891 if (queued_send_wr) {
1926 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, 1892 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
1927 &bad_send_wr); 1893 &bad_send_wr);
1928 if (ret) { 1894 if (ret) {
1929 printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); 1895 printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
1930 mad_send_wr = queued_send_wr; 1896 mad_send_wr = queued_send_wr;
@@ -2066,38 +2032,37 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2066 2032
2067 list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, 2033 list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2068 &cancel_list, agent_list) { 2034 &cancel_list, agent_list) {
2069 mad_send_wc.wr_id = mad_send_wr->wr_id; 2035 mad_send_wc.send_buf = &mad_send_wr->send_buf;
2036 list_del(&mad_send_wr->agent_list);
2070 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 2037 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2071 &mad_send_wc); 2038 &mad_send_wc);
2072
2073 list_del(&mad_send_wr->agent_list);
2074 kfree(mad_send_wr);
2075 atomic_dec(&mad_agent_priv->refcount); 2039 atomic_dec(&mad_agent_priv->refcount);
2076 } 2040 }
2077} 2041}
2078 2042
2079static struct ib_mad_send_wr_private* 2043static struct ib_mad_send_wr_private*
2080find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) 2044find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2045 struct ib_mad_send_buf *send_buf)
2081{ 2046{
2082 struct ib_mad_send_wr_private *mad_send_wr; 2047 struct ib_mad_send_wr_private *mad_send_wr;
2083 2048
2084 list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, 2049 list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2085 agent_list) { 2050 agent_list) {
2086 if (mad_send_wr->wr_id == wr_id) 2051 if (&mad_send_wr->send_buf == send_buf)
2087 return mad_send_wr; 2052 return mad_send_wr;
2088 } 2053 }
2089 2054
2090 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, 2055 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2091 agent_list) { 2056 agent_list) {
2092 if (is_data_mad(mad_agent_priv, 2057 if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
2093 mad_send_wr->send_wr.wr.ud.mad_hdr) && 2058 &mad_send_wr->send_buf == send_buf)
2094 mad_send_wr->wr_id == wr_id)
2095 return mad_send_wr; 2059 return mad_send_wr;
2096 } 2060 }
2097 return NULL; 2061 return NULL;
2098} 2062}
2099 2063
2100int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) 2064int ib_modify_mad(struct ib_mad_agent *mad_agent,
2065 struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2101{ 2066{
2102 struct ib_mad_agent_private *mad_agent_priv; 2067 struct ib_mad_agent_private *mad_agent_priv;
2103 struct ib_mad_send_wr_private *mad_send_wr; 2068 struct ib_mad_send_wr_private *mad_send_wr;
@@ -2107,7 +2072,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
2107 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, 2072 mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2108 agent); 2073 agent);
2109 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2074 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2110 mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); 2075 mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2111 if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { 2076 if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2112 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2077 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2113 return -EINVAL; 2078 return -EINVAL;
@@ -2119,7 +2084,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
2119 mad_send_wr->refcount -= (mad_send_wr->timeout > 0); 2084 mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2120 } 2085 }
2121 2086
2122 mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; 2087 mad_send_wr->send_buf.timeout_ms = timeout_ms;
2123 if (active) 2088 if (active)
2124 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); 2089 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2125 else 2090 else
@@ -2130,9 +2095,10 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
2130} 2095}
2131EXPORT_SYMBOL(ib_modify_mad); 2096EXPORT_SYMBOL(ib_modify_mad);
2132 2097
2133void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) 2098void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2099 struct ib_mad_send_buf *send_buf)
2134{ 2100{
2135 ib_modify_mad(mad_agent, wr_id, 0); 2101 ib_modify_mad(mad_agent, send_buf, 0);
2136} 2102}
2137EXPORT_SYMBOL(ib_cancel_mad); 2103EXPORT_SYMBOL(ib_cancel_mad);
2138 2104
@@ -2166,10 +2132,9 @@ static void local_completions(void *data)
2166 * Defined behavior is to complete response 2132 * Defined behavior is to complete response
2167 * before request 2133 * before request
2168 */ 2134 */
2169 build_smp_wc(local->wr_id, 2135 build_smp_wc((unsigned long) local->mad_send_wr,
2170 be16_to_cpu(IB_LID_PERMISSIVE), 2136 be16_to_cpu(IB_LID_PERMISSIVE),
2171 0 /* pkey index */, 2137 0, recv_mad_agent->agent.port_num, &wc);
2172 recv_mad_agent->agent.port_num, &wc);
2173 2138
2174 local->mad_priv->header.recv_wc.wc = &wc; 2139 local->mad_priv->header.recv_wc.wc = &wc;
2175 local->mad_priv->header.recv_wc.mad_len = 2140 local->mad_priv->header.recv_wc.mad_len =
@@ -2196,11 +2161,11 @@ local_send_completion:
2196 /* Complete send */ 2161 /* Complete send */
2197 mad_send_wc.status = IB_WC_SUCCESS; 2162 mad_send_wc.status = IB_WC_SUCCESS;
2198 mad_send_wc.vendor_err = 0; 2163 mad_send_wc.vendor_err = 0;
2199 mad_send_wc.wr_id = local->wr_id; 2164 mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2200 if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) 2165 if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2201 snoop_send(mad_agent_priv->qp_info, &local->send_wr, 2166 snoop_send(mad_agent_priv->qp_info,
2202 &mad_send_wc, 2167 &local->mad_send_wr->send_buf,
2203 IB_MAD_SNOOP_SEND_COMPLETIONS); 2168 &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
2204 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 2169 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2205 &mad_send_wc); 2170 &mad_send_wc);
2206 2171
@@ -2221,8 +2186,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2221 if (!mad_send_wr->retries--) 2186 if (!mad_send_wr->retries--)
2222 return -ETIMEDOUT; 2187 return -ETIMEDOUT;
2223 2188
2224 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. 2189 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2225 wr.ud.timeout_ms);
2226 2190
2227 if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { 2191 if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
2228 ret = ib_retry_rmpp(mad_send_wr); 2192 ret = ib_retry_rmpp(mad_send_wr);
@@ -2285,11 +2249,10 @@ static void timeout_sends(void *data)
2285 mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; 2249 mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2286 else 2250 else
2287 mad_send_wc.status = mad_send_wr->status; 2251 mad_send_wc.status = mad_send_wr->status;
2288 mad_send_wc.wr_id = mad_send_wr->wr_id; 2252 mad_send_wc.send_buf = &mad_send_wr->send_buf;
2289 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, 2253 mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2290 &mad_send_wc); 2254 &mad_send_wc);
2291 2255
2292 kfree(mad_send_wr);
2293 atomic_dec(&mad_agent_priv->refcount); 2256 atomic_dec(&mad_agent_priv->refcount);
2294 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2257 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2295 } 2258 }
@@ -2683,40 +2646,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
2683 2646
2684static void ib_mad_init_device(struct ib_device *device) 2647static void ib_mad_init_device(struct ib_device *device)
2685{ 2648{
2686 int num_ports, cur_port, i; 2649 int start, end, i;
2687 2650
2688 if (device->node_type == IB_NODE_SWITCH) { 2651 if (device->node_type == IB_NODE_SWITCH) {
2689 num_ports = 1; 2652 start = 0;
2690 cur_port = 0; 2653 end = 0;
2691 } else { 2654 } else {
2692 num_ports = device->phys_port_cnt; 2655 start = 1;
2693 cur_port = 1; 2656 end = device->phys_port_cnt;
2694 } 2657 }
2695 for (i = 0; i < num_ports; i++, cur_port++) { 2658
2696 if (ib_mad_port_open(device, cur_port)) { 2659 for (i = start; i <= end; i++) {
2660 if (ib_mad_port_open(device, i)) {
2697 printk(KERN_ERR PFX "Couldn't open %s port %d\n", 2661 printk(KERN_ERR PFX "Couldn't open %s port %d\n",
2698 device->name, cur_port); 2662 device->name, i);
2699 goto error_device_open; 2663 goto error;
2700 } 2664 }
2701 if (ib_agent_port_open(device, cur_port)) { 2665 if (ib_agent_port_open(device, i)) {
2702 printk(KERN_ERR PFX "Couldn't open %s port %d " 2666 printk(KERN_ERR PFX "Couldn't open %s port %d "
2703 "for agents\n", 2667 "for agents\n",
2704 device->name, cur_port); 2668 device->name, i);
2705 goto error_device_open; 2669 goto error_agent;
2706 } 2670 }
2707 } 2671 }
2708 return; 2672 return;
2709 2673
2710error_device_open: 2674error_agent:
2711 while (i > 0) { 2675 if (ib_mad_port_close(device, i))
2712 cur_port--; 2676 printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2713 if (ib_agent_port_close(device, cur_port)) 2677 device->name, i);
2678
2679error:
2680 i--;
2681
2682 while (i >= start) {
2683 if (ib_agent_port_close(device, i))
2714 printk(KERN_ERR PFX "Couldn't close %s port %d " 2684 printk(KERN_ERR PFX "Couldn't close %s port %d "
2715 "for agents\n", 2685 "for agents\n",
2716 device->name, cur_port); 2686 device->name, i);
2717 if (ib_mad_port_close(device, cur_port)) 2687 if (ib_mad_port_close(device, i))
2718 printk(KERN_ERR PFX "Couldn't close %s port %d\n", 2688 printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2719 device->name, cur_port); 2689 device->name, i);
2720 i--; 2690 i--;
2721 } 2691 }
2722} 2692}
@@ -2754,7 +2724,6 @@ static int __init ib_mad_init_module(void)
2754 int ret; 2724 int ret;
2755 2725
2756 spin_lock_init(&ib_mad_port_list_lock); 2726 spin_lock_init(&ib_mad_port_list_lock);
2757 spin_lock_init(&ib_agent_port_list_lock);
2758 2727
2759 ib_mad_cache = kmem_cache_create("ib_mad", 2728 ib_mad_cache = kmem_cache_create("ib_mad",
2760 sizeof(struct ib_mad_private), 2729 sizeof(struct ib_mad_private),
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index f1ba794e0daa..570f78682af3 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -118,9 +118,10 @@ struct ib_mad_send_wr_private {
118 struct ib_mad_list_head mad_list; 118 struct ib_mad_list_head mad_list;
119 struct list_head agent_list; 119 struct list_head agent_list;
120 struct ib_mad_agent_private *mad_agent_priv; 120 struct ib_mad_agent_private *mad_agent_priv;
121 struct ib_mad_send_buf send_buf;
122 DECLARE_PCI_UNMAP_ADDR(mapping)
121 struct ib_send_wr send_wr; 123 struct ib_send_wr send_wr;
122 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; 124 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
123 u64 wr_id; /* client WR ID */
124 __be64 tid; 125 __be64 tid;
125 unsigned long timeout; 126 unsigned long timeout;
126 int retries; 127 int retries;
@@ -141,10 +142,7 @@ struct ib_mad_local_private {
141 struct list_head completion_list; 142 struct list_head completion_list;
142 struct ib_mad_private *mad_priv; 143 struct ib_mad_private *mad_priv;
143 struct ib_mad_agent_private *recv_mad_agent; 144 struct ib_mad_agent_private *recv_mad_agent;
144 struct ib_send_wr send_wr; 145 struct ib_mad_send_wr_private *mad_send_wr;
145 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
146 u64 wr_id; /* client WR ID */
147 __be64 tid;
148}; 146};
149 147
150struct ib_mad_mgmt_method_table { 148struct ib_mad_mgmt_method_table {
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e23836d0e21b..3249e1d8c07b 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
103static int data_offset(u8 mgmt_class) 103static int data_offset(u8 mgmt_class)
104{ 104{
105 if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) 105 if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
106 return offsetof(struct ib_sa_mad, data); 106 return IB_MGMT_SA_HDR;
107 else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && 107 else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
108 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) 108 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
109 return offsetof(struct ib_vendor_mad, data); 109 return IB_MGMT_VENDOR_HDR;
110 else 110 else
111 return offsetof(struct ib_rmpp_mad, data); 111 return IB_MGMT_RMPP_HDR;
112} 112}
113 113
114static void format_ack(struct ib_rmpp_mad *ack, 114static void format_ack(struct ib_rmpp_mad *ack,
@@ -135,55 +135,52 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
135 struct ib_mad_recv_wc *recv_wc) 135 struct ib_mad_recv_wc *recv_wc)
136{ 136{
137 struct ib_mad_send_buf *msg; 137 struct ib_mad_send_buf *msg;
138 struct ib_send_wr *bad_send_wr; 138 int ret;
139 int hdr_len, ret;
140 139
141 hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
142 msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, 140 msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
143 recv_wc->wc->pkey_index, rmpp_recv->ah, 1, 141 recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
144 hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, 142 IB_MGMT_RMPP_DATA, GFP_KERNEL);
145 GFP_KERNEL);
146 if (!msg) 143 if (!msg)
147 return; 144 return;
148 145
149 format_ack((struct ib_rmpp_mad *) msg->mad, 146 format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
150 (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); 147 rmpp_recv);
151 ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, 148 msg->ah = rmpp_recv->ah;
152 &bad_send_wr); 149 ret = ib_post_send_mad(msg, NULL);
153 if (ret) 150 if (ret)
154 ib_free_send_mad(msg); 151 ib_free_send_mad(msg);
155} 152}
156 153
157static int alloc_response_msg(struct ib_mad_agent *agent, 154static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
158 struct ib_mad_recv_wc *recv_wc, 155 struct ib_mad_recv_wc *recv_wc)
159 struct ib_mad_send_buf **msg)
160{ 156{
161 struct ib_mad_send_buf *m; 157 struct ib_mad_send_buf *msg;
162 struct ib_ah *ah; 158 struct ib_ah *ah;
163 int hdr_len;
164 159
165 ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, 160 ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
166 recv_wc->recv_buf.grh, agent->port_num); 161 recv_wc->recv_buf.grh, agent->port_num);
167 if (IS_ERR(ah)) 162 if (IS_ERR(ah))
168 return PTR_ERR(ah); 163 return (void *) ah;
169 164
170 hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); 165 msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
171 m = ib_create_send_mad(agent, recv_wc->wc->src_qp, 166 recv_wc->wc->pkey_index, 1,
172 recv_wc->wc->pkey_index, ah, 1, hdr_len, 167 IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
173 sizeof(struct ib_rmpp_mad) - hdr_len, 168 GFP_KERNEL);
174 GFP_KERNEL); 169 if (IS_ERR(msg))
175 if (IS_ERR(m)) {
176 ib_destroy_ah(ah); 170 ib_destroy_ah(ah);
177 return PTR_ERR(m); 171 else
178 } 172 msg->ah = ah;
179 *msg = m; 173
180 return 0; 174 return msg;
181} 175}
182 176
183static void free_msg(struct ib_mad_send_buf *msg) 177void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
184{ 178{
185 ib_destroy_ah(msg->send_wr.wr.ud.ah); 179 struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
186 ib_free_send_mad(msg); 180
181 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK)
182 ib_destroy_ah(mad_send_wc->send_buf->ah);
183 ib_free_send_mad(mad_send_wc->send_buf);
187} 184}
188 185
189static void nack_recv(struct ib_mad_agent_private *agent, 186static void nack_recv(struct ib_mad_agent_private *agent,
@@ -191,14 +188,13 @@ static void nack_recv(struct ib_mad_agent_private *agent,
191{ 188{
192 struct ib_mad_send_buf *msg; 189 struct ib_mad_send_buf *msg;
193 struct ib_rmpp_mad *rmpp_mad; 190 struct ib_rmpp_mad *rmpp_mad;
194 struct ib_send_wr *bad_send_wr;
195 int ret; 191 int ret;
196 192
197 ret = alloc_response_msg(&agent->agent, recv_wc, &msg); 193 msg = alloc_response_msg(&agent->agent, recv_wc);
198 if (ret) 194 if (IS_ERR(msg))
199 return; 195 return;
200 196
201 rmpp_mad = (struct ib_rmpp_mad *) msg->mad; 197 rmpp_mad = msg->mad;
202 memcpy(rmpp_mad, recv_wc->recv_buf.mad, 198 memcpy(rmpp_mad, recv_wc->recv_buf.mad,
203 data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); 199 data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
204 200
@@ -210,9 +206,11 @@ static void nack_recv(struct ib_mad_agent_private *agent,
210 rmpp_mad->rmpp_hdr.seg_num = 0; 206 rmpp_mad->rmpp_hdr.seg_num = 0;
211 rmpp_mad->rmpp_hdr.paylen_newwin = 0; 207 rmpp_mad->rmpp_hdr.paylen_newwin = 0;
212 208
213 ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); 209 ret = ib_post_send_mad(msg, NULL);
214 if (ret) 210 if (ret) {
215 free_msg(msg); 211 ib_destroy_ah(msg->ah);
212 ib_free_send_mad(msg);
213 }
216} 214}
217 215
218static void recv_timeout_handler(void *data) 216static void recv_timeout_handler(void *data)
@@ -585,7 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
585 int timeout; 583 int timeout;
586 u32 paylen; 584 u32 paylen;
587 585
588 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; 586 rmpp_mad = mad_send_wr->send_buf.mad;
589 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); 587 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
590 rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); 588 rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
591 589
@@ -612,7 +610,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
612 } 610 }
613 611
614 /* 2 seconds for an ACK until we can find the packet lifetime */ 612 /* 2 seconds for an ACK until we can find the packet lifetime */
615 timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; 613 timeout = mad_send_wr->send_buf.timeout_ms;
616 if (!timeout || timeout > 2000) 614 if (!timeout || timeout > 2000)
617 mad_send_wr->timeout = msecs_to_jiffies(2000); 615 mad_send_wr->timeout = msecs_to_jiffies(2000);
618 mad_send_wr->seg_num++; 616 mad_send_wr->seg_num++;
@@ -640,7 +638,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
640 638
641 wc.status = IB_WC_REM_ABORT_ERR; 639 wc.status = IB_WC_REM_ABORT_ERR;
642 wc.vendor_err = rmpp_status; 640 wc.vendor_err = rmpp_status;
643 wc.wr_id = mad_send_wr->wr_id; 641 wc.send_buf = &mad_send_wr->send_buf;
644 ib_mad_complete_send_wr(mad_send_wr, &wc); 642 ib_mad_complete_send_wr(mad_send_wr, &wc);
645 return; 643 return;
646out: 644out:
@@ -694,12 +692,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
694 692
695 if (seg_num > mad_send_wr->last_ack) { 693 if (seg_num > mad_send_wr->last_ack) {
696 mad_send_wr->last_ack = seg_num; 694 mad_send_wr->last_ack = seg_num;
697 mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; 695 mad_send_wr->retries = mad_send_wr->send_buf.retries;
698 } 696 }
699 mad_send_wr->newwin = newwin; 697 mad_send_wr->newwin = newwin;
700 if (mad_send_wr->last_ack == mad_send_wr->total_seg) { 698 if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
701 /* If no response is expected, the ACK completes the send */ 699 /* If no response is expected, the ACK completes the send */
702 if (!mad_send_wr->send_wr.wr.ud.timeout_ms) { 700 if (!mad_send_wr->send_buf.timeout_ms) {
703 struct ib_mad_send_wc wc; 701 struct ib_mad_send_wc wc;
704 702
705 ib_mark_mad_done(mad_send_wr); 703 ib_mark_mad_done(mad_send_wr);
@@ -707,13 +705,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
707 705
708 wc.status = IB_WC_SUCCESS; 706 wc.status = IB_WC_SUCCESS;
709 wc.vendor_err = 0; 707 wc.vendor_err = 0;
710 wc.wr_id = mad_send_wr->wr_id; 708 wc.send_buf = &mad_send_wr->send_buf;
711 ib_mad_complete_send_wr(mad_send_wr, &wc); 709 ib_mad_complete_send_wr(mad_send_wr, &wc);
712 return; 710 return;
713 } 711 }
714 if (mad_send_wr->refcount == 1) 712 if (mad_send_wr->refcount == 1)
715 ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> 713 ib_reset_mad_timeout(mad_send_wr,
716 send_wr.wr.ud.timeout_ms); 714 mad_send_wr->send_buf.timeout_ms);
717 } else if (mad_send_wr->refcount == 1 && 715 } else if (mad_send_wr->refcount == 1 &&
718 mad_send_wr->seg_num < mad_send_wr->newwin && 716 mad_send_wr->seg_num < mad_send_wr->newwin &&
719 mad_send_wr->seg_num <= mad_send_wr->total_seg) { 717 mad_send_wr->seg_num <= mad_send_wr->total_seg) {
@@ -842,7 +840,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
842 struct ib_rmpp_mad *rmpp_mad; 840 struct ib_rmpp_mad *rmpp_mad;
843 int i, total_len, ret; 841 int i, total_len, ret;
844 842
845 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; 843 rmpp_mad = mad_send_wr->send_buf.mad;
846 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 844 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
847 IB_MGMT_RMPP_FLAG_ACTIVE)) 845 IB_MGMT_RMPP_FLAG_ACTIVE))
848 return IB_RMPP_RESULT_UNHANDLED; 846 return IB_RMPP_RESULT_UNHANDLED;
@@ -863,7 +861,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
863 861
864 mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / 862 mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
865 (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); 863 (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
866 mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - 864 mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
867 be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); 865 be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
868 866
869 /* We need to wait for the final ACK even if there isn't a response */ 867 /* We need to wait for the final ACK even if there isn't a response */
@@ -878,23 +876,15 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
878 struct ib_mad_send_wc *mad_send_wc) 876 struct ib_mad_send_wc *mad_send_wc)
879{ 877{
880 struct ib_rmpp_mad *rmpp_mad; 878 struct ib_rmpp_mad *rmpp_mad;
881 struct ib_mad_send_buf *msg;
882 int ret; 879 int ret;
883 880
884 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; 881 rmpp_mad = mad_send_wr->send_buf.mad;
885 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 882 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
886 IB_MGMT_RMPP_FLAG_ACTIVE)) 883 IB_MGMT_RMPP_FLAG_ACTIVE))
887 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ 884 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
888 885
889 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { 886 if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
890 msg = (struct ib_mad_send_buf *) (unsigned long)
891 mad_send_wc->wr_id;
892 if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
893 ib_free_send_mad(msg);
894 else
895 free_msg(msg);
896 return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ 887 return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
897 }
898 888
899 if (mad_send_wc->status != IB_WC_SUCCESS || 889 if (mad_send_wc->status != IB_WC_SUCCESS ||
900 mad_send_wr->status != IB_WC_SUCCESS) 890 mad_send_wr->status != IB_WC_SUCCESS)
@@ -905,7 +895,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
905 895
906 if (mad_send_wr->last_ack == mad_send_wr->total_seg) { 896 if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
907 mad_send_wr->timeout = 897 mad_send_wr->timeout =
908 msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); 898 msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
909 return IB_RMPP_RESULT_PROCESSED; /* Send done */ 899 return IB_RMPP_RESULT_PROCESSED; /* Send done */
910 } 900 }
911 901
@@ -926,7 +916,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
926 struct ib_rmpp_mad *rmpp_mad; 916 struct ib_rmpp_mad *rmpp_mad;
927 int ret; 917 int ret;
928 918
929 rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; 919 rmpp_mad = mad_send_wr->send_buf.mad;
930 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 920 if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
931 IB_MGMT_RMPP_FLAG_ACTIVE)) 921 IB_MGMT_RMPP_FLAG_ACTIVE))
932 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ 922 return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
index c4924dfb8e75..f0616fd22494 100644
--- a/drivers/infiniband/core/mad_rmpp.h
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -51,6 +51,8 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
51int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, 51int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
52 struct ib_mad_send_wc *mad_send_wc); 52 struct ib_mad_send_wc *mad_send_wc);
53 53
54void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
55
54void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); 56void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
55 57
56int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); 58int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 262618210c1c..89ce9dc210d4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -73,11 +73,10 @@ struct ib_sa_device {
73struct ib_sa_query { 73struct ib_sa_query {
74 void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); 74 void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
75 void (*release)(struct ib_sa_query *); 75 void (*release)(struct ib_sa_query *);
76 struct ib_sa_port *port; 76 struct ib_sa_port *port;
77 struct ib_sa_mad *mad; 77 struct ib_mad_send_buf *mad_buf;
78 struct ib_sa_sm_ah *sm_ah; 78 struct ib_sa_sm_ah *sm_ah;
79 DECLARE_PCI_UNMAP_ADDR(mapping) 79 int id;
80 int id;
81}; 80};
82 81
83struct ib_sa_service_query { 82struct ib_sa_service_query {
@@ -426,6 +425,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
426{ 425{
427 unsigned long flags; 426 unsigned long flags;
428 struct ib_mad_agent *agent; 427 struct ib_mad_agent *agent;
428 struct ib_mad_send_buf *mad_buf;
429 429
430 spin_lock_irqsave(&idr_lock, flags); 430 spin_lock_irqsave(&idr_lock, flags);
431 if (idr_find(&query_idr, id) != query) { 431 if (idr_find(&query_idr, id) != query) {
@@ -433,9 +433,10 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
433 return; 433 return;
434 } 434 }
435 agent = query->port->agent; 435 agent = query->port->agent;
436 mad_buf = query->mad_buf;
436 spin_unlock_irqrestore(&idr_lock, flags); 437 spin_unlock_irqrestore(&idr_lock, flags);
437 438
438 ib_cancel_mad(agent, id); 439 ib_cancel_mad(agent, mad_buf);
439} 440}
440EXPORT_SYMBOL(ib_sa_cancel_query); 441EXPORT_SYMBOL(ib_sa_cancel_query);
441 442
@@ -457,71 +458,46 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
457 458
458static int send_mad(struct ib_sa_query *query, int timeout_ms) 459static int send_mad(struct ib_sa_query *query, int timeout_ms)
459{ 460{
460 struct ib_sa_port *port = query->port;
461 unsigned long flags; 461 unsigned long flags;
462 int ret; 462 int ret, id;
463 struct ib_sge gather_list;
464 struct ib_send_wr *bad_wr, wr = {
465 .opcode = IB_WR_SEND,
466 .sg_list = &gather_list,
467 .num_sge = 1,
468 .send_flags = IB_SEND_SIGNALED,
469 .wr = {
470 .ud = {
471 .mad_hdr = &query->mad->mad_hdr,
472 .remote_qpn = 1,
473 .remote_qkey = IB_QP1_QKEY,
474 .timeout_ms = timeout_ms,
475 }
476 }
477 };
478 463
479retry: 464retry:
480 if (!idr_pre_get(&query_idr, GFP_ATOMIC)) 465 if (!idr_pre_get(&query_idr, GFP_ATOMIC))
481 return -ENOMEM; 466 return -ENOMEM;
482 spin_lock_irqsave(&idr_lock, flags); 467 spin_lock_irqsave(&idr_lock, flags);
483 ret = idr_get_new(&query_idr, query, &query->id); 468 ret = idr_get_new(&query_idr, query, &id);
484 spin_unlock_irqrestore(&idr_lock, flags); 469 spin_unlock_irqrestore(&idr_lock, flags);
485 if (ret == -EAGAIN) 470 if (ret == -EAGAIN)
486 goto retry; 471 goto retry;
487 if (ret) 472 if (ret)
488 return ret; 473 return ret;
489 474
490 wr.wr_id = query->id; 475 query->mad_buf->timeout_ms = timeout_ms;
476 query->mad_buf->context[0] = query;
477 query->id = id;
491 478
492 spin_lock_irqsave(&port->ah_lock, flags); 479 spin_lock_irqsave(&query->port->ah_lock, flags);
493 kref_get(&port->sm_ah->ref); 480 kref_get(&query->port->sm_ah->ref);
494 query->sm_ah = port->sm_ah; 481 query->sm_ah = query->port->sm_ah;
495 wr.wr.ud.ah = port->sm_ah->ah; 482 spin_unlock_irqrestore(&query->port->ah_lock, flags);
496 spin_unlock_irqrestore(&port->ah_lock, flags);
497 483
498 gather_list.addr = dma_map_single(port->agent->device->dma_device, 484 query->mad_buf->ah = query->sm_ah->ah;
499 query->mad,
500 sizeof (struct ib_sa_mad),
501 DMA_TO_DEVICE);
502 gather_list.length = sizeof (struct ib_sa_mad);
503 gather_list.lkey = port->agent->mr->lkey;
504 pci_unmap_addr_set(query, mapping, gather_list.addr);
505 485
506 ret = ib_post_send_mad(port->agent, &wr, &bad_wr); 486 ret = ib_post_send_mad(query->mad_buf, NULL);
507 if (ret) { 487 if (ret) {
508 dma_unmap_single(port->agent->device->dma_device,
509 pci_unmap_addr(query, mapping),
510 sizeof (struct ib_sa_mad),
511 DMA_TO_DEVICE);
512 kref_put(&query->sm_ah->ref, free_sm_ah);
513 spin_lock_irqsave(&idr_lock, flags); 488 spin_lock_irqsave(&idr_lock, flags);
514 idr_remove(&query_idr, query->id); 489 idr_remove(&query_idr, id);
515 spin_unlock_irqrestore(&idr_lock, flags); 490 spin_unlock_irqrestore(&idr_lock, flags);
491
492 kref_put(&query->sm_ah->ref, free_sm_ah);
516 } 493 }
517 494
518 /* 495 /*
519 * It's not safe to dereference query any more, because the 496 * It's not safe to dereference query any more, because the
520 * send may already have completed and freed the query in 497 * send may already have completed and freed the query in
521 * another context. So use wr.wr_id, which has a copy of the 498 * another context.
522 * query's id.
523 */ 499 */
524 return ret ? ret : wr.wr_id; 500 return ret ? ret : id;
525} 501}
526 502
527static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, 503static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -543,7 +519,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
543 519
544static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) 520static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
545{ 521{
546 kfree(sa_query->mad);
547 kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); 522 kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
548} 523}
549 524
@@ -583,43 +558,58 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
583{ 558{
584 struct ib_sa_path_query *query; 559 struct ib_sa_path_query *query;
585 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); 560 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
586 struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; 561 struct ib_sa_port *port;
587 struct ib_mad_agent *agent = port->agent; 562 struct ib_mad_agent *agent;
563 struct ib_sa_mad *mad;
588 int ret; 564 int ret;
589 565
566 if (!sa_dev)
567 return -ENODEV;
568
569 port = &sa_dev->port[port_num - sa_dev->start_port];
570 agent = port->agent;
571
590 query = kmalloc(sizeof *query, gfp_mask); 572 query = kmalloc(sizeof *query, gfp_mask);
591 if (!query) 573 if (!query)
592 return -ENOMEM; 574 return -ENOMEM;
593 query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); 575
594 if (!query->sa_query.mad) { 576 query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
595 kfree(query); 577 0, IB_MGMT_SA_HDR,
596 return -ENOMEM; 578 IB_MGMT_SA_DATA, gfp_mask);
579 if (!query->sa_query.mad_buf) {
580 ret = -ENOMEM;
581 goto err1;
597 } 582 }
598 583
599 query->callback = callback; 584 query->callback = callback;
600 query->context = context; 585 query->context = context;
601 586
602 init_mad(query->sa_query.mad, agent); 587 mad = query->sa_query.mad_buf->mad;
588 init_mad(mad, agent);
603 589
604 query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; 590 query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
605 query->sa_query.release = ib_sa_path_rec_release; 591 query->sa_query.release = ib_sa_path_rec_release;
606 query->sa_query.port = port; 592 query->sa_query.port = port;
607 query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET; 593 mad->mad_hdr.method = IB_MGMT_METHOD_GET;
608 query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); 594 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
609 query->sa_query.mad->sa_hdr.comp_mask = comp_mask; 595 mad->sa_hdr.comp_mask = comp_mask;
610 596
611 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), 597 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
612 rec, query->sa_query.mad->data);
613 598
614 *sa_query = &query->sa_query; 599 *sa_query = &query->sa_query;
615 600
616 ret = send_mad(&query->sa_query, timeout_ms); 601 ret = send_mad(&query->sa_query, timeout_ms);
617 if (ret < 0) { 602 if (ret < 0)
618 *sa_query = NULL; 603 goto err2;
619 kfree(query->sa_query.mad); 604
620 kfree(query); 605 return ret;
621 }
622 606
607err2:
608 *sa_query = NULL;
609 ib_free_send_mad(query->sa_query.mad_buf);
610
611err1:
612 kfree(query);
623 return ret; 613 return ret;
624} 614}
625EXPORT_SYMBOL(ib_sa_path_rec_get); 615EXPORT_SYMBOL(ib_sa_path_rec_get);
@@ -643,7 +633,6 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
643 633
644static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) 634static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
645{ 635{
646 kfree(sa_query->mad);
647 kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); 636 kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
648} 637}
649 638
@@ -685,10 +674,17 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
685{ 674{
686 struct ib_sa_service_query *query; 675 struct ib_sa_service_query *query;
687 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); 676 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
688 struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; 677 struct ib_sa_port *port;
689 struct ib_mad_agent *agent = port->agent; 678 struct ib_mad_agent *agent;
679 struct ib_sa_mad *mad;
690 int ret; 680 int ret;
691 681
682 if (!sa_dev)
683 return -ENODEV;
684
685 port = &sa_dev->port[port_num - sa_dev->start_port];
686 agent = port->agent;
687
692 if (method != IB_MGMT_METHOD_GET && 688 if (method != IB_MGMT_METHOD_GET &&
693 method != IB_MGMT_METHOD_SET && 689 method != IB_MGMT_METHOD_SET &&
694 method != IB_SA_METHOD_DELETE) 690 method != IB_SA_METHOD_DELETE)
@@ -697,37 +693,45 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
697 query = kmalloc(sizeof *query, gfp_mask); 693 query = kmalloc(sizeof *query, gfp_mask);
698 if (!query) 694 if (!query)
699 return -ENOMEM; 695 return -ENOMEM;
700 query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); 696
701 if (!query->sa_query.mad) { 697 query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
702 kfree(query); 698 0, IB_MGMT_SA_HDR,
703 return -ENOMEM; 699 IB_MGMT_SA_DATA, gfp_mask);
700 if (!query->sa_query.mad_buf) {
701 ret = -ENOMEM;
702 goto err1;
704 } 703 }
705 704
706 query->callback = callback; 705 query->callback = callback;
707 query->context = context; 706 query->context = context;
708 707
709 init_mad(query->sa_query.mad, agent); 708 mad = query->sa_query.mad_buf->mad;
709 init_mad(mad, agent);
710 710
711 query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; 711 query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
712 query->sa_query.release = ib_sa_service_rec_release; 712 query->sa_query.release = ib_sa_service_rec_release;
713 query->sa_query.port = port; 713 query->sa_query.port = port;
714 query->sa_query.mad->mad_hdr.method = method; 714 mad->mad_hdr.method = method;
715 query->sa_query.mad->mad_hdr.attr_id = 715 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
716 cpu_to_be16(IB_SA_ATTR_SERVICE_REC); 716 mad->sa_hdr.comp_mask = comp_mask;
717 query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
718 717
719 ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), 718 ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
720 rec, query->sa_query.mad->data); 719 rec, mad->data);
721 720
722 *sa_query = &query->sa_query; 721 *sa_query = &query->sa_query;
723 722
724 ret = send_mad(&query->sa_query, timeout_ms); 723 ret = send_mad(&query->sa_query, timeout_ms);
725 if (ret < 0) { 724 if (ret < 0)
726 *sa_query = NULL; 725 goto err2;
727 kfree(query->sa_query.mad); 726
728 kfree(query); 727 return ret;
729 }
730 728
729err2:
730 *sa_query = NULL;
731 ib_free_send_mad(query->sa_query.mad_buf);
732
733err1:
734 kfree(query);
731 return ret; 735 return ret;
732} 736}
733EXPORT_SYMBOL(ib_sa_service_rec_query); 737EXPORT_SYMBOL(ib_sa_service_rec_query);
@@ -751,7 +755,6 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
751 755
752static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) 756static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
753{ 757{
754 kfree(sa_query->mad);
755 kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); 758 kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
756} 759}
757 760
@@ -768,60 +771,69 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
768{ 771{
769 struct ib_sa_mcmember_query *query; 772 struct ib_sa_mcmember_query *query;
770 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); 773 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
771 struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; 774 struct ib_sa_port *port;
772 struct ib_mad_agent *agent = port->agent; 775 struct ib_mad_agent *agent;
776 struct ib_sa_mad *mad;
773 int ret; 777 int ret;
774 778
779 if (!sa_dev)
780 return -ENODEV;
781
782 port = &sa_dev->port[port_num - sa_dev->start_port];
783 agent = port->agent;
784
775 query = kmalloc(sizeof *query, gfp_mask); 785 query = kmalloc(sizeof *query, gfp_mask);
776 if (!query) 786 if (!query)
777 return -ENOMEM; 787 return -ENOMEM;
778 query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); 788
779 if (!query->sa_query.mad) { 789 query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
780 kfree(query); 790 0, IB_MGMT_SA_HDR,
781 return -ENOMEM; 791 IB_MGMT_SA_DATA, gfp_mask);
792 if (!query->sa_query.mad_buf) {
793 ret = -ENOMEM;
794 goto err1;
782 } 795 }
783 796
784 query->callback = callback; 797 query->callback = callback;
785 query->context = context; 798 query->context = context;
786 799
787 init_mad(query->sa_query.mad, agent); 800 mad = query->sa_query.mad_buf->mad;
801 init_mad(mad, agent);
788 802
789 query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; 803 query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
790 query->sa_query.release = ib_sa_mcmember_rec_release; 804 query->sa_query.release = ib_sa_mcmember_rec_release;
791 query->sa_query.port = port; 805 query->sa_query.port = port;
792 query->sa_query.mad->mad_hdr.method = method; 806 mad->mad_hdr.method = method;
793 query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); 807 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
794 query->sa_query.mad->sa_hdr.comp_mask = comp_mask; 808 mad->sa_hdr.comp_mask = comp_mask;
795 809
796 ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), 810 ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
797 rec, query->sa_query.mad->data); 811 rec, mad->data);
798 812
799 *sa_query = &query->sa_query; 813 *sa_query = &query->sa_query;
800 814
801 ret = send_mad(&query->sa_query, timeout_ms); 815 ret = send_mad(&query->sa_query, timeout_ms);
802 if (ret < 0) { 816 if (ret < 0)
803 *sa_query = NULL; 817 goto err2;
804 kfree(query->sa_query.mad);
805 kfree(query);
806 }
807 818
808 return ret; 819 return ret;
820
821err2:
822 *sa_query = NULL;
823 ib_free_send_mad(query->sa_query.mad_buf);
824
825err1:
826 kfree(query);
827 return ret;
809} 828}
810EXPORT_SYMBOL(ib_sa_mcmember_rec_query); 829EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
811 830
812static void send_handler(struct ib_mad_agent *agent, 831static void send_handler(struct ib_mad_agent *agent,
813 struct ib_mad_send_wc *mad_send_wc) 832 struct ib_mad_send_wc *mad_send_wc)
814{ 833{
815 struct ib_sa_query *query; 834 struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
816 unsigned long flags; 835 unsigned long flags;
817 836
818 spin_lock_irqsave(&idr_lock, flags);
819 query = idr_find(&query_idr, mad_send_wc->wr_id);
820 spin_unlock_irqrestore(&idr_lock, flags);
821
822 if (!query)
823 return;
824
825 if (query->callback) 837 if (query->callback)
826 switch (mad_send_wc->status) { 838 switch (mad_send_wc->status) {
827 case IB_WC_SUCCESS: 839 case IB_WC_SUCCESS:
@@ -838,30 +850,25 @@ static void send_handler(struct ib_mad_agent *agent,
838 break; 850 break;
839 } 851 }
840 852
841 dma_unmap_single(agent->device->dma_device,
842 pci_unmap_addr(query, mapping),
843 sizeof (struct ib_sa_mad),
844 DMA_TO_DEVICE);
845 kref_put(&query->sm_ah->ref, free_sm_ah);
846
847 query->release(query);
848
849 spin_lock_irqsave(&idr_lock, flags); 853 spin_lock_irqsave(&idr_lock, flags);
850 idr_remove(&query_idr, mad_send_wc->wr_id); 854 idr_remove(&query_idr, query->id);
851 spin_unlock_irqrestore(&idr_lock, flags); 855 spin_unlock_irqrestore(&idr_lock, flags);
856
857 ib_free_send_mad(mad_send_wc->send_buf);
858 kref_put(&query->sm_ah->ref, free_sm_ah);
859 query->release(query);
852} 860}
853 861
854static void recv_handler(struct ib_mad_agent *mad_agent, 862static void recv_handler(struct ib_mad_agent *mad_agent,
855 struct ib_mad_recv_wc *mad_recv_wc) 863 struct ib_mad_recv_wc *mad_recv_wc)
856{ 864{
857 struct ib_sa_query *query; 865 struct ib_sa_query *query;
858 unsigned long flags; 866 struct ib_mad_send_buf *mad_buf;
859 867
860 spin_lock_irqsave(&idr_lock, flags); 868 mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
861 query = idr_find(&query_idr, mad_recv_wc->wc->wr_id); 869 query = mad_buf->context[0];
862 spin_unlock_irqrestore(&idr_lock, flags);
863 870
864 if (query && query->callback) { 871 if (query->callback) {
865 if (mad_recv_wc->wc->status == IB_WC_SUCCESS) 872 if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
866 query->callback(query, 873 query->callback(query,
867 mad_recv_wc->recv_buf.mad->mad_hdr.status ? 874 mad_recv_wc->recv_buf.mad->mad_hdr.status ?
@@ -975,6 +982,7 @@ static int __init ib_sa_init(void)
975static void __exit ib_sa_cleanup(void) 982static void __exit ib_sa_cleanup(void)
976{ 983{
977 ib_unregister_client(&sa_client); 984 ib_unregister_client(&sa_client);
985 idr_destroy(&query_idr);
978} 986}
979 987
980module_init(ib_sa_init); 988module_init(ib_sa_init);
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index db25503a0736..2b3c40198f81 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -39,6 +39,8 @@
39#ifndef __SMI_H_ 39#ifndef __SMI_H_
40#define __SMI_H_ 40#define __SMI_H_
41 41
42#include <rdma/ib_smi.h>
43
42int smi_handle_dr_smp_recv(struct ib_smp *smp, 44int smi_handle_dr_smp_recv(struct ib_smp *smp,
43 u8 node_type, 45 u8 node_type,
44 int port_num, 46 int port_num,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 211ba3223f65..7ce7a6c782fa 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -65,6 +65,11 @@ struct port_table_attribute {
65 int index; 65 int index;
66}; 66};
67 67
68static inline int ibdev_is_alive(const struct ib_device *dev)
69{
70 return dev->reg_state == IB_DEV_REGISTERED;
71}
72
68static ssize_t port_attr_show(struct kobject *kobj, 73static ssize_t port_attr_show(struct kobject *kobj,
69 struct attribute *attr, char *buf) 74 struct attribute *attr, char *buf)
70{ 75{
@@ -74,6 +79,8 @@ static ssize_t port_attr_show(struct kobject *kobj,
74 79
75 if (!port_attr->show) 80 if (!port_attr->show)
76 return -EIO; 81 return -EIO;
82 if (!ibdev_is_alive(p->ibdev))
83 return -ENODEV;
77 84
78 return port_attr->show(p, port_attr, buf); 85 return port_attr->show(p, port_attr, buf);
79} 86}
@@ -581,6 +588,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
581{ 588{
582 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); 589 struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
583 590
591 if (!ibdev_is_alive(dev))
592 return -ENODEV;
593
584 switch (dev->node_type) { 594 switch (dev->node_type) {
585 case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); 595 case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
586 case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); 596 case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
@@ -595,6 +605,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
595 struct ib_device_attr attr; 605 struct ib_device_attr attr;
596 ssize_t ret; 606 ssize_t ret;
597 607
608 if (!ibdev_is_alive(dev))
609 return -ENODEV;
610
598 ret = ib_query_device(dev, &attr); 611 ret = ib_query_device(dev, &attr);
599 if (ret) 612 if (ret)
600 return ret; 613 return ret;
@@ -612,6 +625,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
612 struct ib_device_attr attr; 625 struct ib_device_attr attr;
613 ssize_t ret; 626 ssize_t ret;
614 627
628 if (!ibdev_is_alive(dev))
629 return -ENODEV;
630
615 ret = ib_query_device(dev, &attr); 631 ret = ib_query_device(dev, &attr);
616 if (ret) 632 if (ret)
617 return ret; 633 return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 021b8f1d36d3..28477565ecba 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -41,37 +41,81 @@
41#include <linux/file.h> 41#include <linux/file.h>
42#include <linux/mount.h> 42#include <linux/mount.h>
43#include <linux/cdev.h> 43#include <linux/cdev.h>
44#include <linux/idr.h>
44 45
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46 47
47#include "ucm.h" 48#include <rdma/ib_cm.h>
49#include <rdma/ib_user_cm.h>
48 50
49MODULE_AUTHOR("Libor Michalek"); 51MODULE_AUTHOR("Libor Michalek");
50MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); 52MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
51MODULE_LICENSE("Dual BSD/GPL"); 53MODULE_LICENSE("Dual BSD/GPL");
52 54
53static int ucm_debug_level; 55struct ib_ucm_device {
56 int devnum;
57 struct cdev dev;
58 struct class_device class_dev;
59 struct ib_device *ib_dev;
60};
61
62struct ib_ucm_file {
63 struct semaphore mutex;
64 struct file *filp;
65 struct ib_ucm_device *device;
66
67 struct list_head ctxs;
68 struct list_head events;
69 wait_queue_head_t poll_wait;
70};
71
72struct ib_ucm_context {
73 int id;
74 wait_queue_head_t wait;
75 atomic_t ref;
76 int events_reported;
77
78 struct ib_ucm_file *file;
79 struct ib_cm_id *cm_id;
80 __u64 uid;
81
82 struct list_head events; /* list of pending events. */
83 struct list_head file_list; /* member in file ctx list */
84};
85
86struct ib_ucm_event {
87 struct ib_ucm_context *ctx;
88 struct list_head file_list; /* member in file event list */
89 struct list_head ctx_list; /* member in ctx event list */
54 90
55module_param_named(debug_level, ucm_debug_level, int, 0644); 91 struct ib_cm_id *cm_id;
56MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 92 struct ib_ucm_event_resp resp;
93 void *data;
94 void *info;
95 int data_len;
96 int info_len;
97};
57 98
58enum { 99enum {
59 IB_UCM_MAJOR = 231, 100 IB_UCM_MAJOR = 231,
60 IB_UCM_MINOR = 255 101 IB_UCM_BASE_MINOR = 224,
102 IB_UCM_MAX_DEVICES = 32
61}; 103};
62 104
63#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) 105#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
64 106
65#define PFX "UCM: " 107static void ib_ucm_add_one(struct ib_device *device);
108static void ib_ucm_remove_one(struct ib_device *device);
66 109
67#define ucm_dbg(format, arg...) \ 110static struct ib_client ucm_client = {
68 do { \ 111 .name = "ucm",
69 if (ucm_debug_level > 0) \ 112 .add = ib_ucm_add_one,
70 printk(KERN_DEBUG PFX format, ## arg); \ 113 .remove = ib_ucm_remove_one
71 } while (0) 114};
72 115
73static struct semaphore ctx_id_mutex; 116static DECLARE_MUTEX(ctx_id_mutex);
74static struct idr ctx_id_table; 117static DEFINE_IDR(ctx_id_table);
118static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
75 119
76static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) 120static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
77{ 121{
@@ -152,17 +196,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
152 goto error; 196 goto error;
153 197
154 list_add_tail(&ctx->file_list, &file->ctxs); 198 list_add_tail(&ctx->file_list, &file->ctxs);
155 ucm_dbg("Allocated CM ID <%d>\n", ctx->id);
156 return ctx; 199 return ctx;
157 200
158error: 201error:
159 kfree(ctx); 202 kfree(ctx);
160 return NULL; 203 return NULL;
161} 204}
162/* 205
163 * Event portion of the API, handle CM events
164 * and allow event polling.
165 */
166static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, 206static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
167 struct ib_sa_path_rec *kpath) 207 struct ib_sa_path_rec *kpath)
168{ 208{
@@ -209,6 +249,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
209 ureq->retry_count = kreq->retry_count; 249 ureq->retry_count = kreq->retry_count;
210 ureq->rnr_retry_count = kreq->rnr_retry_count; 250 ureq->rnr_retry_count = kreq->rnr_retry_count;
211 ureq->srq = kreq->srq; 251 ureq->srq = kreq->srq;
252 ureq->port = kreq->port;
212 253
213 ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); 254 ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
214 ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); 255 ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
@@ -295,6 +336,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
295 case IB_CM_SIDR_REQ_RECEIVED: 336 case IB_CM_SIDR_REQ_RECEIVED:
296 uvt->resp.u.sidr_req_resp.pkey = 337 uvt->resp.u.sidr_req_resp.pkey =
297 evt->param.sidr_req_rcvd.pkey; 338 evt->param.sidr_req_rcvd.pkey;
339 uvt->resp.u.sidr_req_resp.port =
340 evt->param.sidr_req_rcvd.port;
298 uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; 341 uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
299 break; 342 break;
300 case IB_CM_SIDR_REP_RECEIVED: 343 case IB_CM_SIDR_REP_RECEIVED:
@@ -387,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
387 430
388 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 431 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
389 return -EFAULT; 432 return -EFAULT;
390 /* 433
391 * wait
392 */
393 down(&file->mutex); 434 down(&file->mutex);
394 while (list_empty(&file->events)) { 435 while (list_empty(&file->events)) {
395 436
@@ -471,7 +512,6 @@ done:
471 return result; 512 return result;
472} 513}
473 514
474
475static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, 515static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
476 const char __user *inbuf, 516 const char __user *inbuf,
477 int in_len, int out_len) 517 int in_len, int out_len)
@@ -494,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
494 return -ENOMEM; 534 return -ENOMEM;
495 535
496 ctx->uid = cmd.uid; 536 ctx->uid = cmd.uid;
497 ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); 537 ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
538 ib_ucm_event_handler, ctx);
498 if (IS_ERR(ctx->cm_id)) { 539 if (IS_ERR(ctx->cm_id)) {
499 result = PTR_ERR(ctx->cm_id); 540 result = PTR_ERR(ctx->cm_id);
500 goto err; 541 goto err1;
501 } 542 }
502 543
503 resp.id = ctx->id; 544 resp.id = ctx->id;
504 if (copy_to_user((void __user *)(unsigned long)cmd.response, 545 if (copy_to_user((void __user *)(unsigned long)cmd.response,
505 &resp, sizeof(resp))) { 546 &resp, sizeof(resp))) {
506 result = -EFAULT; 547 result = -EFAULT;
507 goto err; 548 goto err2;
508 } 549 }
509
510 return 0; 550 return 0;
511 551
512err: 552err2:
553 ib_destroy_cm_id(ctx->cm_id);
554err1:
513 down(&ctx_id_mutex); 555 down(&ctx_id_mutex);
514 idr_remove(&ctx_id_table, ctx->id); 556 idr_remove(&ctx_id_table, ctx->id);
515 up(&ctx_id_mutex); 557 up(&ctx_id_mutex);
516
517 if (!IS_ERR(ctx->cm_id))
518 ib_destroy_cm_id(ctx->cm_id);
519
520 kfree(ctx); 558 kfree(ctx);
521 return result; 559 return result;
522} 560}
@@ -1184,9 +1222,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
1184 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1222 if (copy_from_user(&hdr, buf, sizeof(hdr)))
1185 return -EFAULT; 1223 return -EFAULT;
1186 1224
1187 ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
1188 hdr.cmd, hdr.in, hdr.out, len);
1189
1190 if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) 1225 if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
1191 return -EINVAL; 1226 return -EINVAL;
1192 1227
@@ -1231,8 +1266,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
1231 1266
1232 filp->private_data = file; 1267 filp->private_data = file;
1233 file->filp = filp; 1268 file->filp = filp;
1234 1269 file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev);
1235 ucm_dbg("Created struct\n");
1236 1270
1237 return 0; 1271 return 0;
1238} 1272}
@@ -1263,7 +1297,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
1263 return 0; 1297 return 0;
1264} 1298}
1265 1299
1266static struct file_operations ib_ucm_fops = { 1300static void ib_ucm_release_class_dev(struct class_device *class_dev)
1301{
1302 struct ib_ucm_device *dev;
1303
1304 dev = container_of(class_dev, struct ib_ucm_device, class_dev);
1305 cdev_del(&dev->dev);
1306 clear_bit(dev->devnum, dev_map);
1307 kfree(dev);
1308}
1309
1310static struct file_operations ucm_fops = {
1267 .owner = THIS_MODULE, 1311 .owner = THIS_MODULE,
1268 .open = ib_ucm_open, 1312 .open = ib_ucm_open,
1269 .release = ib_ucm_close, 1313 .release = ib_ucm_close,
@@ -1271,55 +1315,142 @@ static struct file_operations ib_ucm_fops = {
1271 .poll = ib_ucm_poll, 1315 .poll = ib_ucm_poll,
1272}; 1316};
1273 1317
1318static struct class ucm_class = {
1319 .name = "infiniband_cm",
1320 .release = ib_ucm_release_class_dev
1321};
1274 1322
1275static struct class *ib_ucm_class; 1323static ssize_t show_dev(struct class_device *class_dev, char *buf)
1276static struct cdev ib_ucm_cdev; 1324{
1325 struct ib_ucm_device *dev;
1326
1327 dev = container_of(class_dev, struct ib_ucm_device, class_dev);
1328 return print_dev_t(buf, dev->dev.dev);
1329}
1330static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
1277 1331
1278static int __init ib_ucm_init(void) 1332static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
1279{ 1333{
1280 int result; 1334 struct ib_ucm_device *dev;
1335
1336 dev = container_of(class_dev, struct ib_ucm_device, class_dev);
1337 return sprintf(buf, "%s\n", dev->ib_dev->name);
1338}
1339static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1281 1340
1282 result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); 1341static void ib_ucm_add_one(struct ib_device *device)
1283 if (result) { 1342{
1284 ucm_dbg("Error <%d> registering dev\n", result); 1343 struct ib_ucm_device *ucm_dev;
1285 goto err_chr; 1344
1286 } 1345 if (!device->alloc_ucontext)
1346 return;
1347
1348 ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL);
1349 if (!ucm_dev)
1350 return;
1287 1351
1288 cdev_init(&ib_ucm_cdev, &ib_ucm_fops); 1352 memset(ucm_dev, 0, sizeof *ucm_dev);
1353 ucm_dev->ib_dev = device;
1354
1355 ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
1356 if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
1357 goto err;
1358
1359 set_bit(ucm_dev->devnum, dev_map);
1360
1361 cdev_init(&ucm_dev->dev, &ucm_fops);
1362 ucm_dev->dev.owner = THIS_MODULE;
1363 kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum);
1364 if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
1365 goto err;
1289 1366
1290 result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); 1367 ucm_dev->class_dev.class = &ucm_class;
1291 if (result) { 1368 ucm_dev->class_dev.dev = device->dma_device;
1292 ucm_dbg("Error <%d> adding cdev\n", result); 1369 snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
1370 ucm_dev->devnum);
1371 if (class_device_register(&ucm_dev->class_dev))
1293 goto err_cdev; 1372 goto err_cdev;
1294 }
1295 1373
1296 ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); 1374 if (class_device_create_file(&ucm_dev->class_dev,
1297 if (IS_ERR(ib_ucm_class)) { 1375 &class_device_attr_dev))
1298 result = PTR_ERR(ib_ucm_class); 1376 goto err_class;
1299 ucm_dbg("Error <%d> creating class\n", result); 1377 if (class_device_create_file(&ucm_dev->class_dev,
1378 &class_device_attr_ibdev))
1300 goto err_class; 1379 goto err_class;
1380
1381 ib_set_client_data(device, &ucm_client, ucm_dev);
1382 return;
1383
1384err_class:
1385 class_device_unregister(&ucm_dev->class_dev);
1386err_cdev:
1387 cdev_del(&ucm_dev->dev);
1388 clear_bit(ucm_dev->devnum, dev_map);
1389err:
1390 kfree(ucm_dev);
1391 return;
1392}
1393
1394static void ib_ucm_remove_one(struct ib_device *device)
1395{
1396 struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
1397
1398 if (!ucm_dev)
1399 return;
1400
1401 class_device_unregister(&ucm_dev->class_dev);
1402}
1403
1404static ssize_t show_abi_version(struct class *class, char *buf)
1405{
1406 return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
1407}
1408static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
1409
1410static int __init ib_ucm_init(void)
1411{
1412 int ret;
1413
1414 ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
1415 "infiniband_cm");
1416 if (ret) {
1417 printk(KERN_ERR "ucm: couldn't register device number\n");
1418 goto err;
1301 } 1419 }
1302 1420
1303 class_device_create(ib_ucm_class, NULL, IB_UCM_DEV, NULL, "ucm"); 1421 ret = class_register(&ucm_class);
1422 if (ret) {
1423 printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
1424 goto err_chrdev;
1425 }
1304 1426
1305 idr_init(&ctx_id_table); 1427 ret = class_create_file(&ucm_class, &class_attr_abi_version);
1306 init_MUTEX(&ctx_id_mutex); 1428 if (ret) {
1429 printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
1430 goto err_class;
1431 }
1307 1432
1433 ret = ib_register_client(&ucm_client);
1434 if (ret) {
1435 printk(KERN_ERR "ucm: couldn't register client\n");
1436 goto err_class;
1437 }
1308 return 0; 1438 return 0;
1439
1309err_class: 1440err_class:
1310 cdev_del(&ib_ucm_cdev); 1441 class_unregister(&ucm_class);
1311err_cdev: 1442err_chrdev:
1312 unregister_chrdev_region(IB_UCM_DEV, 1); 1443 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
1313err_chr: 1444err:
1314 return result; 1445 return ret;
1315} 1446}
1316 1447
1317static void __exit ib_ucm_cleanup(void) 1448static void __exit ib_ucm_cleanup(void)
1318{ 1449{
1319 class_device_destroy(ib_ucm_class, IB_UCM_DEV); 1450 ib_unregister_client(&ucm_client);
1320 class_destroy(ib_ucm_class); 1451 class_unregister(&ucm_class);
1321 cdev_del(&ib_ucm_cdev); 1452 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
1322 unregister_chrdev_region(IB_UCM_DEV, 1); 1453 idr_destroy(&ctx_id_table);
1323} 1454}
1324 1455
1325module_init(ib_ucm_init); 1456module_init(ib_ucm_init);
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
deleted file mode 100644
index f46f37bc1201..000000000000
--- a/drivers/infiniband/core/ucm.h
+++ /dev/null
@@ -1,83 +0,0 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
34 */
35
36#ifndef UCM_H
37#define UCM_H
38
39#include <linux/fs.h>
40#include <linux/device.h>
41#include <linux/cdev.h>
42#include <linux/idr.h>
43
44#include <rdma/ib_cm.h>
45#include <rdma/ib_user_cm.h>
46
47struct ib_ucm_file {
48 struct semaphore mutex;
49 struct file *filp;
50
51 struct list_head ctxs; /* list of active connections */
52 struct list_head events; /* list of pending events */
53 wait_queue_head_t poll_wait;
54};
55
56struct ib_ucm_context {
57 int id;
58 wait_queue_head_t wait;
59 atomic_t ref;
60 int events_reported;
61
62 struct ib_ucm_file *file;
63 struct ib_cm_id *cm_id;
64 __u64 uid;
65
66 struct list_head events; /* list of pending events. */
67 struct list_head file_list; /* member in file ctx list */
68};
69
70struct ib_ucm_event {
71 struct ib_ucm_context *ctx;
72 struct list_head file_list; /* member in file event list */
73 struct list_head ctx_list; /* member in ctx event list */
74
75 struct ib_cm_id *cm_id;
76 struct ib_ucm_event_resp resp;
77 void *data;
78 void *info;
79 int data_len;
80 int info_len;
81};
82
83#endif /* UCM_H */
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index a64d6b4dcc16..9e49816d7282 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -64,18 +64,39 @@ enum {
64 IB_UMAD_MINOR_BASE = 0 64 IB_UMAD_MINOR_BASE = 0
65}; 65};
66 66
67/*
68 * Our lifetime rules for these structs are the following: each time a
69 * device special file is opened, we look up the corresponding struct
70 * ib_umad_port by minor in the umad_port[] table while holding the
71 * port_lock. If this lookup succeeds, we take a reference on the
72 * ib_umad_port's struct ib_umad_device while still holding the
73 * port_lock; if the lookup fails, we fail the open(). We drop these
74 * references in the corresponding close().
75 *
76 * In addition to references coming from open character devices, there
77 * is one more reference to each ib_umad_device representing the
78 * module's reference taken when allocating the ib_umad_device in
79 * ib_umad_add_one().
80 *
81 * When destroying an ib_umad_device, we clear all of its
82 * ib_umad_ports from umad_port[] while holding port_lock before
83 * dropping the module's reference to the ib_umad_device. This is
84 * always safe because any open() calls will either succeed and obtain
85 * a reference before we clear the umad_port[] entries, or fail after
86 * we clear the umad_port[] entries.
87 */
88
67struct ib_umad_port { 89struct ib_umad_port {
68 int devnum; 90 struct cdev *dev;
69 struct cdev dev; 91 struct class_device *class_dev;
70 struct class_device class_dev;
71 92
72 int sm_devnum; 93 struct cdev *sm_dev;
73 struct cdev sm_dev; 94 struct class_device *sm_class_dev;
74 struct class_device sm_class_dev;
75 struct semaphore sm_sem; 95 struct semaphore sm_sem;
76 96
77 struct ib_device *ib_dev; 97 struct ib_device *ib_dev;
78 struct ib_umad_device *umad_dev; 98 struct ib_umad_device *umad_dev;
99 int dev_num;
79 u8 port_num; 100 u8 port_num;
80}; 101};
81 102
@@ -96,21 +117,31 @@ struct ib_umad_file {
96}; 117};
97 118
98struct ib_umad_packet { 119struct ib_umad_packet {
99 struct ib_ah *ah;
100 struct ib_mad_send_buf *msg; 120 struct ib_mad_send_buf *msg;
101 struct list_head list; 121 struct list_head list;
102 int length; 122 int length;
103 DECLARE_PCI_UNMAP_ADDR(mapping)
104 struct ib_user_mad mad; 123 struct ib_user_mad mad;
105}; 124};
106 125
126static struct class *umad_class;
127
107static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); 128static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
108static spinlock_t map_lock; 129
130static DEFINE_SPINLOCK(port_lock);
131static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
109static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); 132static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
110 133
111static void ib_umad_add_one(struct ib_device *device); 134static void ib_umad_add_one(struct ib_device *device);
112static void ib_umad_remove_one(struct ib_device *device); 135static void ib_umad_remove_one(struct ib_device *device);
113 136
137static void ib_umad_release_dev(struct kref *ref)
138{
139 struct ib_umad_device *dev =
140 container_of(ref, struct ib_umad_device, ref);
141
142 kfree(dev);
143}
144
114static int queue_packet(struct ib_umad_file *file, 145static int queue_packet(struct ib_umad_file *file,
115 struct ib_mad_agent *agent, 146 struct ib_mad_agent *agent,
116 struct ib_umad_packet *packet) 147 struct ib_umad_packet *packet)
@@ -139,22 +170,19 @@ static void send_handler(struct ib_mad_agent *agent,
139 struct ib_mad_send_wc *send_wc) 170 struct ib_mad_send_wc *send_wc)
140{ 171{
141 struct ib_umad_file *file = agent->context; 172 struct ib_umad_file *file = agent->context;
142 struct ib_umad_packet *timeout, *packet = 173 struct ib_umad_packet *timeout;
143 (void *) (unsigned long) send_wc->wr_id; 174 struct ib_umad_packet *packet = send_wc->send_buf->context[0];
144 175
145 ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); 176 ib_destroy_ah(packet->msg->ah);
146 ib_free_send_mad(packet->msg); 177 ib_free_send_mad(packet->msg);
147 178
148 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { 179 if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
149 timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), 180 timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
150 GFP_KERNEL);
151 if (!timeout) 181 if (!timeout)
152 goto out; 182 goto out;
153 183
154 memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); 184 timeout->length = IB_MGMT_MAD_HDR;
155 185 timeout->mad.hdr.id = packet->mad.hdr.id;
156 timeout->length = sizeof (struct ib_mad_hdr);
157 timeout->mad.hdr.id = packet->mad.hdr.id;
158 timeout->mad.hdr.status = ETIMEDOUT; 186 timeout->mad.hdr.status = ETIMEDOUT;
159 memcpy(timeout->mad.data, packet->mad.data, 187 memcpy(timeout->mad.data, packet->mad.data,
160 sizeof (struct ib_mad_hdr)); 188 sizeof (struct ib_mad_hdr));
@@ -177,11 +205,10 @@ static void recv_handler(struct ib_mad_agent *agent,
177 goto out; 205 goto out;
178 206
179 length = mad_recv_wc->mad_len; 207 length = mad_recv_wc->mad_len;
180 packet = kmalloc(sizeof *packet + length, GFP_KERNEL); 208 packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
181 if (!packet) 209 if (!packet)
182 goto out; 210 goto out;
183 211
184 memset(packet, 0, sizeof *packet + length);
185 packet->length = length; 212 packet->length = length;
186 213
187 ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); 214 ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
@@ -247,7 +274,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
247 else 274 else
248 ret = -ENOSPC; 275 ret = -ENOSPC;
249 } else if (copy_to_user(buf, &packet->mad, 276 } else if (copy_to_user(buf, &packet->mad,
250 packet->length + sizeof (struct ib_user_mad))) 277 packet->length + sizeof (struct ib_user_mad)))
251 ret = -EFAULT; 278 ret = -EFAULT;
252 else 279 else
253 ret = packet->length + sizeof (struct ib_user_mad); 280 ret = packet->length + sizeof (struct ib_user_mad);
@@ -268,26 +295,23 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
268 struct ib_umad_packet *packet; 295 struct ib_umad_packet *packet;
269 struct ib_mad_agent *agent; 296 struct ib_mad_agent *agent;
270 struct ib_ah_attr ah_attr; 297 struct ib_ah_attr ah_attr;
271 struct ib_send_wr *bad_wr; 298 struct ib_ah *ah;
272 struct ib_rmpp_mad *rmpp_mad; 299 struct ib_rmpp_mad *rmpp_mad;
273 u8 method; 300 u8 method;
274 __be64 *tid; 301 __be64 *tid;
275 int ret, length, hdr_len, data_len, rmpp_hdr_size; 302 int ret, length, hdr_len, copy_offset;
276 int rmpp_active = 0; 303 int rmpp_active = 0;
277 304
278 if (count < sizeof (struct ib_user_mad)) 305 if (count < sizeof (struct ib_user_mad))
279 return -EINVAL; 306 return -EINVAL;
280 307
281 length = count - sizeof (struct ib_user_mad); 308 length = count - sizeof (struct ib_user_mad);
282 packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + 309 packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
283 sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
284 if (!packet) 310 if (!packet)
285 return -ENOMEM; 311 return -ENOMEM;
286 312
287 if (copy_from_user(&packet->mad, buf, 313 if (copy_from_user(&packet->mad, buf,
288 sizeof (struct ib_user_mad) + 314 sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
289 sizeof(struct ib_mad_hdr) +
290 sizeof(struct ib_rmpp_hdr))) {
291 ret = -EFAULT; 315 ret = -EFAULT;
292 goto err; 316 goto err;
293 } 317 }
@@ -298,8 +322,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
298 goto err; 322 goto err;
299 } 323 }
300 324
301 packet->length = length;
302
303 down_read(&file->agent_mutex); 325 down_read(&file->agent_mutex);
304 326
305 agent = file->agent[packet->mad.hdr.id]; 327 agent = file->agent[packet->mad.hdr.id];
@@ -321,9 +343,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
321 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; 343 ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
322 } 344 }
323 345
324 packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); 346 ah = ib_create_ah(agent->qp->pd, &ah_attr);
325 if (IS_ERR(packet->ah)) { 347 if (IS_ERR(ah)) {
326 ret = PTR_ERR(packet->ah); 348 ret = PTR_ERR(ah);
327 goto err_up; 349 goto err_up;
328 } 350 }
329 351
@@ -337,64 +359,44 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
337 359
338 /* Validate that the management class can support RMPP */ 360 /* Validate that the management class can support RMPP */
339 if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { 361 if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
340 hdr_len = offsetof(struct ib_sa_mad, data); 362 hdr_len = IB_MGMT_SA_HDR;
341 data_len = length - hdr_len;
342 } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && 363 } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
343 (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { 364 (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
344 hdr_len = offsetof(struct ib_vendor_mad, data); 365 hdr_len = IB_MGMT_VENDOR_HDR;
345 data_len = length - hdr_len;
346 } else { 366 } else {
347 ret = -EINVAL; 367 ret = -EINVAL;
348 goto err_ah; 368 goto err_ah;
349 } 369 }
350 rmpp_active = 1; 370 rmpp_active = 1;
371 copy_offset = IB_MGMT_RMPP_HDR;
351 } else { 372 } else {
352 if (length > sizeof(struct ib_mad)) { 373 hdr_len = IB_MGMT_MAD_HDR;
353 ret = -EINVAL; 374 copy_offset = IB_MGMT_MAD_HDR;
354 goto err_ah;
355 }
356 hdr_len = offsetof(struct ib_mad, data);
357 data_len = length - hdr_len;
358 } 375 }
359 376
360 packet->msg = ib_create_send_mad(agent, 377 packet->msg = ib_create_send_mad(agent,
361 be32_to_cpu(packet->mad.hdr.qpn), 378 be32_to_cpu(packet->mad.hdr.qpn),
362 0, packet->ah, rmpp_active, 379 0, rmpp_active,
363 hdr_len, data_len, 380 hdr_len, length - hdr_len,
364 GFP_KERNEL); 381 GFP_KERNEL);
365 if (IS_ERR(packet->msg)) { 382 if (IS_ERR(packet->msg)) {
366 ret = PTR_ERR(packet->msg); 383 ret = PTR_ERR(packet->msg);
367 goto err_ah; 384 goto err_ah;
368 } 385 }
369 386
370 packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; 387 packet->msg->ah = ah;
371 packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; 388 packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
372 389 packet->msg->retries = packet->mad.hdr.retries;
373 /* Override send WR WRID initialized in ib_create_send_mad */ 390 packet->msg->context[0] = packet;
374 packet->msg->send_wr.wr_id = (unsigned long) packet;
375
376 if (!rmpp_active) {
377 /* Copy message from user into send buffer */
378 if (copy_from_user(packet->msg->mad,
379 buf + sizeof(struct ib_user_mad), length)) {
380 ret = -EFAULT;
381 goto err_msg;
382 }
383 } else {
384 rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
385 sizeof(struct ib_rmpp_hdr);
386 391
387 /* Only copy MAD headers (RMPP header in place) */ 392 /* Copy MAD headers (RMPP header in place) */
388 memcpy(packet->msg->mad, packet->mad.data, 393 memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
389 sizeof(struct ib_mad_hdr)); 394 /* Now, copy rest of message from user into send buffer */
390 395 if (copy_from_user(packet->msg->mad + copy_offset,
391 /* Now, copy rest of message from user into send buffer */ 396 buf + sizeof (struct ib_user_mad) + copy_offset,
392 if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, 397 length - copy_offset)) {
393 buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, 398 ret = -EFAULT;
394 length - rmpp_hdr_size)) { 399 goto err_msg;
395 ret = -EFAULT;
396 goto err_msg;
397 }
398 } 400 }
399 401
400 /* 402 /*
@@ -403,29 +405,29 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
403 * transaction ID matches the agent being used to send the 405 * transaction ID matches the agent being used to send the
404 * MAD. 406 * MAD.
405 */ 407 */
406 method = packet->msg->mad->mad_hdr.method; 408 method = ((struct ib_mad_hdr *) packet->msg->mad)->method;
407 409
408 if (!(method & IB_MGMT_METHOD_RESP) && 410 if (!(method & IB_MGMT_METHOD_RESP) &&
409 method != IB_MGMT_METHOD_TRAP_REPRESS && 411 method != IB_MGMT_METHOD_TRAP_REPRESS &&
410 method != IB_MGMT_METHOD_SEND) { 412 method != IB_MGMT_METHOD_SEND) {
411 tid = &packet->msg->mad->mad_hdr.tid; 413 tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
412 *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | 414 *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
413 (be64_to_cpup(tid) & 0xffffffff)); 415 (be64_to_cpup(tid) & 0xffffffff));
414 } 416 }
415 417
416 ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); 418 ret = ib_post_send_mad(packet->msg, NULL);
417 if (ret) 419 if (ret)
418 goto err_msg; 420 goto err_msg;
419 421
420 up_read(&file->agent_mutex); 422 up_read(&file->agent_mutex);
421 423
422 return sizeof (struct ib_user_mad_hdr) + packet->length; 424 return count;
423 425
424err_msg: 426err_msg:
425 ib_free_send_mad(packet->msg); 427 ib_free_send_mad(packet->msg);
426 428
427err_ah: 429err_ah:
428 ib_destroy_ah(packet->ah); 430 ib_destroy_ah(ah);
429 431
430err_up: 432err_up:
431 up_read(&file->agent_mutex); 433 up_read(&file->agent_mutex);
@@ -565,15 +567,23 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
565 567
566static int ib_umad_open(struct inode *inode, struct file *filp) 568static int ib_umad_open(struct inode *inode, struct file *filp)
567{ 569{
568 struct ib_umad_port *port = 570 struct ib_umad_port *port;
569 container_of(inode->i_cdev, struct ib_umad_port, dev);
570 struct ib_umad_file *file; 571 struct ib_umad_file *file;
571 572
572 file = kmalloc(sizeof *file, GFP_KERNEL); 573 spin_lock(&port_lock);
573 if (!file) 574 port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
574 return -ENOMEM; 575 if (port)
576 kref_get(&port->umad_dev->ref);
577 spin_unlock(&port_lock);
575 578
576 memset(file, 0, sizeof *file); 579 if (!port)
580 return -ENXIO;
581
582 file = kzalloc(sizeof *file, GFP_KERNEL);
583 if (!file) {
584 kref_put(&port->umad_dev->ref, ib_umad_release_dev);
585 return -ENOMEM;
586 }
577 587
578 spin_lock_init(&file->recv_lock); 588 spin_lock_init(&file->recv_lock);
579 init_rwsem(&file->agent_mutex); 589 init_rwsem(&file->agent_mutex);
@@ -589,6 +599,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
589static int ib_umad_close(struct inode *inode, struct file *filp) 599static int ib_umad_close(struct inode *inode, struct file *filp)
590{ 600{
591 struct ib_umad_file *file = filp->private_data; 601 struct ib_umad_file *file = filp->private_data;
602 struct ib_umad_device *dev = file->port->umad_dev;
592 struct ib_umad_packet *packet, *tmp; 603 struct ib_umad_packet *packet, *tmp;
593 int i; 604 int i;
594 605
@@ -603,6 +614,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
603 614
604 kfree(file); 615 kfree(file);
605 616
617 kref_put(&dev->ref, ib_umad_release_dev);
618
606 return 0; 619 return 0;
607} 620}
608 621
@@ -619,30 +632,46 @@ static struct file_operations umad_fops = {
619 632
620static int ib_umad_sm_open(struct inode *inode, struct file *filp) 633static int ib_umad_sm_open(struct inode *inode, struct file *filp)
621{ 634{
622 struct ib_umad_port *port = 635 struct ib_umad_port *port;
623 container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
624 struct ib_port_modify props = { 636 struct ib_port_modify props = {
625 .set_port_cap_mask = IB_PORT_SM 637 .set_port_cap_mask = IB_PORT_SM
626 }; 638 };
627 int ret; 639 int ret;
628 640
641 spin_lock(&port_lock);
642 port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
643 if (port)
644 kref_get(&port->umad_dev->ref);
645 spin_unlock(&port_lock);
646
647 if (!port)
648 return -ENXIO;
649
629 if (filp->f_flags & O_NONBLOCK) { 650 if (filp->f_flags & O_NONBLOCK) {
630 if (down_trylock(&port->sm_sem)) 651 if (down_trylock(&port->sm_sem)) {
631 return -EAGAIN; 652 ret = -EAGAIN;
653 goto fail;
654 }
632 } else { 655 } else {
633 if (down_interruptible(&port->sm_sem)) 656 if (down_interruptible(&port->sm_sem)) {
634 return -ERESTARTSYS; 657 ret = -ERESTARTSYS;
658 goto fail;
659 }
635 } 660 }
636 661
637 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); 662 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
638 if (ret) { 663 if (ret) {
639 up(&port->sm_sem); 664 up(&port->sm_sem);
640 return ret; 665 goto fail;
641 } 666 }
642 667
643 filp->private_data = port; 668 filp->private_data = port;
644 669
645 return 0; 670 return 0;
671
672fail:
673 kref_put(&port->umad_dev->ref, ib_umad_release_dev);
674 return ret;
646} 675}
647 676
648static int ib_umad_sm_close(struct inode *inode, struct file *filp) 677static int ib_umad_sm_close(struct inode *inode, struct file *filp)
@@ -656,6 +685,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
656 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); 685 ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
657 up(&port->sm_sem); 686 up(&port->sm_sem);
658 687
688 kref_put(&port->umad_dev->ref, ib_umad_release_dev);
689
659 return ret; 690 return ret;
660} 691}
661 692
@@ -671,21 +702,13 @@ static struct ib_client umad_client = {
671 .remove = ib_umad_remove_one 702 .remove = ib_umad_remove_one
672}; 703};
673 704
674static ssize_t show_dev(struct class_device *class_dev, char *buf)
675{
676 struct ib_umad_port *port = class_get_devdata(class_dev);
677
678 if (class_dev == &port->class_dev)
679 return print_dev_t(buf, port->dev.dev);
680 else
681 return print_dev_t(buf, port->sm_dev.dev);
682}
683static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
684
685static ssize_t show_ibdev(struct class_device *class_dev, char *buf) 705static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
686{ 706{
687 struct ib_umad_port *port = class_get_devdata(class_dev); 707 struct ib_umad_port *port = class_get_devdata(class_dev);
688 708
709 if (!port)
710 return -ENODEV;
711
689 return sprintf(buf, "%s\n", port->ib_dev->name); 712 return sprintf(buf, "%s\n", port->ib_dev->name);
690} 713}
691static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 714static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -694,38 +717,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf)
694{ 717{
695 struct ib_umad_port *port = class_get_devdata(class_dev); 718 struct ib_umad_port *port = class_get_devdata(class_dev);
696 719
720 if (!port)
721 return -ENODEV;
722
697 return sprintf(buf, "%d\n", port->port_num); 723 return sprintf(buf, "%d\n", port->port_num);
698} 724}
699static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); 725static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
700 726
701static void ib_umad_release_dev(struct kref *ref)
702{
703 struct ib_umad_device *dev =
704 container_of(ref, struct ib_umad_device, ref);
705
706 kfree(dev);
707}
708
709static void ib_umad_release_port(struct class_device *class_dev)
710{
711 struct ib_umad_port *port = class_get_devdata(class_dev);
712
713 if (class_dev == &port->class_dev) {
714 cdev_del(&port->dev);
715 clear_bit(port->devnum, dev_map);
716 } else {
717 cdev_del(&port->sm_dev);
718 clear_bit(port->sm_devnum, dev_map);
719 }
720
721 kref_put(&port->umad_dev->ref, ib_umad_release_dev);
722}
723
724static struct class umad_class = {
725 .name = "infiniband_mad",
726 .release = ib_umad_release_port
727};
728
729static ssize_t show_abi_version(struct class *class, char *buf) 727static ssize_t show_abi_version(struct class *class, char *buf)
730{ 728{
731 return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); 729 return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
@@ -735,91 +733,102 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
735static int ib_umad_init_port(struct ib_device *device, int port_num, 733static int ib_umad_init_port(struct ib_device *device, int port_num,
736 struct ib_umad_port *port) 734 struct ib_umad_port *port)
737{ 735{
738 spin_lock(&map_lock); 736 spin_lock(&port_lock);
739 port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); 737 port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
740 if (port->devnum >= IB_UMAD_MAX_PORTS) { 738 if (port->dev_num >= IB_UMAD_MAX_PORTS) {
741 spin_unlock(&map_lock); 739 spin_unlock(&port_lock);
742 return -1; 740 return -1;
743 } 741 }
744 port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS); 742 set_bit(port->dev_num, dev_map);
745 if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) { 743 spin_unlock(&port_lock);
746 spin_unlock(&map_lock);
747 return -1;
748 }
749 set_bit(port->devnum, dev_map);
750 set_bit(port->sm_devnum, dev_map);
751 spin_unlock(&map_lock);
752 744
753 port->ib_dev = device; 745 port->ib_dev = device;
754 port->port_num = port_num; 746 port->port_num = port_num;
755 init_MUTEX(&port->sm_sem); 747 init_MUTEX(&port->sm_sem);
756 748
757 cdev_init(&port->dev, &umad_fops); 749 port->dev = cdev_alloc();
758 port->dev.owner = THIS_MODULE; 750 if (!port->dev)
759 kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
760 if (cdev_add(&port->dev, base_dev + port->devnum, 1))
761 return -1; 751 return -1;
762 752 port->dev->owner = THIS_MODULE;
763 port->class_dev.class = &umad_class; 753 port->dev->ops = &umad_fops;
764 port->class_dev.dev = device->dma_device; 754 kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num);
765 755 if (cdev_add(port->dev, base_dev + port->dev_num, 1))
766 snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
767
768 if (class_device_register(&port->class_dev))
769 goto err_cdev; 756 goto err_cdev;
770 757
771 class_set_devdata(&port->class_dev, port); 758 port->class_dev = class_device_create(umad_class, port->dev->dev,
772 kref_get(&port->umad_dev->ref); 759 device->dma_device,
760 "umad%d", port->dev_num);
761 if (IS_ERR(port->class_dev))
762 goto err_cdev;
773 763
774 if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) 764 if (class_device_create_file(port->class_dev, &class_device_attr_ibdev))
775 goto err_class;
776 if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
777 goto err_class; 765 goto err_class;
778 if (class_device_create_file(&port->class_dev, &class_device_attr_port)) 766 if (class_device_create_file(port->class_dev, &class_device_attr_port))
779 goto err_class; 767 goto err_class;
780 768
781 cdev_init(&port->sm_dev, &umad_sm_fops); 769 port->sm_dev = cdev_alloc();
782 port->sm_dev.owner = THIS_MODULE; 770 if (!port->sm_dev)
783 kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); 771 goto err_class;
784 if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1)) 772 port->sm_dev->owner = THIS_MODULE;
785 return -1; 773 port->sm_dev->ops = &umad_sm_fops;
786 774 kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
787 port->sm_class_dev.class = &umad_class; 775 if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
788 port->sm_class_dev.dev = device->dma_device; 776 goto err_sm_cdev;
789
790 snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
791 777
792 if (class_device_register(&port->sm_class_dev)) 778 port->sm_class_dev = class_device_create(umad_class, port->sm_dev->dev,
779 device->dma_device,
780 "issm%d", port->dev_num);
781 if (IS_ERR(port->sm_class_dev))
793 goto err_sm_cdev; 782 goto err_sm_cdev;
794 783
795 class_set_devdata(&port->sm_class_dev, port); 784 class_set_devdata(port->class_dev, port);
796 kref_get(&port->umad_dev->ref); 785 class_set_devdata(port->sm_class_dev, port);
797 786
798 if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) 787 if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev))
799 goto err_sm_class;
800 if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
801 goto err_sm_class; 788 goto err_sm_class;
802 if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) 789 if (class_device_create_file(port->sm_class_dev, &class_device_attr_port))
803 goto err_sm_class; 790 goto err_sm_class;
804 791
792 spin_lock(&port_lock);
793 umad_port[port->dev_num] = port;
794 spin_unlock(&port_lock);
795
805 return 0; 796 return 0;
806 797
807err_sm_class: 798err_sm_class:
808 class_device_unregister(&port->sm_class_dev); 799 class_device_destroy(umad_class, port->sm_dev->dev);
809 800
810err_sm_cdev: 801err_sm_cdev:
811 cdev_del(&port->sm_dev); 802 cdev_del(port->sm_dev);
812 803
813err_class: 804err_class:
814 class_device_unregister(&port->class_dev); 805 class_device_destroy(umad_class, port->dev->dev);
815 806
816err_cdev: 807err_cdev:
817 cdev_del(&port->dev); 808 cdev_del(port->dev);
818 clear_bit(port->devnum, dev_map); 809 clear_bit(port->dev_num, dev_map);
819 810
820 return -1; 811 return -1;
821} 812}
822 813
814static void ib_umad_kill_port(struct ib_umad_port *port)
815{
816 class_set_devdata(port->class_dev, NULL);
817 class_set_devdata(port->sm_class_dev, NULL);
818
819 class_device_destroy(umad_class, port->dev->dev);
820 class_device_destroy(umad_class, port->sm_dev->dev);
821
822 cdev_del(port->dev);
823 cdev_del(port->sm_dev);
824
825 spin_lock(&port_lock);
826 umad_port[port->dev_num] = NULL;
827 spin_unlock(&port_lock);
828
829 clear_bit(port->dev_num, dev_map);
830}
831
823static void ib_umad_add_one(struct ib_device *device) 832static void ib_umad_add_one(struct ib_device *device)
824{ 833{
825 struct ib_umad_device *umad_dev; 834 struct ib_umad_device *umad_dev;
@@ -832,15 +841,12 @@ static void ib_umad_add_one(struct ib_device *device)
832 e = device->phys_port_cnt; 841 e = device->phys_port_cnt;
833 } 842 }
834 843
835 umad_dev = kmalloc(sizeof *umad_dev + 844 umad_dev = kzalloc(sizeof *umad_dev +
836 (e - s + 1) * sizeof (struct ib_umad_port), 845 (e - s + 1) * sizeof (struct ib_umad_port),
837 GFP_KERNEL); 846 GFP_KERNEL);
838 if (!umad_dev) 847 if (!umad_dev)
839 return; 848 return;
840 849
841 memset(umad_dev, 0, sizeof *umad_dev +
842 (e - s + 1) * sizeof (struct ib_umad_port));
843
844 kref_init(&umad_dev->ref); 850 kref_init(&umad_dev->ref);
845 851
846 umad_dev->start_port = s; 852 umad_dev->start_port = s;
@@ -858,10 +864,8 @@ static void ib_umad_add_one(struct ib_device *device)
858 return; 864 return;
859 865
860err: 866err:
861 while (--i >= s) { 867 while (--i >= s)
862 class_device_unregister(&umad_dev->port[i - s].class_dev); 868 ib_umad_kill_port(&umad_dev->port[i]);
863 class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
864 }
865 869
866 kref_put(&umad_dev->ref, ib_umad_release_dev); 870 kref_put(&umad_dev->ref, ib_umad_release_dev);
867} 871}
@@ -874,10 +878,8 @@ static void ib_umad_remove_one(struct ib_device *device)
874 if (!umad_dev) 878 if (!umad_dev)
875 return; 879 return;
876 880
877 for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) { 881 for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
878 class_device_unregister(&umad_dev->port[i].class_dev); 882 ib_umad_kill_port(&umad_dev->port[i]);
879 class_device_unregister(&umad_dev->port[i].sm_class_dev);
880 }
881 883
882 kref_put(&umad_dev->ref, ib_umad_release_dev); 884 kref_put(&umad_dev->ref, ib_umad_release_dev);
883} 885}
@@ -886,8 +888,6 @@ static int __init ib_umad_init(void)
886{ 888{
887 int ret; 889 int ret;
888 890
889 spin_lock_init(&map_lock);
890
891 ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, 891 ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
892 "infiniband_mad"); 892 "infiniband_mad");
893 if (ret) { 893 if (ret) {
@@ -895,13 +895,14 @@ static int __init ib_umad_init(void)
895 goto out; 895 goto out;
896 } 896 }
897 897
898 ret = class_register(&umad_class); 898 umad_class = class_create(THIS_MODULE, "infiniband_mad");
899 if (ret) { 899 if (IS_ERR(umad_class)) {
900 ret = PTR_ERR(umad_class);
900 printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); 901 printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
901 goto out_chrdev; 902 goto out_chrdev;
902 } 903 }
903 904
904 ret = class_create_file(&umad_class, &class_attr_abi_version); 905 ret = class_create_file(umad_class, &class_attr_abi_version);
905 if (ret) { 906 if (ret) {
906 printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); 907 printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
907 goto out_class; 908 goto out_class;
@@ -916,7 +917,7 @@ static int __init ib_umad_init(void)
916 return 0; 917 return 0;
917 918
918out_class: 919out_class:
919 class_unregister(&umad_class); 920 class_destroy(umad_class);
920 921
921out_chrdev: 922out_chrdev:
922 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); 923 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
@@ -928,7 +929,7 @@ out:
928static void __exit ib_umad_cleanup(void) 929static void __exit ib_umad_cleanup(void)
929{ 930{
930 ib_unregister_client(&umad_client); 931 ib_unregister_client(&umad_client);
931 class_unregister(&umad_class); 932 class_destroy(umad_class);
932 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); 933 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
933} 934}
934 935
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cc124344dd2c..031cdf3c066d 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -3,6 +3,7 @@
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
6 * 7 *
7 * This software is available to you under a choice of one of two 8 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU 9 * licenses. You may choose to be licensed under the terms of the GNU
@@ -38,29 +39,47 @@
38#ifndef UVERBS_H 39#ifndef UVERBS_H
39#define UVERBS_H 40#define UVERBS_H
40 41
41/* Include device.h and fs.h until cdev.h is self-sufficient */
42#include <linux/fs.h>
43#include <linux/device.h>
44#include <linux/cdev.h>
45#include <linux/kref.h> 42#include <linux/kref.h>
46#include <linux/idr.h> 43#include <linux/idr.h>
47 44
48#include <rdma/ib_verbs.h> 45#include <rdma/ib_verbs.h>
49#include <rdma/ib_user_verbs.h> 46#include <rdma/ib_user_verbs.h>
50 47
48/*
49 * Our lifetime rules for these structs are the following:
50 *
51 * struct ib_uverbs_device: One reference is held by the module and
52 * released in ib_uverbs_remove_one(). Another reference is taken by
53 * ib_uverbs_open() each time the character special file is opened,
54 * and released in ib_uverbs_release_file() when the file is released.
55 *
56 * struct ib_uverbs_file: One reference is held by the VFS and
57 * released when the file is closed. Another reference is taken when
58 * an asynchronous event queue file is created and released when the
59 * event file is closed.
60 *
61 * struct ib_uverbs_event_file: One reference is held by the VFS and
62 * released when the file is closed. For asynchronous event files,
63 * another reference is held by the corresponding main context file
64 * and released when that file is closed. For completion event files,
65 * a reference is taken when a CQ is created that uses the file, and
66 * released when the CQ is destroyed.
67 */
68
51struct ib_uverbs_device { 69struct ib_uverbs_device {
70 struct kref ref;
52 int devnum; 71 int devnum;
53 struct cdev dev; 72 struct cdev *dev;
54 struct class_device class_dev; 73 struct class_device *class_dev;
55 struct ib_device *ib_dev; 74 struct ib_device *ib_dev;
56 int num_comp; 75 int num_comp_vectors;
57}; 76};
58 77
59struct ib_uverbs_event_file { 78struct ib_uverbs_event_file {
60 struct kref ref; 79 struct kref ref;
80 struct file *file;
61 struct ib_uverbs_file *uverbs_file; 81 struct ib_uverbs_file *uverbs_file;
62 spinlock_t lock; 82 spinlock_t lock;
63 int fd;
64 int is_async; 83 int is_async;
65 wait_queue_head_t poll_wait; 84 wait_queue_head_t poll_wait;
66 struct fasync_struct *async_queue; 85 struct fasync_struct *async_queue;
@@ -73,8 +92,7 @@ struct ib_uverbs_file {
73 struct ib_uverbs_device *device; 92 struct ib_uverbs_device *device;
74 struct ib_ucontext *ucontext; 93 struct ib_ucontext *ucontext;
75 struct ib_event_handler event_handler; 94 struct ib_event_handler event_handler;
76 struct ib_uverbs_event_file async_file; 95 struct ib_uverbs_event_file *async_file;
77 struct ib_uverbs_event_file comp_file[1];
78}; 96};
79 97
80struct ib_uverbs_event { 98struct ib_uverbs_event {
@@ -110,10 +128,23 @@ extern struct idr ib_uverbs_cq_idr;
110extern struct idr ib_uverbs_qp_idr; 128extern struct idr ib_uverbs_qp_idr;
111extern struct idr ib_uverbs_srq_idr; 129extern struct idr ib_uverbs_srq_idr;
112 130
131struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
132 int is_async, int *fd);
133void ib_uverbs_release_event_file(struct kref *ref);
134struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
135
136void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
137 struct ib_uverbs_event_file *ev_file,
138 struct ib_ucq_object *uobj);
139void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
140 struct ib_uevent_object *uobj);
141
113void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); 142void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
114void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); 143void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
115void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); 144void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
116void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); 145void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
146void ib_uverbs_event_handler(struct ib_event_handler *handler,
147 struct ib_event *event);
117 148
118int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, 149int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
119 void *addr, size_t size, int write); 150 void *addr, size_t size, int write);
@@ -125,21 +156,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
125 const char __user *buf, int in_len, \ 156 const char __user *buf, int in_len, \
126 int out_len) 157 int out_len)
127 158
128IB_UVERBS_DECLARE_CMD(query_params);
129IB_UVERBS_DECLARE_CMD(get_context); 159IB_UVERBS_DECLARE_CMD(get_context);
130IB_UVERBS_DECLARE_CMD(query_device); 160IB_UVERBS_DECLARE_CMD(query_device);
131IB_UVERBS_DECLARE_CMD(query_port); 161IB_UVERBS_DECLARE_CMD(query_port);
132IB_UVERBS_DECLARE_CMD(query_gid);
133IB_UVERBS_DECLARE_CMD(query_pkey);
134IB_UVERBS_DECLARE_CMD(alloc_pd); 162IB_UVERBS_DECLARE_CMD(alloc_pd);
135IB_UVERBS_DECLARE_CMD(dealloc_pd); 163IB_UVERBS_DECLARE_CMD(dealloc_pd);
136IB_UVERBS_DECLARE_CMD(reg_mr); 164IB_UVERBS_DECLARE_CMD(reg_mr);
137IB_UVERBS_DECLARE_CMD(dereg_mr); 165IB_UVERBS_DECLARE_CMD(dereg_mr);
166IB_UVERBS_DECLARE_CMD(create_comp_channel);
138IB_UVERBS_DECLARE_CMD(create_cq); 167IB_UVERBS_DECLARE_CMD(create_cq);
168IB_UVERBS_DECLARE_CMD(poll_cq);
169IB_UVERBS_DECLARE_CMD(req_notify_cq);
139IB_UVERBS_DECLARE_CMD(destroy_cq); 170IB_UVERBS_DECLARE_CMD(destroy_cq);
140IB_UVERBS_DECLARE_CMD(create_qp); 171IB_UVERBS_DECLARE_CMD(create_qp);
141IB_UVERBS_DECLARE_CMD(modify_qp); 172IB_UVERBS_DECLARE_CMD(modify_qp);
142IB_UVERBS_DECLARE_CMD(destroy_qp); 173IB_UVERBS_DECLARE_CMD(destroy_qp);
174IB_UVERBS_DECLARE_CMD(post_send);
175IB_UVERBS_DECLARE_CMD(post_recv);
176IB_UVERBS_DECLARE_CMD(post_srq_recv);
177IB_UVERBS_DECLARE_CMD(create_ah);
178IB_UVERBS_DECLARE_CMD(destroy_ah);
143IB_UVERBS_DECLARE_CMD(attach_mcast); 179IB_UVERBS_DECLARE_CMD(attach_mcast);
144IB_UVERBS_DECLARE_CMD(detach_mcast); 180IB_UVERBS_DECLARE_CMD(detach_mcast);
145IB_UVERBS_DECLARE_CMD(create_srq); 181IB_UVERBS_DECLARE_CMD(create_srq);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 562445165d2b..8c89abc8c764 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,9 @@
33 * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ 34 * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
34 */ 35 */
35 36
37#include <linux/file.h>
38#include <linux/fs.h>
39
36#include <asm/uaccess.h> 40#include <asm/uaccess.h>
37 41
38#include "uverbs.h" 42#include "uverbs.h"
@@ -45,29 +49,6 @@
45 (udata)->outlen = (olen); \ 49 (udata)->outlen = (olen); \
46 } while (0) 50 } while (0)
47 51
48ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
49 const char __user *buf,
50 int in_len, int out_len)
51{
52 struct ib_uverbs_query_params cmd;
53 struct ib_uverbs_query_params_resp resp;
54
55 if (out_len < sizeof resp)
56 return -ENOSPC;
57
58 if (copy_from_user(&cmd, buf, sizeof cmd))
59 return -EFAULT;
60
61 memset(&resp, 0, sizeof resp);
62
63 resp.num_cq_events = file->device->num_comp;
64
65 if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
66 return -EFAULT;
67
68 return in_len;
69}
70
71ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, 52ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
72 const char __user *buf, 53 const char __user *buf,
73 int in_len, int out_len) 54 int in_len, int out_len)
@@ -77,7 +58,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
77 struct ib_udata udata; 58 struct ib_udata udata;
78 struct ib_device *ibdev = file->device->ib_dev; 59 struct ib_device *ibdev = file->device->ib_dev;
79 struct ib_ucontext *ucontext; 60 struct ib_ucontext *ucontext;
80 int i; 61 struct file *filp;
81 int ret; 62 int ret;
82 63
83 if (out_len < sizeof resp) 64 if (out_len < sizeof resp)
@@ -110,26 +91,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
110 INIT_LIST_HEAD(&ucontext->srq_list); 91 INIT_LIST_HEAD(&ucontext->srq_list);
111 INIT_LIST_HEAD(&ucontext->ah_list); 92 INIT_LIST_HEAD(&ucontext->ah_list);
112 93
113 resp.async_fd = file->async_file.fd; 94 resp.num_comp_vectors = file->device->num_comp_vectors;
114 for (i = 0; i < file->device->num_comp; ++i) 95
115 if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + 96 filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
116 i * sizeof (__u32), 97 if (IS_ERR(filp)) {
117 &file->comp_file[i].fd, sizeof (__u32))) { 98 ret = PTR_ERR(filp);
118 ret = -EFAULT; 99 goto err_free;
119 goto err_free; 100 }
120 }
121 101
122 if (copy_to_user((void __user *) (unsigned long) cmd.response, 102 if (copy_to_user((void __user *) (unsigned long) cmd.response,
123 &resp, sizeof resp)) { 103 &resp, sizeof resp)) {
124 ret = -EFAULT; 104 ret = -EFAULT;
125 goto err_free; 105 goto err_file;
126 } 106 }
127 107
108 file->async_file = filp->private_data;
109
110 INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
111 ib_uverbs_event_handler);
112 ret = ib_register_event_handler(&file->event_handler);
113 if (ret)
114 goto err_file;
115
116 kref_get(&file->async_file->ref);
117 kref_get(&file->ref);
128 file->ucontext = ucontext; 118 file->ucontext = ucontext;
119
120 fd_install(resp.async_fd, filp);
121
129 up(&file->mutex); 122 up(&file->mutex);
130 123
131 return in_len; 124 return in_len;
132 125
126err_file:
127 put_unused_fd(resp.async_fd);
128 fput(filp);
129
133err_free: 130err_free:
134 ibdev->dealloc_ucontext(ucontext); 131 ibdev->dealloc_ucontext(ucontext);
135 132
@@ -255,62 +252,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
255 return in_len; 252 return in_len;
256} 253}
257 254
258ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
259 const char __user *buf,
260 int in_len, int out_len)
261{
262 struct ib_uverbs_query_gid cmd;
263 struct ib_uverbs_query_gid_resp resp;
264 int ret;
265
266 if (out_len < sizeof resp)
267 return -ENOSPC;
268
269 if (copy_from_user(&cmd, buf, sizeof cmd))
270 return -EFAULT;
271
272 memset(&resp, 0, sizeof resp);
273
274 ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
275 (union ib_gid *) resp.gid);
276 if (ret)
277 return ret;
278
279 if (copy_to_user((void __user *) (unsigned long) cmd.response,
280 &resp, sizeof resp))
281 return -EFAULT;
282
283 return in_len;
284}
285
286ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
287 const char __user *buf,
288 int in_len, int out_len)
289{
290 struct ib_uverbs_query_pkey cmd;
291 struct ib_uverbs_query_pkey_resp resp;
292 int ret;
293
294 if (out_len < sizeof resp)
295 return -ENOSPC;
296
297 if (copy_from_user(&cmd, buf, sizeof cmd))
298 return -EFAULT;
299
300 memset(&resp, 0, sizeof resp);
301
302 ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
303 &resp.pkey);
304 if (ret)
305 return ret;
306
307 if (copy_to_user((void __user *) (unsigned long) cmd.response,
308 &resp, sizeof resp))
309 return -EFAULT;
310
311 return in_len;
312}
313
314ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, 255ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
315 const char __user *buf, 256 const char __user *buf,
316 int in_len, int out_len) 257 int in_len, int out_len)
@@ -349,24 +290,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
349 pd->uobject = uobj; 290 pd->uobject = uobj;
350 atomic_set(&pd->usecnt, 0); 291 atomic_set(&pd->usecnt, 0);
351 292
293 down(&ib_uverbs_idr_mutex);
294
352retry: 295retry:
353 if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { 296 if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
354 ret = -ENOMEM; 297 ret = -ENOMEM;
355 goto err_pd; 298 goto err_up;
356 } 299 }
357 300
358 down(&ib_uverbs_idr_mutex);
359 ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); 301 ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
360 up(&ib_uverbs_idr_mutex);
361 302
362 if (ret == -EAGAIN) 303 if (ret == -EAGAIN)
363 goto retry; 304 goto retry;
364 if (ret) 305 if (ret)
365 goto err_pd; 306 goto err_up;
366
367 down(&file->mutex);
368 list_add_tail(&uobj->list, &file->ucontext->pd_list);
369 up(&file->mutex);
370 307
371 memset(&resp, 0, sizeof resp); 308 memset(&resp, 0, sizeof resp);
372 resp.pd_handle = uobj->id; 309 resp.pd_handle = uobj->id;
@@ -374,21 +311,22 @@ retry:
374 if (copy_to_user((void __user *) (unsigned long) cmd.response, 311 if (copy_to_user((void __user *) (unsigned long) cmd.response,
375 &resp, sizeof resp)) { 312 &resp, sizeof resp)) {
376 ret = -EFAULT; 313 ret = -EFAULT;
377 goto err_list; 314 goto err_idr;
378 } 315 }
379 316
380 return in_len; 317 down(&file->mutex);
381 318 list_add_tail(&uobj->list, &file->ucontext->pd_list);
382err_list:
383 down(&file->mutex);
384 list_del(&uobj->list);
385 up(&file->mutex); 319 up(&file->mutex);
386 320
387 down(&ib_uverbs_idr_mutex);
388 idr_remove(&ib_uverbs_pd_idr, uobj->id);
389 up(&ib_uverbs_idr_mutex); 321 up(&ib_uverbs_idr_mutex);
390 322
391err_pd: 323 return in_len;
324
325err_idr:
326 idr_remove(&ib_uverbs_pd_idr, uobj->id);
327
328err_up:
329 up(&ib_uverbs_idr_mutex);
392 ib_dealloc_pd(pd); 330 ib_dealloc_pd(pd);
393 331
394err: 332err:
@@ -459,6 +397,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
459 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) 397 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
460 return -EINVAL; 398 return -EINVAL;
461 399
400 /*
401 * Local write permission is required if remote write or
402 * remote atomic permission is also requested.
403 */
404 if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
405 !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
406 return -EINVAL;
407
462 obj = kmalloc(sizeof *obj, GFP_KERNEL); 408 obj = kmalloc(sizeof *obj, GFP_KERNEL);
463 if (!obj) 409 if (!obj)
464 return -ENOMEM; 410 return -ENOMEM;
@@ -524,24 +470,22 @@ retry:
524 470
525 resp.mr_handle = obj->uobject.id; 471 resp.mr_handle = obj->uobject.id;
526 472
527 down(&file->mutex);
528 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
529 up(&file->mutex);
530
531 if (copy_to_user((void __user *) (unsigned long) cmd.response, 473 if (copy_to_user((void __user *) (unsigned long) cmd.response,
532 &resp, sizeof resp)) { 474 &resp, sizeof resp)) {
533 ret = -EFAULT; 475 ret = -EFAULT;
534 goto err_list; 476 goto err_idr;
535 } 477 }
536 478
479 down(&file->mutex);
480 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
481 up(&file->mutex);
482
537 up(&ib_uverbs_idr_mutex); 483 up(&ib_uverbs_idr_mutex);
538 484
539 return in_len; 485 return in_len;
540 486
541err_list: 487err_idr:
542 down(&file->mutex); 488 idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
543 list_del(&obj->uobject.list);
544 up(&file->mutex);
545 489
546err_unreg: 490err_unreg:
547 ib_dereg_mr(mr); 491 ib_dereg_mr(mr);
@@ -595,6 +539,35 @@ out:
595 return ret ? ret : in_len; 539 return ret ? ret : in_len;
596} 540}
597 541
542ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
543 const char __user *buf, int in_len,
544 int out_len)
545{
546 struct ib_uverbs_create_comp_channel cmd;
547 struct ib_uverbs_create_comp_channel_resp resp;
548 struct file *filp;
549
550 if (out_len < sizeof resp)
551 return -ENOSPC;
552
553 if (copy_from_user(&cmd, buf, sizeof cmd))
554 return -EFAULT;
555
556 filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
557 if (IS_ERR(filp))
558 return PTR_ERR(filp);
559
560 if (copy_to_user((void __user *) (unsigned long) cmd.response,
561 &resp, sizeof resp)) {
562 put_unused_fd(resp.fd);
563 fput(filp);
564 return -EFAULT;
565 }
566
567 fd_install(resp.fd, filp);
568 return in_len;
569}
570
598ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, 571ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
599 const char __user *buf, int in_len, 572 const char __user *buf, int in_len,
600 int out_len) 573 int out_len)
@@ -603,6 +576,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
603 struct ib_uverbs_create_cq_resp resp; 576 struct ib_uverbs_create_cq_resp resp;
604 struct ib_udata udata; 577 struct ib_udata udata;
605 struct ib_ucq_object *uobj; 578 struct ib_ucq_object *uobj;
579 struct ib_uverbs_event_file *ev_file = NULL;
606 struct ib_cq *cq; 580 struct ib_cq *cq;
607 int ret; 581 int ret;
608 582
@@ -616,9 +590,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
616 (unsigned long) cmd.response + sizeof resp, 590 (unsigned long) cmd.response + sizeof resp,
617 in_len - sizeof cmd, out_len - sizeof resp); 591 in_len - sizeof cmd, out_len - sizeof resp);
618 592
619 if (cmd.event_handler >= file->device->num_comp) 593 if (cmd.comp_vector >= file->device->num_comp_vectors)
620 return -EINVAL; 594 return -EINVAL;
621 595
596 if (cmd.comp_channel >= 0)
597 ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
598
622 uobj = kmalloc(sizeof *uobj, GFP_KERNEL); 599 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
623 if (!uobj) 600 if (!uobj)
624 return -ENOMEM; 601 return -ENOMEM;
@@ -641,27 +618,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
641 cq->uobject = &uobj->uobject; 618 cq->uobject = &uobj->uobject;
642 cq->comp_handler = ib_uverbs_comp_handler; 619 cq->comp_handler = ib_uverbs_comp_handler;
643 cq->event_handler = ib_uverbs_cq_event_handler; 620 cq->event_handler = ib_uverbs_cq_event_handler;
644 cq->cq_context = file; 621 cq->cq_context = ev_file;
645 atomic_set(&cq->usecnt, 0); 622 atomic_set(&cq->usecnt, 0);
646 623
624 down(&ib_uverbs_idr_mutex);
625
647retry: 626retry:
648 if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { 627 if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
649 ret = -ENOMEM; 628 ret = -ENOMEM;
650 goto err_cq; 629 goto err_up;
651 } 630 }
652 631
653 down(&ib_uverbs_idr_mutex);
654 ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); 632 ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
655 up(&ib_uverbs_idr_mutex);
656 633
657 if (ret == -EAGAIN) 634 if (ret == -EAGAIN)
658 goto retry; 635 goto retry;
659 if (ret) 636 if (ret)
660 goto err_cq; 637 goto err_up;
661
662 down(&file->mutex);
663 list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
664 up(&file->mutex);
665 638
666 memset(&resp, 0, sizeof resp); 639 memset(&resp, 0, sizeof resp);
667 resp.cq_handle = uobj->uobject.id; 640 resp.cq_handle = uobj->uobject.id;
@@ -670,21 +643,22 @@ retry:
670 if (copy_to_user((void __user *) (unsigned long) cmd.response, 643 if (copy_to_user((void __user *) (unsigned long) cmd.response,
671 &resp, sizeof resp)) { 644 &resp, sizeof resp)) {
672 ret = -EFAULT; 645 ret = -EFAULT;
673 goto err_list; 646 goto err_idr;
674 } 647 }
675 648
676 return in_len; 649 down(&file->mutex);
677 650 list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
678err_list:
679 down(&file->mutex);
680 list_del(&uobj->uobject.list);
681 up(&file->mutex); 651 up(&file->mutex);
682 652
683 down(&ib_uverbs_idr_mutex);
684 idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
685 up(&ib_uverbs_idr_mutex); 653 up(&ib_uverbs_idr_mutex);
686 654
687err_cq: 655 return in_len;
656
657err_idr:
658 idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
659
660err_up:
661 up(&ib_uverbs_idr_mutex);
688 ib_destroy_cq(cq); 662 ib_destroy_cq(cq);
689 663
690err: 664err:
@@ -692,6 +666,93 @@ err:
692 return ret; 666 return ret;
693} 667}
694 668
669ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
670 const char __user *buf, int in_len,
671 int out_len)
672{
673 struct ib_uverbs_poll_cq cmd;
674 struct ib_uverbs_poll_cq_resp *resp;
675 struct ib_cq *cq;
676 struct ib_wc *wc;
677 int ret = 0;
678 int i;
679 int rsize;
680
681 if (copy_from_user(&cmd, buf, sizeof cmd))
682 return -EFAULT;
683
684 wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
685 if (!wc)
686 return -ENOMEM;
687
688 rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
689 resp = kmalloc(rsize, GFP_KERNEL);
690 if (!resp) {
691 ret = -ENOMEM;
692 goto out_wc;
693 }
694
695 down(&ib_uverbs_idr_mutex);
696 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
697 if (!cq || cq->uobject->context != file->ucontext) {
698 ret = -EINVAL;
699 goto out;
700 }
701
702 resp->count = ib_poll_cq(cq, cmd.ne, wc);
703
704 for (i = 0; i < resp->count; i++) {
705 resp->wc[i].wr_id = wc[i].wr_id;
706 resp->wc[i].status = wc[i].status;
707 resp->wc[i].opcode = wc[i].opcode;
708 resp->wc[i].vendor_err = wc[i].vendor_err;
709 resp->wc[i].byte_len = wc[i].byte_len;
710 resp->wc[i].imm_data = wc[i].imm_data;
711 resp->wc[i].qp_num = wc[i].qp_num;
712 resp->wc[i].src_qp = wc[i].src_qp;
713 resp->wc[i].wc_flags = wc[i].wc_flags;
714 resp->wc[i].pkey_index = wc[i].pkey_index;
715 resp->wc[i].slid = wc[i].slid;
716 resp->wc[i].sl = wc[i].sl;
717 resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
718 resp->wc[i].port_num = wc[i].port_num;
719 }
720
721 if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
722 ret = -EFAULT;
723
724out:
725 up(&ib_uverbs_idr_mutex);
726 kfree(resp);
727
728out_wc:
729 kfree(wc);
730 return ret ? ret : in_len;
731}
732
733ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
734 const char __user *buf, int in_len,
735 int out_len)
736{
737 struct ib_uverbs_req_notify_cq cmd;
738 struct ib_cq *cq;
739 int ret = -EINVAL;
740
741 if (copy_from_user(&cmd, buf, sizeof cmd))
742 return -EFAULT;
743
744 down(&ib_uverbs_idr_mutex);
745 cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
746 if (cq && cq->uobject->context == file->ucontext) {
747 ib_req_notify_cq(cq, cmd.solicited_only ?
748 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
749 ret = in_len;
750 }
751 up(&ib_uverbs_idr_mutex);
752
753 return ret;
754}
755
695ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, 756ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
696 const char __user *buf, int in_len, 757 const char __user *buf, int in_len,
697 int out_len) 758 int out_len)
@@ -700,7 +761,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
700 struct ib_uverbs_destroy_cq_resp resp; 761 struct ib_uverbs_destroy_cq_resp resp;
701 struct ib_cq *cq; 762 struct ib_cq *cq;
702 struct ib_ucq_object *uobj; 763 struct ib_ucq_object *uobj;
703 struct ib_uverbs_event *evt, *tmp; 764 struct ib_uverbs_event_file *ev_file;
704 u64 user_handle; 765 u64 user_handle;
705 int ret = -EINVAL; 766 int ret = -EINVAL;
706 767
@@ -716,7 +777,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
716 goto out; 777 goto out;
717 778
718 user_handle = cq->uobject->user_handle; 779 user_handle = cq->uobject->user_handle;
719 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); 780 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
781 ev_file = cq->cq_context;
720 782
721 ret = ib_destroy_cq(cq); 783 ret = ib_destroy_cq(cq);
722 if (ret) 784 if (ret)
@@ -728,19 +790,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
728 list_del(&uobj->uobject.list); 790 list_del(&uobj->uobject.list);
729 up(&file->mutex); 791 up(&file->mutex);
730 792
731 spin_lock_irq(&file->comp_file[0].lock); 793 ib_uverbs_release_ucq(file, ev_file, uobj);
732 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
733 list_del(&evt->list);
734 kfree(evt);
735 }
736 spin_unlock_irq(&file->comp_file[0].lock);
737
738 spin_lock_irq(&file->async_file.lock);
739 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
740 list_del(&evt->list);
741 kfree(evt);
742 }
743 spin_unlock_irq(&file->async_file.lock);
744 794
745 resp.comp_events_reported = uobj->comp_events_reported; 795 resp.comp_events_reported = uobj->comp_events_reported;
746 resp.async_events_reported = uobj->async_events_reported; 796 resp.async_events_reported = uobj->async_events_reported;
@@ -859,24 +909,22 @@ retry:
859 909
860 resp.qp_handle = uobj->uobject.id; 910 resp.qp_handle = uobj->uobject.id;
861 911
862 down(&file->mutex);
863 list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
864 up(&file->mutex);
865
866 if (copy_to_user((void __user *) (unsigned long) cmd.response, 912 if (copy_to_user((void __user *) (unsigned long) cmd.response,
867 &resp, sizeof resp)) { 913 &resp, sizeof resp)) {
868 ret = -EFAULT; 914 ret = -EFAULT;
869 goto err_list; 915 goto err_idr;
870 } 916 }
871 917
918 down(&file->mutex);
919 list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
920 up(&file->mutex);
921
872 up(&ib_uverbs_idr_mutex); 922 up(&ib_uverbs_idr_mutex);
873 923
874 return in_len; 924 return in_len;
875 925
876err_list: 926err_idr:
877 down(&file->mutex); 927 idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id);
878 list_del(&uobj->uobject.list);
879 up(&file->mutex);
880 928
881err_destroy: 929err_destroy:
882 ib_destroy_qp(qp); 930 ib_destroy_qp(qp);
@@ -979,7 +1027,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
979 struct ib_uverbs_destroy_qp_resp resp; 1027 struct ib_uverbs_destroy_qp_resp resp;
980 struct ib_qp *qp; 1028 struct ib_qp *qp;
981 struct ib_uevent_object *uobj; 1029 struct ib_uevent_object *uobj;
982 struct ib_uverbs_event *evt, *tmp;
983 int ret = -EINVAL; 1030 int ret = -EINVAL;
984 1031
985 if (copy_from_user(&cmd, buf, sizeof cmd)) 1032 if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1005,12 +1052,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
1005 list_del(&uobj->uobject.list); 1052 list_del(&uobj->uobject.list);
1006 up(&file->mutex); 1053 up(&file->mutex);
1007 1054
1008 spin_lock_irq(&file->async_file.lock); 1055 ib_uverbs_release_uevent(file, uobj);
1009 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
1010 list_del(&evt->list);
1011 kfree(evt);
1012 }
1013 spin_unlock_irq(&file->async_file.lock);
1014 1056
1015 resp.events_reported = uobj->events_reported; 1057 resp.events_reported = uobj->events_reported;
1016 1058
@@ -1026,6 +1068,468 @@ out:
1026 return ret ? ret : in_len; 1068 return ret ? ret : in_len;
1027} 1069}
1028 1070
1071ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
1072 const char __user *buf, int in_len,
1073 int out_len)
1074{
1075 struct ib_uverbs_post_send cmd;
1076 struct ib_uverbs_post_send_resp resp;
1077 struct ib_uverbs_send_wr *user_wr;
1078 struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
1079 struct ib_qp *qp;
1080 int i, sg_ind;
1081 ssize_t ret = -EINVAL;
1082
1083 if (copy_from_user(&cmd, buf, sizeof cmd))
1084 return -EFAULT;
1085
1086 if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
1087 cmd.sge_count * sizeof (struct ib_uverbs_sge))
1088 return -EINVAL;
1089
1090 if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
1091 return -EINVAL;
1092
1093 user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
1094 if (!user_wr)
1095 return -ENOMEM;
1096
1097 down(&ib_uverbs_idr_mutex);
1098
1099 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1100 if (!qp || qp->uobject->context != file->ucontext)
1101 goto out;
1102
1103 sg_ind = 0;
1104 last = NULL;
1105 for (i = 0; i < cmd.wr_count; ++i) {
1106 if (copy_from_user(user_wr,
1107 buf + sizeof cmd + i * cmd.wqe_size,
1108 cmd.wqe_size)) {
1109 ret = -EFAULT;
1110 goto out;
1111 }
1112
1113 if (user_wr->num_sge + sg_ind > cmd.sge_count) {
1114 ret = -EINVAL;
1115 goto out;
1116 }
1117
1118 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
1119 user_wr->num_sge * sizeof (struct ib_sge),
1120 GFP_KERNEL);
1121 if (!next) {
1122 ret = -ENOMEM;
1123 goto out;
1124 }
1125
1126 if (!last)
1127 wr = next;
1128 else
1129 last->next = next;
1130 last = next;
1131
1132 next->next = NULL;
1133 next->wr_id = user_wr->wr_id;
1134 next->num_sge = user_wr->num_sge;
1135 next->opcode = user_wr->opcode;
1136 next->send_flags = user_wr->send_flags;
1137 next->imm_data = user_wr->imm_data;
1138
1139 if (qp->qp_type == IB_QPT_UD) {
1140 next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
1141 user_wr->wr.ud.ah);
1142 if (!next->wr.ud.ah) {
1143 ret = -EINVAL;
1144 goto out;
1145 }
1146 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
1147 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
1148 } else {
1149 switch (next->opcode) {
1150 case IB_WR_RDMA_WRITE:
1151 case IB_WR_RDMA_WRITE_WITH_IMM:
1152 case IB_WR_RDMA_READ:
1153 next->wr.rdma.remote_addr =
1154 user_wr->wr.rdma.remote_addr;
1155 next->wr.rdma.rkey =
1156 user_wr->wr.rdma.rkey;
1157 break;
1158 case IB_WR_ATOMIC_CMP_AND_SWP:
1159 case IB_WR_ATOMIC_FETCH_AND_ADD:
1160 next->wr.atomic.remote_addr =
1161 user_wr->wr.atomic.remote_addr;
1162 next->wr.atomic.compare_add =
1163 user_wr->wr.atomic.compare_add;
1164 next->wr.atomic.swap = user_wr->wr.atomic.swap;
1165 next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
1166 break;
1167 default:
1168 break;
1169 }
1170 }
1171
1172 if (next->num_sge) {
1173 next->sg_list = (void *) next +
1174 ALIGN(sizeof *next, sizeof (struct ib_sge));
1175 if (copy_from_user(next->sg_list,
1176 buf + sizeof cmd +
1177 cmd.wr_count * cmd.wqe_size +
1178 sg_ind * sizeof (struct ib_sge),
1179 next->num_sge * sizeof (struct ib_sge))) {
1180 ret = -EFAULT;
1181 goto out;
1182 }
1183 sg_ind += next->num_sge;
1184 } else
1185 next->sg_list = NULL;
1186 }
1187
1188 resp.bad_wr = 0;
1189 ret = qp->device->post_send(qp, wr, &bad_wr);
1190 if (ret)
1191 for (next = wr; next; next = next->next) {
1192 ++resp.bad_wr;
1193 if (next == bad_wr)
1194 break;
1195 }
1196
1197 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1198 &resp, sizeof resp))
1199 ret = -EFAULT;
1200
1201out:
1202 up(&ib_uverbs_idr_mutex);
1203
1204 while (wr) {
1205 next = wr->next;
1206 kfree(wr);
1207 wr = next;
1208 }
1209
1210 kfree(user_wr);
1211
1212 return ret ? ret : in_len;
1213}
1214
1215static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
1216 int in_len,
1217 u32 wr_count,
1218 u32 sge_count,
1219 u32 wqe_size)
1220{
1221 struct ib_uverbs_recv_wr *user_wr;
1222 struct ib_recv_wr *wr = NULL, *last, *next;
1223 int sg_ind;
1224 int i;
1225 int ret;
1226
1227 if (in_len < wqe_size * wr_count +
1228 sge_count * sizeof (struct ib_uverbs_sge))
1229 return ERR_PTR(-EINVAL);
1230
1231 if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
1232 return ERR_PTR(-EINVAL);
1233
1234 user_wr = kmalloc(wqe_size, GFP_KERNEL);
1235 if (!user_wr)
1236 return ERR_PTR(-ENOMEM);
1237
1238 sg_ind = 0;
1239 last = NULL;
1240 for (i = 0; i < wr_count; ++i) {
1241 if (copy_from_user(user_wr, buf + i * wqe_size,
1242 wqe_size)) {
1243 ret = -EFAULT;
1244 goto err;
1245 }
1246
1247 if (user_wr->num_sge + sg_ind > sge_count) {
1248 ret = -EINVAL;
1249 goto err;
1250 }
1251
1252 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
1253 user_wr->num_sge * sizeof (struct ib_sge),
1254 GFP_KERNEL);
1255 if (!next) {
1256 ret = -ENOMEM;
1257 goto err;
1258 }
1259
1260 if (!last)
1261 wr = next;
1262 else
1263 last->next = next;
1264 last = next;
1265
1266 next->next = NULL;
1267 next->wr_id = user_wr->wr_id;
1268 next->num_sge = user_wr->num_sge;
1269
1270 if (next->num_sge) {
1271 next->sg_list = (void *) next +
1272 ALIGN(sizeof *next, sizeof (struct ib_sge));
1273 if (copy_from_user(next->sg_list,
1274 buf + wr_count * wqe_size +
1275 sg_ind * sizeof (struct ib_sge),
1276 next->num_sge * sizeof (struct ib_sge))) {
1277 ret = -EFAULT;
1278 goto err;
1279 }
1280 sg_ind += next->num_sge;
1281 } else
1282 next->sg_list = NULL;
1283 }
1284
1285 kfree(user_wr);
1286 return wr;
1287
1288err:
1289 kfree(user_wr);
1290
1291 while (wr) {
1292 next = wr->next;
1293 kfree(wr);
1294 wr = next;
1295 }
1296
1297 return ERR_PTR(ret);
1298}
1299
1300ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
1301 const char __user *buf, int in_len,
1302 int out_len)
1303{
1304 struct ib_uverbs_post_recv cmd;
1305 struct ib_uverbs_post_recv_resp resp;
1306 struct ib_recv_wr *wr, *next, *bad_wr;
1307 struct ib_qp *qp;
1308 ssize_t ret = -EINVAL;
1309
1310 if (copy_from_user(&cmd, buf, sizeof cmd))
1311 return -EFAULT;
1312
1313 wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
1314 in_len - sizeof cmd, cmd.wr_count,
1315 cmd.sge_count, cmd.wqe_size);
1316 if (IS_ERR(wr))
1317 return PTR_ERR(wr);
1318
1319 down(&ib_uverbs_idr_mutex);
1320
1321 qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
1322 if (!qp || qp->uobject->context != file->ucontext)
1323 goto out;
1324
1325 resp.bad_wr = 0;
1326 ret = qp->device->post_recv(qp, wr, &bad_wr);
1327 if (ret)
1328 for (next = wr; next; next = next->next) {
1329 ++resp.bad_wr;
1330 if (next == bad_wr)
1331 break;
1332 }
1333
1334
1335 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1336 &resp, sizeof resp))
1337 ret = -EFAULT;
1338
1339out:
1340 up(&ib_uverbs_idr_mutex);
1341
1342 while (wr) {
1343 next = wr->next;
1344 kfree(wr);
1345 wr = next;
1346 }
1347
1348 return ret ? ret : in_len;
1349}
1350
1351ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
1352 const char __user *buf, int in_len,
1353 int out_len)
1354{
1355 struct ib_uverbs_post_srq_recv cmd;
1356 struct ib_uverbs_post_srq_recv_resp resp;
1357 struct ib_recv_wr *wr, *next, *bad_wr;
1358 struct ib_srq *srq;
1359 ssize_t ret = -EINVAL;
1360
1361 if (copy_from_user(&cmd, buf, sizeof cmd))
1362 return -EFAULT;
1363
1364 wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
1365 in_len - sizeof cmd, cmd.wr_count,
1366 cmd.sge_count, cmd.wqe_size);
1367 if (IS_ERR(wr))
1368 return PTR_ERR(wr);
1369
1370 down(&ib_uverbs_idr_mutex);
1371
1372 srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
1373 if (!srq || srq->uobject->context != file->ucontext)
1374 goto out;
1375
1376 resp.bad_wr = 0;
1377 ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
1378 if (ret)
1379 for (next = wr; next; next = next->next) {
1380 ++resp.bad_wr;
1381 if (next == bad_wr)
1382 break;
1383 }
1384
1385
1386 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1387 &resp, sizeof resp))
1388 ret = -EFAULT;
1389
1390out:
1391 up(&ib_uverbs_idr_mutex);
1392
1393 while (wr) {
1394 next = wr->next;
1395 kfree(wr);
1396 wr = next;
1397 }
1398
1399 return ret ? ret : in_len;
1400}
1401
1402ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
1403 const char __user *buf, int in_len,
1404 int out_len)
1405{
1406 struct ib_uverbs_create_ah cmd;
1407 struct ib_uverbs_create_ah_resp resp;
1408 struct ib_uobject *uobj;
1409 struct ib_pd *pd;
1410 struct ib_ah *ah;
1411 struct ib_ah_attr attr;
1412 int ret;
1413
1414 if (out_len < sizeof resp)
1415 return -ENOSPC;
1416
1417 if (copy_from_user(&cmd, buf, sizeof cmd))
1418 return -EFAULT;
1419
1420 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
1421 if (!uobj)
1422 return -ENOMEM;
1423
1424 down(&ib_uverbs_idr_mutex);
1425
1426 pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
1427 if (!pd || pd->uobject->context != file->ucontext) {
1428 ret = -EINVAL;
1429 goto err_up;
1430 }
1431
1432 uobj->user_handle = cmd.user_handle;
1433 uobj->context = file->ucontext;
1434
1435 attr.dlid = cmd.attr.dlid;
1436 attr.sl = cmd.attr.sl;
1437 attr.src_path_bits = cmd.attr.src_path_bits;
1438 attr.static_rate = cmd.attr.static_rate;
1439 attr.port_num = cmd.attr.port_num;
1440 attr.grh.flow_label = cmd.attr.grh.flow_label;
1441 attr.grh.sgid_index = cmd.attr.grh.sgid_index;
1442 attr.grh.hop_limit = cmd.attr.grh.hop_limit;
1443 attr.grh.traffic_class = cmd.attr.grh.traffic_class;
1444 memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
1445
1446 ah = ib_create_ah(pd, &attr);
1447 if (IS_ERR(ah)) {
1448 ret = PTR_ERR(ah);
1449 goto err_up;
1450 }
1451
1452 ah->uobject = uobj;
1453
1454retry:
1455 if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
1456 ret = -ENOMEM;
1457 goto err_destroy;
1458 }
1459
1460 ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
1461
1462 if (ret == -EAGAIN)
1463 goto retry;
1464 if (ret)
1465 goto err_destroy;
1466
1467 resp.ah_handle = uobj->id;
1468
1469 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1470 &resp, sizeof resp)) {
1471 ret = -EFAULT;
1472 goto err_idr;
1473 }
1474
1475 down(&file->mutex);
1476 list_add_tail(&uobj->list, &file->ucontext->ah_list);
1477 up(&file->mutex);
1478
1479 up(&ib_uverbs_idr_mutex);
1480
1481 return in_len;
1482
1483err_idr:
1484 idr_remove(&ib_uverbs_ah_idr, uobj->id);
1485
1486err_destroy:
1487 ib_destroy_ah(ah);
1488
1489err_up:
1490 up(&ib_uverbs_idr_mutex);
1491
1492 kfree(uobj);
1493 return ret;
1494}
1495
1496ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
1497 const char __user *buf, int in_len, int out_len)
1498{
1499 struct ib_uverbs_destroy_ah cmd;
1500 struct ib_ah *ah;
1501 struct ib_uobject *uobj;
1502 int ret = -EINVAL;
1503
1504 if (copy_from_user(&cmd, buf, sizeof cmd))
1505 return -EFAULT;
1506
1507 down(&ib_uverbs_idr_mutex);
1508
1509 ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
1510 if (!ah || ah->uobject->context != file->ucontext)
1511 goto out;
1512
1513 uobj = ah->uobject;
1514
1515 ret = ib_destroy_ah(ah);
1516 if (ret)
1517 goto out;
1518
1519 idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
1520
1521 down(&file->mutex);
1522 list_del(&uobj->list);
1523 up(&file->mutex);
1524
1525 kfree(uobj);
1526
1527out:
1528 up(&ib_uverbs_idr_mutex);
1529
1530 return ret ? ret : in_len;
1531}
1532
1029ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, 1533ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
1030 const char __user *buf, int in_len, 1534 const char __user *buf, int in_len,
1031 int out_len) 1535 int out_len)
@@ -1148,24 +1652,22 @@ retry:
1148 1652
1149 resp.srq_handle = uobj->uobject.id; 1653 resp.srq_handle = uobj->uobject.id;
1150 1654
1151 down(&file->mutex);
1152 list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
1153 up(&file->mutex);
1154
1155 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1655 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1156 &resp, sizeof resp)) { 1656 &resp, sizeof resp)) {
1157 ret = -EFAULT; 1657 ret = -EFAULT;
1158 goto err_list; 1658 goto err_idr;
1159 } 1659 }
1160 1660
1661 down(&file->mutex);
1662 list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
1663 up(&file->mutex);
1664
1161 up(&ib_uverbs_idr_mutex); 1665 up(&ib_uverbs_idr_mutex);
1162 1666
1163 return in_len; 1667 return in_len;
1164 1668
1165err_list: 1669err_idr:
1166 down(&file->mutex); 1670 idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id);
1167 list_del(&uobj->uobject.list);
1168 up(&file->mutex);
1169 1671
1170err_destroy: 1672err_destroy:
1171 ib_destroy_srq(srq); 1673 ib_destroy_srq(srq);
@@ -1217,7 +1719,6 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
1217 struct ib_uverbs_destroy_srq_resp resp; 1719 struct ib_uverbs_destroy_srq_resp resp;
1218 struct ib_srq *srq; 1720 struct ib_srq *srq;
1219 struct ib_uevent_object *uobj; 1721 struct ib_uevent_object *uobj;
1220 struct ib_uverbs_event *evt, *tmp;
1221 int ret = -EINVAL; 1722 int ret = -EINVAL;
1222 1723
1223 if (copy_from_user(&cmd, buf, sizeof cmd)) 1724 if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1243,12 +1744,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
1243 list_del(&uobj->uobject.list); 1744 list_del(&uobj->uobject.list);
1244 up(&file->mutex); 1745 up(&file->mutex);
1245 1746
1246 spin_lock_irq(&file->async_file.lock); 1747 ib_uverbs_release_uevent(file, uobj);
1247 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
1248 list_del(&evt->list);
1249 kfree(evt);
1250 }
1251 spin_unlock_irq(&file->async_file.lock);
1252 1748
1253 resp.events_reported = uobj->events_reported; 1749 resp.events_reported = uobj->events_reported;
1254 1750
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 12511808de21..ac08d2c93ea1 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -3,6 +3,7 @@
3 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
6 * 7 *
7 * This software is available to you under a choice of one of two 8 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU 9 * licenses. You may choose to be licensed under the terms of the GNU
@@ -43,6 +44,7 @@
43#include <linux/poll.h> 44#include <linux/poll.h>
44#include <linux/file.h> 45#include <linux/file.h>
45#include <linux/mount.h> 46#include <linux/mount.h>
47#include <linux/cdev.h>
46 48
47#include <asm/uaccess.h> 49#include <asm/uaccess.h>
48 50
@@ -62,6 +64,8 @@ enum {
62 64
63#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) 65#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
64 66
67static struct class *uverbs_class;
68
65DECLARE_MUTEX(ib_uverbs_idr_mutex); 69DECLARE_MUTEX(ib_uverbs_idr_mutex);
66DEFINE_IDR(ib_uverbs_pd_idr); 70DEFINE_IDR(ib_uverbs_pd_idr);
67DEFINE_IDR(ib_uverbs_mr_idr); 71DEFINE_IDR(ib_uverbs_mr_idr);
@@ -72,31 +76,37 @@ DEFINE_IDR(ib_uverbs_qp_idr);
72DEFINE_IDR(ib_uverbs_srq_idr); 76DEFINE_IDR(ib_uverbs_srq_idr);
73 77
74static spinlock_t map_lock; 78static spinlock_t map_lock;
79static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
75static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 80static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
76 81
77static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 82static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
78 const char __user *buf, int in_len, 83 const char __user *buf, int in_len,
79 int out_len) = { 84 int out_len) = {
80 [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, 85 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
81 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, 86 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
82 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, 87 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
83 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, 88 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
84 [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, 89 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
85 [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, 90 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
86 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, 91 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
87 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, 92 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
88 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, 93 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
89 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, 94 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
90 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, 95 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
91 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, 96 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
92 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, 97 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
93 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, 98 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
94 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, 99 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
95 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, 100 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
96 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, 101 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
97 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, 102 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
98 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, 103 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
99 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, 104 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
105 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
106 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
107 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
108 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
109 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
100}; 110};
101 111
102static struct vfsmount *uverbs_event_mnt; 112static struct vfsmount *uverbs_event_mnt;
@@ -104,7 +114,54 @@ static struct vfsmount *uverbs_event_mnt;
104static void ib_uverbs_add_one(struct ib_device *device); 114static void ib_uverbs_add_one(struct ib_device *device);
105static void ib_uverbs_remove_one(struct ib_device *device); 115static void ib_uverbs_remove_one(struct ib_device *device);
106 116
107static int ib_dealloc_ucontext(struct ib_ucontext *context) 117static void ib_uverbs_release_dev(struct kref *ref)
118{
119 struct ib_uverbs_device *dev =
120 container_of(ref, struct ib_uverbs_device, ref);
121
122 kfree(dev);
123}
124
125void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
126 struct ib_uverbs_event_file *ev_file,
127 struct ib_ucq_object *uobj)
128{
129 struct ib_uverbs_event *evt, *tmp;
130
131 if (ev_file) {
132 spin_lock_irq(&ev_file->lock);
133 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
134 list_del(&evt->list);
135 kfree(evt);
136 }
137 spin_unlock_irq(&ev_file->lock);
138
139 kref_put(&ev_file->ref, ib_uverbs_release_event_file);
140 }
141
142 spin_lock_irq(&file->async_file->lock);
143 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
144 list_del(&evt->list);
145 kfree(evt);
146 }
147 spin_unlock_irq(&file->async_file->lock);
148}
149
150void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
151 struct ib_uevent_object *uobj)
152{
153 struct ib_uverbs_event *evt, *tmp;
154
155 spin_lock_irq(&file->async_file->lock);
156 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
157 list_del(&evt->list);
158 kfree(evt);
159 }
160 spin_unlock_irq(&file->async_file->lock);
161}
162
163static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
164 struct ib_ucontext *context)
108{ 165{
109 struct ib_uobject *uobj, *tmp; 166 struct ib_uobject *uobj, *tmp;
110 167
@@ -113,30 +170,46 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
113 170
114 down(&ib_uverbs_idr_mutex); 171 down(&ib_uverbs_idr_mutex);
115 172
116 /* XXX Free AHs */ 173 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
174 struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
175 idr_remove(&ib_uverbs_ah_idr, uobj->id);
176 ib_destroy_ah(ah);
177 list_del(&uobj->list);
178 kfree(uobj);
179 }
117 180
118 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { 181 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
119 struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); 182 struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
183 struct ib_uevent_object *uevent =
184 container_of(uobj, struct ib_uevent_object, uobject);
120 idr_remove(&ib_uverbs_qp_idr, uobj->id); 185 idr_remove(&ib_uverbs_qp_idr, uobj->id);
121 ib_destroy_qp(qp); 186 ib_destroy_qp(qp);
122 list_del(&uobj->list); 187 list_del(&uobj->list);
123 kfree(container_of(uobj, struct ib_uevent_object, uobject)); 188 ib_uverbs_release_uevent(file, uevent);
189 kfree(uevent);
124 } 190 }
125 191
126 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { 192 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
127 struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); 193 struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
194 struct ib_uverbs_event_file *ev_file = cq->cq_context;
195 struct ib_ucq_object *ucq =
196 container_of(uobj, struct ib_ucq_object, uobject);
128 idr_remove(&ib_uverbs_cq_idr, uobj->id); 197 idr_remove(&ib_uverbs_cq_idr, uobj->id);
129 ib_destroy_cq(cq); 198 ib_destroy_cq(cq);
130 list_del(&uobj->list); 199 list_del(&uobj->list);
131 kfree(container_of(uobj, struct ib_ucq_object, uobject)); 200 ib_uverbs_release_ucq(file, ev_file, ucq);
201 kfree(ucq);
132 } 202 }
133 203
134 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { 204 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
135 struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); 205 struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id);
206 struct ib_uevent_object *uevent =
207 container_of(uobj, struct ib_uevent_object, uobject);
136 idr_remove(&ib_uverbs_srq_idr, uobj->id); 208 idr_remove(&ib_uverbs_srq_idr, uobj->id);
137 ib_destroy_srq(srq); 209 ib_destroy_srq(srq);
138 list_del(&uobj->list); 210 list_del(&uobj->list);
139 kfree(container_of(uobj, struct ib_uevent_object, uobject)); 211 ib_uverbs_release_uevent(file, uevent);
212 kfree(uevent);
140 } 213 }
141 214
142 /* XXX Free MWs */ 215 /* XXX Free MWs */
@@ -175,6 +248,8 @@ static void ib_uverbs_release_file(struct kref *ref)
175 container_of(ref, struct ib_uverbs_file, ref); 248 container_of(ref, struct ib_uverbs_file, ref);
176 249
177 module_put(file->device->ib_dev->owner); 250 module_put(file->device->ib_dev->owner);
251 kref_put(&file->device->ref, ib_uverbs_release_dev);
252
178 kfree(file); 253 kfree(file);
179} 254}
180 255
@@ -188,25 +263,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
188 263
189 spin_lock_irq(&file->lock); 264 spin_lock_irq(&file->lock);
190 265
191 while (list_empty(&file->event_list) && file->fd >= 0) { 266 while (list_empty(&file->event_list)) {
192 spin_unlock_irq(&file->lock); 267 spin_unlock_irq(&file->lock);
193 268
194 if (filp->f_flags & O_NONBLOCK) 269 if (filp->f_flags & O_NONBLOCK)
195 return -EAGAIN; 270 return -EAGAIN;
196 271
197 if (wait_event_interruptible(file->poll_wait, 272 if (wait_event_interruptible(file->poll_wait,
198 !list_empty(&file->event_list) || 273 !list_empty(&file->event_list)))
199 file->fd < 0))
200 return -ERESTARTSYS; 274 return -ERESTARTSYS;
201 275
202 spin_lock_irq(&file->lock); 276 spin_lock_irq(&file->lock);
203 } 277 }
204 278
205 if (file->fd < 0) {
206 spin_unlock_irq(&file->lock);
207 return -ENODEV;
208 }
209
210 event = list_entry(file->event_list.next, struct ib_uverbs_event, list); 279 event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
211 280
212 if (file->is_async) 281 if (file->is_async)
@@ -248,26 +317,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
248 poll_wait(filp, &file->poll_wait, wait); 317 poll_wait(filp, &file->poll_wait, wait);
249 318
250 spin_lock_irq(&file->lock); 319 spin_lock_irq(&file->lock);
251 if (file->fd < 0) 320 if (!list_empty(&file->event_list))
252 pollflags = POLLERR;
253 else if (!list_empty(&file->event_list))
254 pollflags = POLLIN | POLLRDNORM; 321 pollflags = POLLIN | POLLRDNORM;
255 spin_unlock_irq(&file->lock); 322 spin_unlock_irq(&file->lock);
256 323
257 return pollflags; 324 return pollflags;
258} 325}
259 326
260static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) 327void ib_uverbs_release_event_file(struct kref *ref)
261{ 328{
262 struct ib_uverbs_event *entry, *tmp; 329 struct ib_uverbs_event_file *file =
330 container_of(ref, struct ib_uverbs_event_file, ref);
263 331
264 spin_lock_irq(&file->lock); 332 kfree(file);
265 if (file->fd != -1) {
266 file->fd = -1;
267 list_for_each_entry_safe(entry, tmp, &file->event_list, list)
268 kfree(entry);
269 }
270 spin_unlock_irq(&file->lock);
271} 333}
272 334
273static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) 335static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
@@ -280,21 +342,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
280static int ib_uverbs_event_close(struct inode *inode, struct file *filp) 342static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
281{ 343{
282 struct ib_uverbs_event_file *file = filp->private_data; 344 struct ib_uverbs_event_file *file = filp->private_data;
345 struct ib_uverbs_event *entry, *tmp;
346
347 spin_lock_irq(&file->lock);
348 file->file = NULL;
349 list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
350 if (entry->counter)
351 list_del(&entry->obj_list);
352 kfree(entry);
353 }
354 spin_unlock_irq(&file->lock);
283 355
284 ib_uverbs_event_release(file);
285 ib_uverbs_event_fasync(-1, filp, 0); 356 ib_uverbs_event_fasync(-1, filp, 0);
286 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); 357
358 if (file->is_async) {
359 ib_unregister_event_handler(&file->uverbs_file->event_handler);
360 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
361 }
362 kref_put(&file->ref, ib_uverbs_release_event_file);
287 363
288 return 0; 364 return 0;
289} 365}
290 366
291static struct file_operations uverbs_event_fops = { 367static struct file_operations uverbs_event_fops = {
292 /* 368 .owner = THIS_MODULE,
293 * No .owner field since we artificially create event files,
294 * so there is no increment to the module reference count in
295 * the open path. All event files come from a uverbs command
296 * file, which already takes a module reference, so this is OK.
297 */
298 .read = ib_uverbs_event_read, 369 .read = ib_uverbs_event_read,
299 .poll = ib_uverbs_event_poll, 370 .poll = ib_uverbs_event_poll,
300 .release = ib_uverbs_event_close, 371 .release = ib_uverbs_event_close,
@@ -303,27 +374,37 @@ static struct file_operations uverbs_event_fops = {
303 374
304void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) 375void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
305{ 376{
306 struct ib_uverbs_file *file = cq_context; 377 struct ib_uverbs_event_file *file = cq_context;
307 struct ib_ucq_object *uobj; 378 struct ib_ucq_object *uobj;
308 struct ib_uverbs_event *entry; 379 struct ib_uverbs_event *entry;
309 unsigned long flags; 380 unsigned long flags;
381
382 if (!file)
383 return;
384
385 spin_lock_irqsave(&file->lock, flags);
386 if (!file->file) {
387 spin_unlock_irqrestore(&file->lock, flags);
388 return;
389 }
310 390
311 entry = kmalloc(sizeof *entry, GFP_ATOMIC); 391 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
312 if (!entry) 392 if (!entry) {
393 spin_unlock_irqrestore(&file->lock, flags);
313 return; 394 return;
395 }
314 396
315 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); 397 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
316 398
317 entry->desc.comp.cq_handle = cq->uobject->user_handle; 399 entry->desc.comp.cq_handle = cq->uobject->user_handle;
318 entry->counter = &uobj->comp_events_reported; 400 entry->counter = &uobj->comp_events_reported;
319 401
320 spin_lock_irqsave(&file->comp_file[0].lock, flags); 402 list_add_tail(&entry->list, &file->event_list);
321 list_add_tail(&entry->list, &file->comp_file[0].event_list);
322 list_add_tail(&entry->obj_list, &uobj->comp_list); 403 list_add_tail(&entry->obj_list, &uobj->comp_list);
323 spin_unlock_irqrestore(&file->comp_file[0].lock, flags); 404 spin_unlock_irqrestore(&file->lock, flags);
324 405
325 wake_up_interruptible(&file->comp_file[0].poll_wait); 406 wake_up_interruptible(&file->poll_wait);
326 kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); 407 kill_fasync(&file->async_queue, SIGIO, POLL_IN);
327} 408}
328 409
329static void ib_uverbs_async_handler(struct ib_uverbs_file *file, 410static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -334,32 +415,40 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
334 struct ib_uverbs_event *entry; 415 struct ib_uverbs_event *entry;
335 unsigned long flags; 416 unsigned long flags;
336 417
418 spin_lock_irqsave(&file->async_file->lock, flags);
419 if (!file->async_file->file) {
420 spin_unlock_irqrestore(&file->async_file->lock, flags);
421 return;
422 }
423
337 entry = kmalloc(sizeof *entry, GFP_ATOMIC); 424 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
338 if (!entry) 425 if (!entry) {
426 spin_unlock_irqrestore(&file->async_file->lock, flags);
339 return; 427 return;
428 }
340 429
341 entry->desc.async.element = element; 430 entry->desc.async.element = element;
342 entry->desc.async.event_type = event; 431 entry->desc.async.event_type = event;
343 entry->counter = counter; 432 entry->counter = counter;
344 433
345 spin_lock_irqsave(&file->async_file.lock, flags); 434 list_add_tail(&entry->list, &file->async_file->event_list);
346 list_add_tail(&entry->list, &file->async_file.event_list);
347 if (obj_list) 435 if (obj_list)
348 list_add_tail(&entry->obj_list, obj_list); 436 list_add_tail(&entry->obj_list, obj_list);
349 spin_unlock_irqrestore(&file->async_file.lock, flags); 437 spin_unlock_irqrestore(&file->async_file->lock, flags);
350 438
351 wake_up_interruptible(&file->async_file.poll_wait); 439 wake_up_interruptible(&file->async_file->poll_wait);
352 kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); 440 kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
353} 441}
354 442
355void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) 443void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
356{ 444{
445 struct ib_uverbs_event_file *ev_file = context_ptr;
357 struct ib_ucq_object *uobj; 446 struct ib_ucq_object *uobj;
358 447
359 uobj = container_of(event->element.cq->uobject, 448 uobj = container_of(event->element.cq->uobject,
360 struct ib_ucq_object, uobject); 449 struct ib_ucq_object, uobject);
361 450
362 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, 451 ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle,
363 event->event, &uobj->async_list, 452 event->event, &uobj->async_list,
364 &uobj->async_events_reported); 453 &uobj->async_events_reported);
365 454
@@ -389,8 +478,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
389 &uobj->events_reported); 478 &uobj->events_reported);
390} 479}
391 480
392static void ib_uverbs_event_handler(struct ib_event_handler *handler, 481void ib_uverbs_event_handler(struct ib_event_handler *handler,
393 struct ib_event *event) 482 struct ib_event *event)
394{ 483{
395 struct ib_uverbs_file *file = 484 struct ib_uverbs_file *file =
396 container_of(handler, struct ib_uverbs_file, event_handler); 485 container_of(handler, struct ib_uverbs_file, event_handler);
@@ -399,38 +488,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler,
399 NULL, NULL); 488 NULL, NULL);
400} 489}
401 490
402static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, 491struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
403 struct ib_uverbs_file *uverbs_file) 492 int is_async, int *fd)
404{ 493{
494 struct ib_uverbs_event_file *ev_file;
405 struct file *filp; 495 struct file *filp;
496 int ret;
406 497
407 spin_lock_init(&file->lock); 498 ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
408 INIT_LIST_HEAD(&file->event_list); 499 if (!ev_file)
409 init_waitqueue_head(&file->poll_wait); 500 return ERR_PTR(-ENOMEM);
410 file->uverbs_file = uverbs_file; 501
411 file->async_queue = NULL; 502 kref_init(&ev_file->ref);
412 503 spin_lock_init(&ev_file->lock);
413 file->fd = get_unused_fd(); 504 INIT_LIST_HEAD(&ev_file->event_list);
414 if (file->fd < 0) 505 init_waitqueue_head(&ev_file->poll_wait);
415 return file->fd; 506 ev_file->uverbs_file = uverbs_file;
507 ev_file->async_queue = NULL;
508 ev_file->is_async = is_async;
509
510 *fd = get_unused_fd();
511 if (*fd < 0) {
512 ret = *fd;
513 goto err;
514 }
416 515
417 filp = get_empty_filp(); 516 filp = get_empty_filp();
418 if (!filp) { 517 if (!filp) {
419 put_unused_fd(file->fd); 518 ret = -ENFILE;
420 return -ENFILE; 519 goto err_fd;
421 } 520 }
422 521
423 filp->f_op = &uverbs_event_fops; 522 ev_file->file = filp;
523
524 /*
525 * fops_get() can't fail here, because we're coming from a
526 * system call on a uverbs file, which will already have a
527 * module reference.
528 */
529 filp->f_op = fops_get(&uverbs_event_fops);
424 filp->f_vfsmnt = mntget(uverbs_event_mnt); 530 filp->f_vfsmnt = mntget(uverbs_event_mnt);
425 filp->f_dentry = dget(uverbs_event_mnt->mnt_root); 531 filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
426 filp->f_mapping = filp->f_dentry->d_inode->i_mapping; 532 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
427 filp->f_flags = O_RDONLY; 533 filp->f_flags = O_RDONLY;
428 filp->f_mode = FMODE_READ; 534 filp->f_mode = FMODE_READ;
429 filp->private_data = file; 535 filp->private_data = ev_file;
430 536
431 fd_install(file->fd, filp); 537 return filp;
432 538
433 return 0; 539err_fd:
540 put_unused_fd(*fd);
541
542err:
543 kfree(ev_file);
544 return ERR_PTR(ret);
545}
546
547/*
548 * Look up a completion event file by FD. If lookup is successful,
549 * takes a ref to the event file struct that it returns; if
550 * unsuccessful, returns NULL.
551 */
552struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
553{
554 struct ib_uverbs_event_file *ev_file = NULL;
555 struct file *filp;
556
557 filp = fget(fd);
558 if (!filp)
559 return NULL;
560
561 if (filp->f_op != &uverbs_event_fops)
562 goto out;
563
564 ev_file = filp->private_data;
565 if (ev_file->is_async) {
566 ev_file = NULL;
567 goto out;
568 }
569
570 kref_get(&ev_file->ref);
571
572out:
573 fput(filp);
574 return ev_file;
434} 575}
435 576
436static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, 577static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
@@ -450,11 +591,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
450 591
451 if (hdr.command < 0 || 592 if (hdr.command < 0 ||
452 hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || 593 hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
453 !uverbs_cmd_table[hdr.command]) 594 !uverbs_cmd_table[hdr.command] ||
595 !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
454 return -EINVAL; 596 return -EINVAL;
455 597
456 if (!file->ucontext && 598 if (!file->ucontext &&
457 hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
458 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) 599 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
459 return -EINVAL; 600 return -EINVAL;
460 601
@@ -474,84 +615,57 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
474 615
475static int ib_uverbs_open(struct inode *inode, struct file *filp) 616static int ib_uverbs_open(struct inode *inode, struct file *filp)
476{ 617{
477 struct ib_uverbs_device *dev = 618 struct ib_uverbs_device *dev;
478 container_of(inode->i_cdev, struct ib_uverbs_device, dev);
479 struct ib_uverbs_file *file; 619 struct ib_uverbs_file *file;
480 int i = 0;
481 int ret; 620 int ret;
482 621
483 if (!try_module_get(dev->ib_dev->owner)) 622 spin_lock(&map_lock);
484 return -ENODEV; 623 dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
624 if (dev)
625 kref_get(&dev->ref);
626 spin_unlock(&map_lock);
627
628 if (!dev)
629 return -ENXIO;
630
631 if (!try_module_get(dev->ib_dev->owner)) {
632 ret = -ENODEV;
633 goto err;
634 }
485 635
486 file = kmalloc(sizeof *file + 636 file = kmalloc(sizeof *file, GFP_KERNEL);
487 (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
488 GFP_KERNEL);
489 if (!file) { 637 if (!file) {
490 ret = -ENOMEM; 638 ret = -ENOMEM;
491 goto err; 639 goto err_module;
492 } 640 }
493 641
494 file->device = dev; 642 file->device = dev;
643 file->ucontext = NULL;
644 file->async_file = NULL;
495 kref_init(&file->ref); 645 kref_init(&file->ref);
496 init_MUTEX(&file->mutex); 646 init_MUTEX(&file->mutex);
497 647
498 file->ucontext = NULL;
499
500 kref_get(&file->ref);
501 ret = ib_uverbs_event_init(&file->async_file, file);
502 if (ret)
503 goto err_kref;
504
505 file->async_file.is_async = 1;
506
507 for (i = 0; i < dev->num_comp; ++i) {
508 kref_get(&file->ref);
509 ret = ib_uverbs_event_init(&file->comp_file[i], file);
510 if (ret)
511 goto err_async;
512 file->comp_file[i].is_async = 0;
513 }
514
515
516 filp->private_data = file; 648 filp->private_data = file;
517 649
518 INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
519 ib_uverbs_event_handler);
520 if (ib_register_event_handler(&file->event_handler))
521 goto err_async;
522
523 return 0; 650 return 0;
524 651
525err_async: 652err_module:
526 while (i--) 653 module_put(dev->ib_dev->owner);
527 ib_uverbs_event_release(&file->comp_file[i]);
528
529 ib_uverbs_event_release(&file->async_file);
530
531err_kref:
532 /*
533 * One extra kref_put() because we took a reference before the
534 * event file creation that failed and got us here.
535 */
536 kref_put(&file->ref, ib_uverbs_release_file);
537 kref_put(&file->ref, ib_uverbs_release_file);
538 654
539err: 655err:
540 module_put(dev->ib_dev->owner); 656 kref_put(&dev->ref, ib_uverbs_release_dev);
657
541 return ret; 658 return ret;
542} 659}
543 660
544static int ib_uverbs_close(struct inode *inode, struct file *filp) 661static int ib_uverbs_close(struct inode *inode, struct file *filp)
545{ 662{
546 struct ib_uverbs_file *file = filp->private_data; 663 struct ib_uverbs_file *file = filp->private_data;
547 int i;
548 664
549 ib_unregister_event_handler(&file->event_handler); 665 ib_uverbs_cleanup_ucontext(file, file->ucontext);
550 ib_uverbs_event_release(&file->async_file);
551 ib_dealloc_ucontext(file->ucontext);
552 666
553 for (i = 0; i < file->device->num_comp; ++i) 667 if (file->async_file)
554 ib_uverbs_event_release(&file->comp_file[i]); 668 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
555 669
556 kref_put(&file->ref, ib_uverbs_release_file); 670 kref_put(&file->ref, ib_uverbs_release_file);
557 671
@@ -581,27 +695,25 @@ static struct ib_client uverbs_client = {
581 695
582static ssize_t show_ibdev(struct class_device *class_dev, char *buf) 696static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
583{ 697{
584 struct ib_uverbs_device *dev = 698 struct ib_uverbs_device *dev = class_get_devdata(class_dev);
585 container_of(class_dev, struct ib_uverbs_device, class_dev); 699
700 if (!dev)
701 return -ENODEV;
586 702
587 return sprintf(buf, "%s\n", dev->ib_dev->name); 703 return sprintf(buf, "%s\n", dev->ib_dev->name);
588} 704}
589static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 705static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
590 706
591static void ib_uverbs_release_class_dev(struct class_device *class_dev) 707static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf)
592{ 708{
593 struct ib_uverbs_device *dev = 709 struct ib_uverbs_device *dev = class_get_devdata(class_dev);
594 container_of(class_dev, struct ib_uverbs_device, class_dev);
595 710
596 cdev_del(&dev->dev); 711 if (!dev)
597 clear_bit(dev->devnum, dev_map); 712 return -ENODEV;
598 kfree(dev);
599}
600 713
601static struct class uverbs_class = { 714 return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
602 .name = "infiniband_verbs", 715}
603 .release = ib_uverbs_release_class_dev 716static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
604};
605 717
606static ssize_t show_abi_version(struct class *class, char *buf) 718static ssize_t show_abi_version(struct class *class, char *buf)
607{ 719{
@@ -622,6 +734,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
622 734
623 memset(uverbs_dev, 0, sizeof *uverbs_dev); 735 memset(uverbs_dev, 0, sizeof *uverbs_dev);
624 736
737 kref_init(&uverbs_dev->ref);
738
625 spin_lock(&map_lock); 739 spin_lock(&map_lock);
626 uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); 740 uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
627 if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { 741 if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
@@ -631,41 +745,48 @@ static void ib_uverbs_add_one(struct ib_device *device)
631 set_bit(uverbs_dev->devnum, dev_map); 745 set_bit(uverbs_dev->devnum, dev_map);
632 spin_unlock(&map_lock); 746 spin_unlock(&map_lock);
633 747
634 uverbs_dev->ib_dev = device; 748 uverbs_dev->ib_dev = device;
635 uverbs_dev->num_comp = 1; 749 uverbs_dev->num_comp_vectors = 1;
636 750
637 if (device->mmap) 751 uverbs_dev->dev = cdev_alloc();
638 cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); 752 if (!uverbs_dev->dev)
639 else
640 cdev_init(&uverbs_dev->dev, &uverbs_fops);
641 uverbs_dev->dev.owner = THIS_MODULE;
642 kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
643 if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
644 goto err; 753 goto err;
754 uverbs_dev->dev->owner = THIS_MODULE;
755 uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
756 kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum);
757 if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
758 goto err_cdev;
645 759
646 uverbs_dev->class_dev.class = &uverbs_class; 760 uverbs_dev->class_dev = class_device_create(uverbs_class, uverbs_dev->dev->dev,
647 uverbs_dev->class_dev.dev = device->dma_device; 761 device->dma_device,
648 uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; 762 "uverbs%d", uverbs_dev->devnum);
649 snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); 763 if (IS_ERR(uverbs_dev->class_dev))
650 if (class_device_register(&uverbs_dev->class_dev))
651 goto err_cdev; 764 goto err_cdev;
652 765
653 if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) 766 class_set_devdata(uverbs_dev->class_dev, uverbs_dev);
767
768 if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev))
654 goto err_class; 769 goto err_class;
770 if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version))
771 goto err_class;
772
773 spin_lock(&map_lock);
774 dev_table[uverbs_dev->devnum] = uverbs_dev;
775 spin_unlock(&map_lock);
655 776
656 ib_set_client_data(device, &uverbs_client, uverbs_dev); 777 ib_set_client_data(device, &uverbs_client, uverbs_dev);
657 778
658 return; 779 return;
659 780
660err_class: 781err_class:
661 class_device_unregister(&uverbs_dev->class_dev); 782 class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
662 783
663err_cdev: 784err_cdev:
664 cdev_del(&uverbs_dev->dev); 785 cdev_del(uverbs_dev->dev);
665 clear_bit(uverbs_dev->devnum, dev_map); 786 clear_bit(uverbs_dev->devnum, dev_map);
666 787
667err: 788err:
668 kfree(uverbs_dev); 789 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
669 return; 790 return;
670} 791}
671 792
@@ -676,7 +797,16 @@ static void ib_uverbs_remove_one(struct ib_device *device)
676 if (!uverbs_dev) 797 if (!uverbs_dev)
677 return; 798 return;
678 799
679 class_device_unregister(&uverbs_dev->class_dev); 800 class_set_devdata(uverbs_dev->class_dev, NULL);
801 class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
802 cdev_del(uverbs_dev->dev);
803
804 spin_lock(&map_lock);
805 dev_table[uverbs_dev->devnum] = NULL;
806 spin_unlock(&map_lock);
807
808 clear_bit(uverbs_dev->devnum, dev_map);
809 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
680} 810}
681 811
682static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, 812static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
@@ -706,13 +836,14 @@ static int __init ib_uverbs_init(void)
706 goto out; 836 goto out;
707 } 837 }
708 838
709 ret = class_register(&uverbs_class); 839 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
710 if (ret) { 840 if (IS_ERR(uverbs_class)) {
841 ret = PTR_ERR(uverbs_class);
711 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); 842 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
712 goto out_chrdev; 843 goto out_chrdev;
713 } 844 }
714 845
715 ret = class_create_file(&uverbs_class, &class_attr_abi_version); 846 ret = class_create_file(uverbs_class, &class_attr_abi_version);
716 if (ret) { 847 if (ret) {
717 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); 848 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
718 goto out_class; 849 goto out_class;
@@ -746,7 +877,7 @@ out_fs:
746 unregister_filesystem(&uverbs_event_fs); 877 unregister_filesystem(&uverbs_event_fs);
747 878
748out_class: 879out_class:
749 class_unregister(&uverbs_class); 880 class_destroy(uverbs_class);
750 881
751out_chrdev: 882out_chrdev:
752 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 883 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
@@ -760,8 +891,15 @@ static void __exit ib_uverbs_cleanup(void)
760 ib_unregister_client(&uverbs_client); 891 ib_unregister_client(&uverbs_client);
761 mntput(uverbs_event_mnt); 892 mntput(uverbs_event_mnt);
762 unregister_filesystem(&uverbs_event_fs); 893 unregister_filesystem(&uverbs_event_fs);
763 class_unregister(&uverbs_class); 894 class_destroy(uverbs_class);
764 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 895 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
896 idr_destroy(&ib_uverbs_pd_idr);
897 idr_destroy(&ib_uverbs_mr_idr);
898 idr_destroy(&ib_uverbs_mw_idr);
899 idr_destroy(&ib_uverbs_ah_idr);
900 idr_destroy(&ib_uverbs_cq_idr);
901 idr_destroy(&ib_uverbs_qp_idr);
902 idr_destroy(&ib_uverbs_srq_idr);
765} 903}
766 904
767module_init(ib_uverbs_init); 905module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5081d903e561..72d3ef786db5 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -523,16 +523,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
523 523
524int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 524int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
525{ 525{
526 return qp->device->attach_mcast ? 526 if (!qp->device->attach_mcast)
527 qp->device->attach_mcast(qp, gid, lid) : 527 return -ENOSYS;
528 -ENOSYS; 528 if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
529 return -EINVAL;
530
531 return qp->device->attach_mcast(qp, gid, lid);
529} 532}
530EXPORT_SYMBOL(ib_attach_mcast); 533EXPORT_SYMBOL(ib_attach_mcast);
531 534
532int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 535int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
533{ 536{
534 return qp->device->detach_mcast ? 537 if (!qp->device->detach_mcast)
535 qp->device->detach_mcast(qp, gid, lid) : 538 return -ENOSYS;
536 -ENOSYS; 539 if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
540 return -EINVAL;
541
542 return qp->device->detach_mcast(qp, gid, lid);
537} 543}
538EXPORT_SYMBOL(ib_detach_mcast); 544EXPORT_SYMBOL(ib_detach_mcast);
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index c44f7bae5424..47ec5a7cba0b 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
7ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ 7ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
8 mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ 8 mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
9 mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ 9 mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
10 mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o 10 mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
11 mthca_catas.o
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
new file mode 100644
index 000000000000..7ac52af43b99
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -0,0 +1,153 @@
1/*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include "mthca_dev.h"
36
37enum {
38 MTHCA_CATAS_POLL_INTERVAL = 5 * HZ,
39
40 MTHCA_CATAS_TYPE_INTERNAL = 0,
41 MTHCA_CATAS_TYPE_UPLINK = 3,
42 MTHCA_CATAS_TYPE_DDR = 4,
43 MTHCA_CATAS_TYPE_PARITY = 5,
44};
45
46static DEFINE_SPINLOCK(catas_lock);
47
48static void handle_catas(struct mthca_dev *dev)
49{
50 struct ib_event event;
51 const char *type;
52 int i;
53
54 event.device = &dev->ib_dev;
55 event.event = IB_EVENT_DEVICE_FATAL;
56 event.element.port_num = 0;
57
58 ib_dispatch_event(&event);
59
60 switch (swab32(readl(dev->catas_err.map)) >> 24) {
61 case MTHCA_CATAS_TYPE_INTERNAL:
62 type = "internal error";
63 break;
64 case MTHCA_CATAS_TYPE_UPLINK:
65 type = "uplink bus error";
66 break;
67 case MTHCA_CATAS_TYPE_DDR:
68 type = "DDR data error";
69 break;
70 case MTHCA_CATAS_TYPE_PARITY:
71 type = "internal parity error";
72 break;
73 default:
74 type = "unknown error";
75 break;
76 }
77
78 mthca_err(dev, "Catastrophic error detected: %s\n", type);
79 for (i = 0; i < dev->catas_err.size; ++i)
80 mthca_err(dev, " buf[%02x]: %08x\n",
81 i, swab32(readl(dev->catas_err.map + i)));
82}
83
84static void poll_catas(unsigned long dev_ptr)
85{
86 struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
87 unsigned long flags;
88 int i;
89
90 for (i = 0; i < dev->catas_err.size; ++i)
91 if (readl(dev->catas_err.map + i)) {
92 handle_catas(dev);
93 return;
94 }
95
96 spin_lock_irqsave(&catas_lock, flags);
97 if (dev->catas_err.stop)
98 mod_timer(&dev->catas_err.timer,
99 jiffies + MTHCA_CATAS_POLL_INTERVAL);
100 spin_unlock_irqrestore(&catas_lock, flags);
101
102 return;
103}
104
105void mthca_start_catas_poll(struct mthca_dev *dev)
106{
107 unsigned long addr;
108
109 init_timer(&dev->catas_err.timer);
110 dev->catas_err.stop = 0;
111 dev->catas_err.map = NULL;
112
113 addr = pci_resource_start(dev->pdev, 0) +
114 ((pci_resource_len(dev->pdev, 0) - 1) &
115 dev->catas_err.addr);
116
117 if (!request_mem_region(addr, dev->catas_err.size * 4,
118 DRV_NAME)) {
119 mthca_warn(dev, "couldn't request catastrophic error region "
120 "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
121 return;
122 }
123
124 dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
125 if (!dev->catas_err.map) {
126 mthca_warn(dev, "couldn't map catastrophic error region "
127 "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
128 release_mem_region(addr, dev->catas_err.size * 4);
129 return;
130 }
131
132 dev->catas_err.timer.data = (unsigned long) dev;
133 dev->catas_err.timer.function = poll_catas;
134 dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
135 add_timer(&dev->catas_err.timer);
136}
137
138void mthca_stop_catas_poll(struct mthca_dev *dev)
139{
140 spin_lock_irq(&catas_lock);
141 dev->catas_err.stop = 1;
142 spin_unlock_irq(&catas_lock);
143
144 del_timer_sync(&dev->catas_err.timer);
145
146 if (dev->catas_err.map) {
147 iounmap(dev->catas_err.map);
148 release_mem_region(pci_resource_start(dev->pdev, 0) +
149 ((pci_resource_len(dev->pdev, 0) - 1) &
150 dev->catas_err.addr),
151 dev->catas_err.size * 4);
152 }
153}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 378646b5a1b8..49f211d55df7 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -706,9 +707,13 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
706 707
707 MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); 708 MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
708 dev->cmd.max_cmds = 1 << lg; 709 dev->cmd.max_cmds = 1 << lg;
710 MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
711 MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
709 712
710 mthca_dbg(dev, "FW version %012llx, max commands %d\n", 713 mthca_dbg(dev, "FW version %012llx, max commands %d\n",
711 (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); 714 (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
715 mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
716 (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
712 717
713 if (mthca_is_memfree(dev)) { 718 if (mthca_is_memfree(dev)) {
714 MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); 719 MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
@@ -933,9 +938,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
933 goto out; 938 goto out;
934 939
935 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); 940 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
936 dev_lim->max_srq_sz = 1 << field; 941 dev_lim->max_srq_sz = (1 << field) - 1;
937 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); 942 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
938 dev_lim->max_qp_sz = 1 << field; 943 dev_lim->max_qp_sz = (1 << field) - 1;
939 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); 944 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
940 dev_lim->reserved_qps = 1 << (field & 0xf); 945 dev_lim->reserved_qps = 1 << (field & 0xf);
941 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); 946 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1045,6 +1050,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
1045 dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); 1050 dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
1046 mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", 1051 mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
1047 dev_lim->max_pds, dev_lim->reserved_mgms); 1052 dev_lim->max_pds, dev_lim->reserved_mgms);
1053 mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
1054 dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
1048 1055
1049 mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); 1056 mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
1050 1057
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bff5a8425f4..7e68bd4a3780 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -83,6 +83,8 @@ enum {
83 /* Arbel FW gives us these, but we need them for Tavor */ 83 /* Arbel FW gives us these, but we need them for Tavor */
84 MTHCA_MPT_ENTRY_SIZE = 0x40, 84 MTHCA_MPT_ENTRY_SIZE = 0x40,
85 MTHCA_MTT_SEG_SIZE = 0x40, 85 MTHCA_MTT_SEG_SIZE = 0x40,
86
87 MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
86}; 88};
87 89
88enum { 90enum {
@@ -128,12 +130,16 @@ struct mthca_limits {
128 int num_uars; 130 int num_uars;
129 int max_sg; 131 int max_sg;
130 int num_qps; 132 int num_qps;
133 int max_wqes;
134 int max_qp_init_rdma;
131 int reserved_qps; 135 int reserved_qps;
132 int num_srqs; 136 int num_srqs;
137 int max_srq_wqes;
133 int reserved_srqs; 138 int reserved_srqs;
134 int num_eecs; 139 int num_eecs;
135 int reserved_eecs; 140 int reserved_eecs;
136 int num_cqs; 141 int num_cqs;
142 int max_cqes;
137 int reserved_cqs; 143 int reserved_cqs;
138 int num_eqs; 144 int num_eqs;
139 int reserved_eqs; 145 int reserved_eqs;
@@ -148,6 +154,7 @@ struct mthca_limits {
148 int reserved_mcgs; 154 int reserved_mcgs;
149 int num_pds; 155 int num_pds;
150 int reserved_pds; 156 int reserved_pds;
157 u32 flags;
151 u8 port_width_cap; 158 u8 port_width_cap;
152}; 159};
153 160
@@ -251,6 +258,14 @@ struct mthca_mcg_table {
251 struct mthca_icm_table *table; 258 struct mthca_icm_table *table;
252}; 259};
253 260
261struct mthca_catas_err {
262 u64 addr;
263 u32 __iomem *map;
264 unsigned long stop;
265 u32 size;
266 struct timer_list timer;
267};
268
254struct mthca_dev { 269struct mthca_dev {
255 struct ib_device ib_dev; 270 struct ib_device ib_dev;
256 struct pci_dev *pdev; 271 struct pci_dev *pdev;
@@ -311,6 +326,8 @@ struct mthca_dev {
311 struct mthca_av_table av_table; 326 struct mthca_av_table av_table;
312 struct mthca_mcg_table mcg_table; 327 struct mthca_mcg_table mcg_table;
313 328
329 struct mthca_catas_err catas_err;
330
314 struct mthca_uar driver_uar; 331 struct mthca_uar driver_uar;
315 struct mthca_db_table *db_tab; 332 struct mthca_db_table *db_tab;
316 struct mthca_pd driver_pd; 333 struct mthca_pd driver_pd;
@@ -398,6 +415,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev);
398int mthca_register_device(struct mthca_dev *dev); 415int mthca_register_device(struct mthca_dev *dev);
399void mthca_unregister_device(struct mthca_dev *dev); 416void mthca_unregister_device(struct mthca_dev *dev);
400 417
418void mthca_start_catas_poll(struct mthca_dev *dev);
419void mthca_stop_catas_poll(struct mthca_dev *dev);
420
401int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); 421int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
402void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); 422void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
403 423
@@ -447,6 +467,8 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
447int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, 467int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
448 struct ib_srq_attr *attr, struct mthca_srq *srq); 468 struct ib_srq_attr *attr, struct mthca_srq *srq);
449void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); 469void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
470int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
471 enum ib_srq_attr_mask attr_mask);
450void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 472void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
451 enum ib_event_type event_type); 473 enum ib_event_type event_type);
452void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); 474void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8dfafda5ed24..e5a047a6dbeb 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -83,7 +83,8 @@ enum {
83 MTHCA_EVENT_TYPE_PATH_MIG = 0x01, 83 MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
84 MTHCA_EVENT_TYPE_COMM_EST = 0x02, 84 MTHCA_EVENT_TYPE_COMM_EST = 0x02,
85 MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, 85 MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
86 MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, 86 MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
87 MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
87 MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, 88 MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
88 MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, 89 MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
89 MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, 90 MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
@@ -110,8 +111,9 @@ enum {
110 (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ 111 (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
111 (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ 112 (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
112 (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) 113 (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
113#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ 114#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
114 (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) 115 (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
116 (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
115#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) 117#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
116 118
117#define MTHCA_EQ_DB_INC_CI (1 << 24) 119#define MTHCA_EQ_DB_INC_CI (1 << 24)
@@ -142,6 +144,9 @@ struct mthca_eqe {
142 __be32 qpn; 144 __be32 qpn;
143 } __attribute__((packed)) qp; 145 } __attribute__((packed)) qp;
144 struct { 146 struct {
147 __be32 srqn;
148 } __attribute__((packed)) srq;
149 struct {
145 __be32 cqn; 150 __be32 cqn;
146 u32 reserved1; 151 u32 reserved1;
147 u8 reserved2[3]; 152 u8 reserved2[3];
@@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
305 IB_EVENT_SQ_DRAINED); 310 IB_EVENT_SQ_DRAINED);
306 break; 311 break;
307 312
313 case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
314 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
315 IB_EVENT_QP_LAST_WQE_REACHED);
316 break;
317
318 case MTHCA_EVENT_TYPE_SRQ_LIMIT:
319 mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
320 IB_EVENT_SRQ_LIMIT_REACHED);
321 break;
322
308 case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: 323 case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
309 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, 324 mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
310 IB_EVENT_QP_FATAL); 325 IB_EVENT_QP_FATAL);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9804174f7f3c..8561b297a19b 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -46,11 +46,6 @@ enum {
46 MTHCA_VENDOR_CLASS2 = 0xa 46 MTHCA_VENDOR_CLASS2 = 0xa
47}; 47};
48 48
49struct mthca_trap_mad {
50 struct ib_mad *mad;
51 DECLARE_PCI_UNMAP_ADDR(mapping)
52};
53
54static void update_sm_ah(struct mthca_dev *dev, 49static void update_sm_ah(struct mthca_dev *dev,
55 u8 port_num, u16 lid, u8 sl) 50 u8 port_num, u16 lid, u8 sl)
56{ 51{
@@ -116,49 +111,14 @@ static void forward_trap(struct mthca_dev *dev,
116 struct ib_mad *mad) 111 struct ib_mad *mad)
117{ 112{
118 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; 113 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
119 struct mthca_trap_mad *tmad; 114 struct ib_mad_send_buf *send_buf;
120 struct ib_sge gather_list;
121 struct ib_send_wr *bad_wr, wr = {
122 .opcode = IB_WR_SEND,
123 .sg_list = &gather_list,
124 .num_sge = 1,
125 .send_flags = IB_SEND_SIGNALED,
126 .wr = {
127 .ud = {
128 .remote_qpn = qpn,
129 .remote_qkey = qpn ? IB_QP1_QKEY : 0,
130 .timeout_ms = 0
131 }
132 }
133 };
134 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; 115 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
135 int ret; 116 int ret;
136 unsigned long flags; 117 unsigned long flags;
137 118
138 if (agent) { 119 if (agent) {
139 tmad = kmalloc(sizeof *tmad, GFP_KERNEL); 120 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
140 if (!tmad) 121 IB_MGMT_MAD_DATA, GFP_ATOMIC);
141 return;
142
143 tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
144 if (!tmad->mad) {
145 kfree(tmad);
146 return;
147 }
148
149 memcpy(tmad->mad, mad, sizeof *mad);
150
151 wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
152 wr.wr_id = (unsigned long) tmad;
153
154 gather_list.addr = dma_map_single(agent->device->dma_device,
155 tmad->mad,
156 sizeof *tmad->mad,
157 DMA_TO_DEVICE);
158 gather_list.length = sizeof *tmad->mad;
159 gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
160 pci_unmap_addr_set(tmad, mapping, gather_list.addr);
161
162 /* 122 /*
163 * We rely here on the fact that MLX QPs don't use the 123 * We rely here on the fact that MLX QPs don't use the
164 * address handle after the send is posted (this is 124 * address handle after the send is posted (this is
@@ -166,21 +126,15 @@ static void forward_trap(struct mthca_dev *dev,
166 * it's OK for our devices). 126 * it's OK for our devices).
167 */ 127 */
168 spin_lock_irqsave(&dev->sm_lock, flags); 128 spin_lock_irqsave(&dev->sm_lock, flags);
169 wr.wr.ud.ah = dev->sm_ah[port_num - 1]; 129 memcpy(send_buf->mad, mad, sizeof *mad);
170 if (wr.wr.ud.ah) 130 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
171 ret = ib_post_send_mad(agent, &wr, &bad_wr); 131 ret = ib_post_send_mad(send_buf, NULL);
172 else 132 else
173 ret = -EINVAL; 133 ret = -EINVAL;
174 spin_unlock_irqrestore(&dev->sm_lock, flags); 134 spin_unlock_irqrestore(&dev->sm_lock, flags);
175 135
176 if (ret) { 136 if (ret)
177 dma_unmap_single(agent->device->dma_device, 137 ib_free_send_mad(send_buf);
178 pci_unmap_addr(tmad, mapping),
179 sizeof *tmad->mad,
180 DMA_TO_DEVICE);
181 kfree(tmad->mad);
182 kfree(tmad);
183 }
184 } 138 }
185} 139}
186 140
@@ -267,15 +221,7 @@ int mthca_process_mad(struct ib_device *ibdev,
267static void send_handler(struct ib_mad_agent *agent, 221static void send_handler(struct ib_mad_agent *agent,
268 struct ib_mad_send_wc *mad_send_wc) 222 struct ib_mad_send_wc *mad_send_wc)
269{ 223{
270 struct mthca_trap_mad *tmad = 224 ib_free_send_mad(mad_send_wc->send_buf);
271 (void *) (unsigned long) mad_send_wc->wr_id;
272
273 dma_unmap_single(agent->device->dma_device,
274 pci_unmap_addr(tmad, mapping),
275 sizeof *tmad->mad,
276 DMA_TO_DEVICE);
277 kfree(tmad->mad);
278 kfree(tmad);
279} 225}
280 226
281int mthca_create_agents(struct mthca_dev *dev) 227int mthca_create_agents(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 23a3f56c7899..883d1e5a79bc 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -162,9 +162,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
162 mdev->limits.pkey_table_len = dev_lim->max_pkeys; 162 mdev->limits.pkey_table_len = dev_lim->max_pkeys;
163 mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; 163 mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
164 mdev->limits.max_sg = dev_lim->max_sg; 164 mdev->limits.max_sg = dev_lim->max_sg;
165 mdev->limits.max_wqes = dev_lim->max_qp_sz;
166 mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
165 mdev->limits.reserved_qps = dev_lim->reserved_qps; 167 mdev->limits.reserved_qps = dev_lim->reserved_qps;
168 mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
166 mdev->limits.reserved_srqs = dev_lim->reserved_srqs; 169 mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
167 mdev->limits.reserved_eecs = dev_lim->reserved_eecs; 170 mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
171 /*
172 * Subtract 1 from the limit because we need to allocate a
173 * spare CQE so the HCA HW can tell the difference between an
174 * empty CQ and a full CQ.
175 */
176 mdev->limits.max_cqes = dev_lim->max_cq_sz - 1;
168 mdev->limits.reserved_cqs = dev_lim->reserved_cqs; 177 mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
169 mdev->limits.reserved_eqs = dev_lim->reserved_eqs; 178 mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
170 mdev->limits.reserved_mtts = dev_lim->reserved_mtts; 179 mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
@@ -172,6 +181,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
172 mdev->limits.reserved_uars = dev_lim->reserved_uars; 181 mdev->limits.reserved_uars = dev_lim->reserved_uars;
173 mdev->limits.reserved_pds = dev_lim->reserved_pds; 182 mdev->limits.reserved_pds = dev_lim->reserved_pds;
174 mdev->limits.port_width_cap = dev_lim->max_port_width; 183 mdev->limits.port_width_cap = dev_lim->max_port_width;
184 mdev->limits.flags = dev_lim->flags;
175 185
176 /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. 186 /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
177 May be doable since hardware supports it for SRQ. 187 May be doable since hardware supports it for SRQ.
@@ -1186,6 +1196,7 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table);
1186 1196
1187static struct pci_driver mthca_driver = { 1197static struct pci_driver mthca_driver = {
1188 .name = DRV_NAME, 1198 .name = DRV_NAME,
1199 .owner = THIS_MODULE,
1189 .id_table = mthca_pci_table, 1200 .id_table = mthca_pci_table,
1190 .probe = mthca_init_one, 1201 .probe = mthca_init_one,
1191 .remove = __devexit_p(mthca_remove_one) 1202 .remove = __devexit_p(mthca_remove_one)
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index a2707605f4c8..b47ea7daf088 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -37,10 +37,6 @@
37#include "mthca_dev.h" 37#include "mthca_dev.h"
38#include "mthca_cmd.h" 38#include "mthca_cmd.h"
39 39
40enum {
41 MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
42};
43
44struct mthca_mgm { 40struct mthca_mgm {
45 __be32 next_gid_index; 41 __be32 next_gid_index;
46 u32 reserved[3]; 42 u32 reserved[3];
@@ -189,7 +185,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
189 } 185 }
190 186
191 for (i = 0; i < MTHCA_QP_PER_MGM; ++i) 187 for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
192 if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { 188 if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
189 mthca_dbg(dev, "QP %06x already a member of MGM\n",
190 ibqp->qp_num);
191 err = 0;
192 goto out;
193 } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
193 mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); 194 mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
194 break; 195 break;
195 } 196 }
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 9ad8b3b6cfef..d72fe95cba08 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
487 } 487 }
488} 488}
489 489
490int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) 490int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
491 u32 qn, __be32 **db)
491{ 492{
492 int group; 493 int group;
493 int start, end, dir; 494 int start, end, dir;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 29433f295253..4fdca26eea85 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
173 173
174int mthca_init_db_tab(struct mthca_dev *dev); 174int mthca_init_db_tab(struct mthca_dev *dev);
175void mthca_cleanup_db_tab(struct mthca_dev *dev); 175void mthca_cleanup_db_tab(struct mthca_dev *dev);
176int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); 176int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
177 u32 qn, __be32 **db);
177void mthca_free_db(struct mthca_dev *dev, int type, int db_index); 178void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
178 179
179#endif /* MTHCA_MEMFREE_H */ 180#endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 3f5319a46577..1b9477edbd7b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
37 */ 37 */
38 38
39#include <rdma/ib_smi.h> 39#include <rdma/ib_smi.h>
40#include <rdma/ib_user_verbs.h>
40#include <linux/mm.h> 41#include <linux/mm.h>
41 42
42#include "mthca_dev.h" 43#include "mthca_dev.h"
@@ -90,15 +91,26 @@ static int mthca_query_device(struct ib_device *ibdev,
90 91
91 props->max_mr_size = ~0ull; 92 props->max_mr_size = ~0ull;
92 props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; 93 props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
93 props->max_qp_wr = 0xffff; 94 props->max_qp_wr = mdev->limits.max_wqes;
94 props->max_sge = mdev->limits.max_sg; 95 props->max_sge = mdev->limits.max_sg;
95 props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; 96 props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
96 props->max_cqe = 0xffff; 97 props->max_cqe = mdev->limits.max_cqes;
97 props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; 98 props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
98 props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; 99 props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
99 props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; 100 props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
100 props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; 101 props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
102 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
103 props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
104 props->max_srq_wr = mdev->limits.max_srq_wqes;
105 props->max_srq_sge = mdev->limits.max_sg;
101 props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; 106 props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
107 props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
108 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
109 props->max_pkeys = mdev->limits.pkey_table_len;
110 props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
111 props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
112 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
113 props->max_mcast_grp;
102 114
103 err = 0; 115 err = 0;
104 out: 116 out:
@@ -150,9 +162,13 @@ static int mthca_query_port(struct ib_device *ibdev,
150 props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; 162 props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
151 props->max_msg_sz = 0x80000000; 163 props->max_msg_sz = 0x80000000;
152 props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; 164 props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
165 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
153 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); 166 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
154 props->active_width = out_mad->data[31] & 0xf; 167 props->active_width = out_mad->data[31] & 0xf;
155 props->active_speed = out_mad->data[35] >> 4; 168 props->active_speed = out_mad->data[35] >> 4;
169 props->max_mtu = out_mad->data[41] & 0xf;
170 props->active_mtu = out_mad->data[36] >> 4;
171 props->subnet_timeout = out_mad->data[51] & 0x1f;
156 172
157 out: 173 out:
158 kfree(in_mad); 174 kfree(in_mad);
@@ -634,6 +650,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
634 int nent; 650 int nent;
635 int err; 651 int err;
636 652
653 if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
654 return ERR_PTR(-EINVAL);
655
637 if (context) { 656 if (context) {
638 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) 657 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
639 return ERR_PTR(-EFAULT); 658 return ERR_PTR(-EFAULT);
@@ -1058,6 +1077,26 @@ int mthca_register_device(struct mthca_dev *dev)
1058 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); 1077 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
1059 dev->ib_dev.owner = THIS_MODULE; 1078 dev->ib_dev.owner = THIS_MODULE;
1060 1079
1080 dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
1081 dev->ib_dev.uverbs_cmd_mask =
1082 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
1083 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
1084 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
1085 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
1086 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
1087 (1ull << IB_USER_VERBS_CMD_REG_MR) |
1088 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
1089 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1090 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
1091 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
1092 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
1093 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
1094 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1095 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
1096 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
1097 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1098 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1099 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
1061 dev->ib_dev.node_type = IB_NODE_CA; 1100 dev->ib_dev.node_type = IB_NODE_CA;
1062 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 1101 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
1063 dev->ib_dev.dma_device = &dev->pdev->dev; 1102 dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -1077,6 +1116,7 @@ int mthca_register_device(struct mthca_dev *dev)
1077 1116
1078 if (dev->mthca_flags & MTHCA_FLAG_SRQ) { 1117 if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
1079 dev->ib_dev.create_srq = mthca_create_srq; 1118 dev->ib_dev.create_srq = mthca_create_srq;
1119 dev->ib_dev.modify_srq = mthca_modify_srq;
1080 dev->ib_dev.destroy_srq = mthca_destroy_srq; 1120 dev->ib_dev.destroy_srq = mthca_destroy_srq;
1081 1121
1082 if (mthca_is_memfree(dev)) 1122 if (mthca_is_memfree(dev))
@@ -1135,10 +1175,13 @@ int mthca_register_device(struct mthca_dev *dev)
1135 } 1175 }
1136 } 1176 }
1137 1177
1178 mthca_start_catas_poll(dev);
1179
1138 return 0; 1180 return 0;
1139} 1181}
1140 1182
1141void mthca_unregister_device(struct mthca_dev *dev) 1183void mthca_unregister_device(struct mthca_dev *dev)
1142{ 1184{
1185 mthca_stop_catas_poll(dev);
1143 ib_unregister_device(&dev->ib_dev); 1186 ib_unregister_device(&dev->ib_dev);
1144} 1187}
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 5fa00669f9b8..62ff091505da 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -338,8 +338,7 @@ static const struct {
338 [UC] = (IB_QP_AV | 338 [UC] = (IB_QP_AV |
339 IB_QP_PATH_MTU | 339 IB_QP_PATH_MTU |
340 IB_QP_DEST_QPN | 340 IB_QP_DEST_QPN |
341 IB_QP_RQ_PSN | 341 IB_QP_RQ_PSN),
342 IB_QP_MAX_DEST_RD_ATOMIC),
343 [RC] = (IB_QP_AV | 342 [RC] = (IB_QP_AV |
344 IB_QP_PATH_MTU | 343 IB_QP_PATH_MTU |
345 IB_QP_DEST_QPN | 344 IB_QP_DEST_QPN |
@@ -368,8 +367,7 @@ static const struct {
368 .trans = MTHCA_TRANS_RTR2RTS, 367 .trans = MTHCA_TRANS_RTR2RTS,
369 .req_param = { 368 .req_param = {
370 [UD] = IB_QP_SQ_PSN, 369 [UD] = IB_QP_SQ_PSN,
371 [UC] = (IB_QP_SQ_PSN | 370 [UC] = IB_QP_SQ_PSN,
372 IB_QP_MAX_QP_RD_ATOMIC),
373 [RC] = (IB_QP_TIMEOUT | 371 [RC] = (IB_QP_TIMEOUT |
374 IB_QP_RETRY_CNT | 372 IB_QP_RETRY_CNT |
375 IB_QP_RNR_RETRY | 373 IB_QP_RNR_RETRY |
@@ -446,8 +444,6 @@ static const struct {
446 [UD] = (IB_QP_PKEY_INDEX | 444 [UD] = (IB_QP_PKEY_INDEX |
447 IB_QP_QKEY), 445 IB_QP_QKEY),
448 [UC] = (IB_QP_AV | 446 [UC] = (IB_QP_AV |
449 IB_QP_MAX_QP_RD_ATOMIC |
450 IB_QP_MAX_DEST_RD_ATOMIC |
451 IB_QP_CUR_STATE | 447 IB_QP_CUR_STATE |
452 IB_QP_ALT_PATH | 448 IB_QP_ALT_PATH |
453 IB_QP_ACCESS_FLAGS | 449 IB_QP_ACCESS_FLAGS |
@@ -478,7 +474,7 @@ static const struct {
478 .opt_param = { 474 .opt_param = {
479 [UD] = (IB_QP_CUR_STATE | 475 [UD] = (IB_QP_CUR_STATE |
480 IB_QP_QKEY), 476 IB_QP_QKEY),
481 [UC] = (IB_QP_CUR_STATE), 477 [UC] = IB_QP_CUR_STATE,
482 [RC] = (IB_QP_CUR_STATE | 478 [RC] = (IB_QP_CUR_STATE |
483 IB_QP_MIN_RNR_TIMER), 479 IB_QP_MIN_RNR_TIMER),
484 [MLX] = (IB_QP_CUR_STATE | 480 [MLX] = (IB_QP_CUR_STATE |
@@ -1112,8 +1108,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1112 struct mthca_qp *qp) 1108 struct mthca_qp *qp)
1113{ 1109{
1114 /* Sanity check QP size before proceeding */ 1110 /* Sanity check QP size before proceeding */
1115 if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || 1111 if (cap->max_send_wr > dev->limits.max_wqes ||
1116 cap->max_send_sge > 64 || cap->max_recv_sge > 64) 1112 cap->max_recv_wr > dev->limits.max_wqes ||
1113 cap->max_send_sge > dev->limits.max_sg ||
1114 cap->max_recv_sge > dev->limits.max_sg)
1117 return -EINVAL; 1115 return -EINVAL;
1118 1116
1119 if (mthca_is_memfree(dev)) { 1117 if (mthca_is_memfree(dev)) {
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 18998d48c53e..64f70aa1b3c0 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -186,7 +186,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
186 int err; 186 int err;
187 187
188 /* Sanity check SRQ size before proceeding */ 188 /* Sanity check SRQ size before proceeding */
189 if (attr->max_wr > 16 << 20 || attr->max_sge > 64) 189 if (attr->max_wr > dev->limits.max_srq_wqes ||
190 attr->max_sge > dev->limits.max_sg)
190 return -EINVAL; 191 return -EINVAL;
191 192
192 srq->max = attr->max_wr; 193 srq->max = attr->max_wr;
@@ -332,6 +333,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
332 mthca_free_mailbox(dev, mailbox); 333 mthca_free_mailbox(dev, mailbox);
333} 334}
334 335
336int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
337 enum ib_srq_attr_mask attr_mask)
338{
339 struct mthca_dev *dev = to_mdev(ibsrq->device);
340 struct mthca_srq *srq = to_msrq(ibsrq);
341 int ret;
342 u8 status;
343
344 /* We don't support resizing SRQs (yet?) */
345 if (attr_mask & IB_SRQ_MAX_WR)
346 return -EINVAL;
347
348 if (attr_mask & IB_SRQ_LIMIT) {
349 ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
350 if (ret)
351 return ret;
352 if (status)
353 return -EINVAL;
354 }
355
356 return 0;
357}
358
335void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 359void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
336 enum ib_event_type event_type) 360 enum ib_event_type event_type)
337{ 361{
@@ -354,7 +378,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
354 378
355 event.device = &dev->ib_dev; 379 event.device = &dev->ib_dev;
356 event.event = event_type; 380 event.event = event_type;
357 event.element.srq = &srq->ibsrq; 381 event.element.srq = &srq->ibsrq;
358 srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); 382 srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
359 383
360out: 384out:
@@ -415,6 +439,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
415 439
416 wqe = get_wqe(srq, ind); 440 wqe = get_wqe(srq, ind);
417 next_ind = *wqe_to_link(wqe); 441 next_ind = *wqe_to_link(wqe);
442
443 if (next_ind < 0) {
444 mthca_err(dev, "SRQ %06x full\n", srq->srqn);
445 err = -ENOMEM;
446 *bad_wr = wr;
447 break;
448 }
449
418 prev_wqe = srq->last; 450 prev_wqe = srq->last;
419 srq->last = wqe; 451 srq->last = wqe;
420 452
@@ -506,6 +538,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
506 wqe = get_wqe(srq, ind); 538 wqe = get_wqe(srq, ind);
507 next_ind = *wqe_to_link(wqe); 539 next_ind = *wqe_to_link(wqe);
508 540
541 if (next_ind < 0) {
542 mthca_err(dev, "SRQ %06x full\n", srq->srqn);
543 err = -ENOMEM;
544 *bad_wr = wr;
545 break;
546 }
547
509 ((struct mthca_next_seg *) wqe)->nda_op = 548 ((struct mthca_next_seg *) wqe)->nda_op =
510 cpu_to_be32((next_ind << srq->wqe_shift) | 1); 549 cpu_to_be32((next_ind << srq->wqe_shift) | 1);
511 ((struct mthca_next_seg *) wqe)->ee_nds = 0; 550 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 41613ec8a04e..bb015c6494c4 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -38,6 +38,12 @@
38#include <linux/types.h> 38#include <linux/types.h>
39 39
40/* 40/*
41 * Increment this value if any changes that break userspace ABI
42 * compatibility are made.
43 */
44#define MTHCA_UVERBS_ABI_VERSION 1
45
46/*
41 * Make sure that all structs defined in this file remain laid out so 47 * Make sure that all structs defined in this file remain laid out so
42 * that they pack the same way on 32-bit and 64-bit architectures (to 48 * that they pack the same way on 32-bit and 64-bit architectures (to
43 * avoid incompatibility between 32-bit userspace and 64-bit kernels). 49 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4ea1c1ca85bc..c994a916a58a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -100,7 +100,12 @@ struct ipoib_pseudoheader {
100 100
101struct ipoib_mcast; 101struct ipoib_mcast;
102 102
103struct ipoib_buf { 103struct ipoib_rx_buf {
104 struct sk_buff *skb;
105 dma_addr_t mapping;
106};
107
108struct ipoib_tx_buf {
104 struct sk_buff *skb; 109 struct sk_buff *skb;
105 DECLARE_PCI_UNMAP_ADDR(mapping) 110 DECLARE_PCI_UNMAP_ADDR(mapping)
106}; 111};
@@ -150,14 +155,14 @@ struct ipoib_dev_priv {
150 unsigned int admin_mtu; 155 unsigned int admin_mtu;
151 unsigned int mcast_mtu; 156 unsigned int mcast_mtu;
152 157
153 struct ipoib_buf *rx_ring; 158 struct ipoib_rx_buf *rx_ring;
154 159
155 spinlock_t tx_lock; 160 spinlock_t tx_lock;
156 struct ipoib_buf *tx_ring; 161 struct ipoib_tx_buf *tx_ring;
157 unsigned tx_head; 162 unsigned tx_head;
158 unsigned tx_tail; 163 unsigned tx_tail;
159 struct ib_sge tx_sge; 164 struct ib_sge tx_sge;
160 struct ib_send_wr tx_wr; 165 struct ib_send_wr tx_wr;
161 166
162 struct ib_wc ibwc[IPOIB_NUM_WC]; 167 struct ib_wc ibwc[IPOIB_NUM_WC];
163 168
@@ -277,7 +282,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
277int ipoib_mcast_detach(struct net_device *dev, u16 mlid, 282int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
278 union ib_gid *mgid); 283 union ib_gid *mgid);
279 284
280int ipoib_qp_create(struct net_device *dev); 285int ipoib_init_qp(struct net_device *dev);
281int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); 286int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
282void ipoib_transport_dev_cleanup(struct net_device *dev); 287void ipoib_transport_dev_cleanup(struct net_device *dev);
283 288
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f7440096b5ed..192fef884e21 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref)
95 } 95 }
96} 96}
97 97
98static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, 98static int ipoib_ib_post_receive(struct net_device *dev, int id)
99 unsigned int wr_id,
100 dma_addr_t addr)
101{ 99{
102 struct ib_sge list = { 100 struct ipoib_dev_priv *priv = netdev_priv(dev);
103 .addr = addr, 101 struct ib_sge list;
104 .length = IPOIB_BUF_SIZE, 102 struct ib_recv_wr param;
105 .lkey = priv->mr->lkey,
106 };
107 struct ib_recv_wr param = {
108 .wr_id = wr_id | IPOIB_OP_RECV,
109 .sg_list = &list,
110 .num_sge = 1,
111 };
112 struct ib_recv_wr *bad_wr; 103 struct ib_recv_wr *bad_wr;
104 int ret;
105
106 list.addr = priv->rx_ring[id].mapping;
107 list.length = IPOIB_BUF_SIZE;
108 list.lkey = priv->mr->lkey;
109
110 param.next = NULL;
111 param.wr_id = id | IPOIB_OP_RECV;
112 param.sg_list = &list;
113 param.num_sge = 1;
114
115 ret = ib_post_recv(priv->qp, &param, &bad_wr);
116 if (unlikely(ret)) {
117 ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
118 dma_unmap_single(priv->ca->dma_device,
119 priv->rx_ring[id].mapping,
120 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
121 dev_kfree_skb_any(priv->rx_ring[id].skb);
122 priv->rx_ring[id].skb = NULL;
123 }
113 124
114 return ib_post_recv(priv->qp, &param, &bad_wr); 125 return ret;
115} 126}
116 127
117static int ipoib_ib_post_receive(struct net_device *dev, int id) 128static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
118{ 129{
119 struct ipoib_dev_priv *priv = netdev_priv(dev); 130 struct ipoib_dev_priv *priv = netdev_priv(dev);
120 struct sk_buff *skb; 131 struct sk_buff *skb;
121 dma_addr_t addr; 132 dma_addr_t addr;
122 int ret;
123 133
124 skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); 134 skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
125 if (!skb) { 135 if (!skb)
126 ipoib_warn(priv, "failed to allocate receive buffer\n");
127
128 priv->rx_ring[id].skb = NULL;
129 return -ENOMEM; 136 return -ENOMEM;
130 } 137
131 skb_reserve(skb, 4); /* 16 byte align IP header */ 138 /*
132 priv->rx_ring[id].skb = skb; 139 * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
140 * header. So we need 4 more bytes to get to 48 and align the
141 * IP header to a multiple of 16.
142 */
143 skb_reserve(skb, 4);
144
133 addr = dma_map_single(priv->ca->dma_device, 145 addr = dma_map_single(priv->ca->dma_device,
134 skb->data, IPOIB_BUF_SIZE, 146 skb->data, IPOIB_BUF_SIZE,
135 DMA_FROM_DEVICE); 147 DMA_FROM_DEVICE);
136 pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); 148 if (unlikely(dma_mapping_error(addr))) {
137
138 ret = ipoib_ib_receive(priv, id, addr);
139 if (ret) {
140 ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
141 id, ret);
142 dma_unmap_single(priv->ca->dma_device, addr,
143 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
144 dev_kfree_skb_any(skb); 149 dev_kfree_skb_any(skb);
145 priv->rx_ring[id].skb = NULL; 150 return -EIO;
146 } 151 }
147 152
148 return ret; 153 priv->rx_ring[id].skb = skb;
154 priv->rx_ring[id].mapping = addr;
155
156 return 0;
149} 157}
150 158
151static int ipoib_ib_post_receives(struct net_device *dev) 159static int ipoib_ib_post_receives(struct net_device *dev)
@@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev)
154 int i; 162 int i;
155 163
156 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { 164 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
165 if (ipoib_alloc_rx_skb(dev, i)) {
166 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
167 return -ENOMEM;
168 }
157 if (ipoib_ib_post_receive(dev, i)) { 169 if (ipoib_ib_post_receive(dev, i)) {
158 ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); 170 ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
159 return -EIO; 171 return -EIO;
@@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
176 wr_id &= ~IPOIB_OP_RECV; 188 wr_id &= ~IPOIB_OP_RECV;
177 189
178 if (wr_id < IPOIB_RX_RING_SIZE) { 190 if (wr_id < IPOIB_RX_RING_SIZE) {
179 struct sk_buff *skb = priv->rx_ring[wr_id].skb; 191 struct sk_buff *skb = priv->rx_ring[wr_id].skb;
180 192 dma_addr_t addr = priv->rx_ring[wr_id].mapping;
181 priv->rx_ring[wr_id].skb = NULL;
182 193
183 dma_unmap_single(priv->ca->dma_device, 194 if (unlikely(wc->status != IB_WC_SUCCESS)) {
184 pci_unmap_addr(&priv->rx_ring[wr_id],
185 mapping),
186 IPOIB_BUF_SIZE,
187 DMA_FROM_DEVICE);
188
189 if (wc->status != IB_WC_SUCCESS) {
190 if (wc->status != IB_WC_WR_FLUSH_ERR) 195 if (wc->status != IB_WC_WR_FLUSH_ERR)
191 ipoib_warn(priv, "failed recv event " 196 ipoib_warn(priv, "failed recv event "
192 "(status=%d, wrid=%d vend_err %x)\n", 197 "(status=%d, wrid=%d vend_err %x)\n",
193 wc->status, wr_id, wc->vendor_err); 198 wc->status, wr_id, wc->vendor_err);
199 dma_unmap_single(priv->ca->dma_device, addr,
200 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
194 dev_kfree_skb_any(skb); 201 dev_kfree_skb_any(skb);
202 priv->rx_ring[wr_id].skb = NULL;
195 return; 203 return;
196 } 204 }
197 205
206 /*
207 * If we can't allocate a new RX buffer, dump
208 * this packet and reuse the old buffer.
209 */
210 if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
211 ++priv->stats.rx_dropped;
212 goto repost;
213 }
214
198 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 215 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
199 wc->byte_len, wc->slid); 216 wc->byte_len, wc->slid);
200 217
218 dma_unmap_single(priv->ca->dma_device, addr,
219 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
220
201 skb_put(skb, wc->byte_len); 221 skb_put(skb, wc->byte_len);
202 skb_pull(skb, IB_GRH_BYTES); 222 skb_pull(skb, IB_GRH_BYTES);
203 223
@@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
220 dev_kfree_skb_any(skb); 240 dev_kfree_skb_any(skb);
221 } 241 }
222 242
223 /* repost receive */ 243 repost:
224 if (ipoib_ib_post_receive(dev, wr_id)) 244 if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
225 ipoib_warn(priv, "ipoib_ib_post_receive failed " 245 ipoib_warn(priv, "ipoib_ib_post_receive failed "
226 "for buf %d\n", wr_id); 246 "for buf %d\n", wr_id);
227 } else 247 } else
@@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
229 wr_id); 249 wr_id);
230 250
231 } else { 251 } else {
232 struct ipoib_buf *tx_req; 252 struct ipoib_tx_buf *tx_req;
233 unsigned long flags; 253 unsigned long flags;
234 254
235 if (wr_id >= IPOIB_TX_RING_SIZE) { 255 if (wr_id >= IPOIB_TX_RING_SIZE) {
@@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
302 struct ipoib_ah *address, u32 qpn) 322 struct ipoib_ah *address, u32 qpn)
303{ 323{
304 struct ipoib_dev_priv *priv = netdev_priv(dev); 324 struct ipoib_dev_priv *priv = netdev_priv(dev);
305 struct ipoib_buf *tx_req; 325 struct ipoib_tx_buf *tx_req;
306 dma_addr_t addr; 326 dma_addr_t addr;
307 327
308 if (skb->len > dev->mtu + INFINIBAND_ALEN) { 328 if (skb->len > dev->mtu + INFINIBAND_ALEN) {
@@ -387,9 +407,9 @@ int ipoib_ib_dev_open(struct net_device *dev)
387 struct ipoib_dev_priv *priv = netdev_priv(dev); 407 struct ipoib_dev_priv *priv = netdev_priv(dev);
388 int ret; 408 int ret;
389 409
390 ret = ipoib_qp_create(dev); 410 ret = ipoib_init_qp(dev);
391 if (ret) { 411 if (ret) {
392 ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); 412 ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
393 return -1; 413 return -1;
394 } 414 }
395 415
@@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
468 struct ib_qp_attr qp_attr; 488 struct ib_qp_attr qp_attr;
469 int attr_mask; 489 int attr_mask;
470 unsigned long begin; 490 unsigned long begin;
471 struct ipoib_buf *tx_req; 491 struct ipoib_tx_buf *tx_req;
472 int i; 492 int i;
473 493
474 /* Kill the existing QP and allocate a new one */ 494 /* Kill the existing QP and allocate a new one */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6c5bf07489f4..cd4f42328dbe 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev)
637{ 637{
638 struct ipoib_dev_priv *priv = netdev_priv(dev); 638 struct ipoib_dev_priv *priv = netdev_priv(dev);
639 639
640 ipoib_warn(priv, "transmit timeout: latency %ld\n", 640 ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
641 jiffies - dev->trans_start); 641 jiffies_to_msecs(jiffies - dev->trans_start));
642 ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
643 netif_queue_stopped(dev),
644 priv->tx_head, priv->tx_tail);
642 /* XXX reset QP, etc. */ 645 /* XXX reset QP, etc. */
643} 646}
644 647
@@ -729,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
729 732
730 /* Allocate RX/TX "rings" to hold queued skbs */ 733 /* Allocate RX/TX "rings" to hold queued skbs */
731 734
732 priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), 735 priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
733 GFP_KERNEL); 736 GFP_KERNEL);
734 if (!priv->rx_ring) { 737 if (!priv->rx_ring) {
735 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 738 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
@@ -737,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
737 goto out; 740 goto out;
738 } 741 }
739 memset(priv->rx_ring, 0, 742 memset(priv->rx_ring, 0,
740 IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); 743 IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf));
741 744
742 priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), 745 priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
743 GFP_KERNEL); 746 GFP_KERNEL);
744 if (!priv->tx_ring) { 747 if (!priv->tx_ring) {
745 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 748 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
@@ -747,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
747 goto out_rx_ring_cleanup; 750 goto out_rx_ring_cleanup;
748 } 751 }
749 memset(priv->tx_ring, 0, 752 memset(priv->tx_ring, 0,
750 IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); 753 IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf));
751 754
752 /* priv->tx_head & tx_tail are already 0 */ 755 /* priv->tx_head & tx_tail are already 0 */
753 756
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 79f59d0563ed..b5902a7ec240 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
92 return ret; 92 return ret;
93} 93}
94 94
95int ipoib_qp_create(struct net_device *dev) 95int ipoib_init_qp(struct net_device *dev)
96{ 96{
97 struct ipoib_dev_priv *priv = netdev_priv(dev); 97 struct ipoib_dev_priv *priv = netdev_priv(dev);
98 int ret; 98 int ret;
@@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev)
149 return 0; 149 return 0;
150 150
151out_fail: 151out_fail:
152 ib_destroy_qp(priv->qp); 152 qp_attr.qp_state = IB_QPS_RESET;
153 priv->qp = NULL; 153 if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
154 ipoib_warn(priv, "Failed to modify QP to RESET state\n");
154 155
155 return -EINVAL; 156 return ret;
156} 157}
157 158
158int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) 159int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)