aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/iw_cm.c
diff options
context:
space:
mode:
author Andy Grover <andy.grover@oracle.com> 2009-02-24 10:30:36 -0500
committer David S. Miller <davem@davemloft.net> 2009-02-27 02:39:33 -0500
commit fcd8b7c0ecf792dd824b2edcd63cb2c08563c340 (patch)
tree 9ba10fe626cc9129f9138bc81b1fa61c74097e4f /net/rds/iw_cm.c
parent e6babe4cc4ce48577d743cc0de19a214f2527956 (diff)
RDS: Add iWARP support
Support for iWARP NICs is implemented as a separate RDS transport from IB. The code, however, is very similar to IB (it was forked, basically), so let's keep it in one changeset. The reason for this duplication is that, despite its similarity to IB, there are a number of places where it has different semantics. iWARP zcopy support is still under development, and giving it its own sandbox ensures that IB code isn't disrupted while iWARP changes. Over time these transports will re-converge. Signed-off-by: Andy Grover <andy.grover@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/iw_cm.c')
-rw-r--r--net/rds/iw_cm.c750
1 files changed, 750 insertions, 0 deletions
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
new file mode 100644
index 000000000000..57ecb3d4b8a5
--- /dev/null
+++ b/net/rds/iw_cm.c
@@ -0,0 +1,750 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/vmalloc.h>
36
37#include "rds.h"
38#include "iw.h"
39
40/*
41 * Set the selected protocol version
42 */
43static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version)
44{
45 conn->c_version = version;
46}
47
48/*
49 * Set up flow control
50 */
51static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits)
52{
53 struct rds_iw_connection *ic = conn->c_transport_data;
54
55 if (rds_iw_sysctl_flow_control && credits != 0) {
56 /* We're doing flow control */
57 ic->i_flowctl = 1;
58 rds_iw_send_add_credits(conn, credits);
59 } else {
60 ic->i_flowctl = 0;
61 }
62}
63
64/*
65 * Connection established.
66 * We get here for both outgoing and incoming connection.
67 */
68void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
69{
70 const struct rds_iw_connect_private *dp = NULL;
71 struct rds_iw_connection *ic = conn->c_transport_data;
72 struct rds_iw_device *rds_iwdev;
73 int err;
74
75 if (event->param.conn.private_data_len) {
76 dp = event->param.conn.private_data;
77
78 rds_iw_set_protocol(conn,
79 RDS_PROTOCOL(dp->dp_protocol_major,
80 dp->dp_protocol_minor));
81 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
82 }
83
84 /* update ib_device with this local ipaddr & conn */
85 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err)
88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92
93 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */
95 if (dp && dp->dp_ack_seq)
96 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
97
98 printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n",
99 &conn->c_laddr, &conn->c_faddr,
100 RDS_PROTOCOL_MAJOR(conn->c_version),
101 RDS_PROTOCOL_MINOR(conn->c_version),
102 ic->i_flowctl ? ", flow control" : "");
103
104 rds_connect_complete(conn);
105}
106
107static void rds_iw_cm_fill_conn_param(struct rds_connection *conn,
108 struct rdma_conn_param *conn_param,
109 struct rds_iw_connect_private *dp,
110 u32 protocol_version)
111{
112 struct rds_iw_connection *ic = conn->c_transport_data;
113
114 memset(conn_param, 0, sizeof(struct rdma_conn_param));
115 /* XXX tune these? */
116 conn_param->responder_resources = 1;
117 conn_param->initiator_depth = 1;
118
119 if (dp) {
120 memset(dp, 0, sizeof(*dp));
121 dp->dp_saddr = conn->c_laddr;
122 dp->dp_daddr = conn->c_faddr;
123 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
124 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
125 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS);
126 dp->dp_ack_seq = rds_iw_piggyb_ack(ic);
127
128 /* Advertise flow control */
129 if (ic->i_flowctl) {
130 unsigned int credits;
131
132 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
133 dp->dp_credit = cpu_to_be32(credits);
134 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
135 }
136
137 conn_param->private_data = dp;
138 conn_param->private_data_len = sizeof(*dp);
139 }
140}
141
142static void rds_iw_cq_event_handler(struct ib_event *event, void *data)
143{
144 rdsdebug("event %u data %p\n", event->event, data);
145}
146
147static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
148{
149 struct rds_connection *conn = data;
150 struct rds_iw_connection *ic = conn->c_transport_data;
151
152 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
153
154 switch (event->event) {
155 case IB_EVENT_COMM_EST:
156 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
157 break;
158 case IB_EVENT_QP_REQ_ERR:
159 case IB_EVENT_QP_FATAL:
160 default:
161 rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
162 event->event, &conn->c_laddr,
163 &conn->c_faddr);
164 break;
165 }
166}
167
168/*
169 * Create a QP
170 */
171static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
172 struct rds_iw_device *rds_iwdev,
173 struct rds_iw_work_ring *send_ring,
174 void (*send_cq_handler)(struct ib_cq *, void *),
175 struct rds_iw_work_ring *recv_ring,
176 void (*recv_cq_handler)(struct ib_cq *, void *),
177 void *context)
178{
179 struct ib_device *dev = rds_iwdev->dev;
180 unsigned int send_size, recv_size;
181 int ret;
182
183 /* The offset of 1 is to accomodate the additional ACK WR. */
184 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
185 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
186 rds_iw_ring_resize(send_ring, send_size - 1);
187 rds_iw_ring_resize(recv_ring, recv_size - 1);
188
189 memset(attr, 0, sizeof(*attr));
190 attr->event_handler = rds_iw_qp_event_handler;
191 attr->qp_context = context;
192 attr->cap.max_send_wr = send_size;
193 attr->cap.max_recv_wr = recv_size;
194 attr->cap.max_send_sge = rds_iwdev->max_sge;
195 attr->cap.max_recv_sge = RDS_IW_RECV_SGE;
196 attr->sq_sig_type = IB_SIGNAL_REQ_WR;
197 attr->qp_type = IB_QPT_RC;
198
199 attr->send_cq = ib_create_cq(dev, send_cq_handler,
200 rds_iw_cq_event_handler,
201 context, send_size, 0);
202 if (IS_ERR(attr->send_cq)) {
203 ret = PTR_ERR(attr->send_cq);
204 attr->send_cq = NULL;
205 rdsdebug("ib_create_cq send failed: %d\n", ret);
206 goto out;
207 }
208
209 attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
210 rds_iw_cq_event_handler,
211 context, recv_size, 0);
212 if (IS_ERR(attr->recv_cq)) {
213 ret = PTR_ERR(attr->recv_cq);
214 attr->recv_cq = NULL;
215 rdsdebug("ib_create_cq send failed: %d\n", ret);
216 goto out;
217 }
218
219 ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP);
220 if (ret) {
221 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
222 goto out;
223 }
224
225 ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED);
226 if (ret) {
227 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
228 goto out;
229 }
230
231out:
232 if (ret) {
233 if (attr->send_cq)
234 ib_destroy_cq(attr->send_cq);
235 if (attr->recv_cq)
236 ib_destroy_cq(attr->recv_cq);
237 }
238 return ret;
239}
240
241/*
242 * This needs to be very careful to not leave IS_ERR pointers around for
243 * cleanup to trip over.
244 */
245static int rds_iw_setup_qp(struct rds_connection *conn)
246{
247 struct rds_iw_connection *ic = conn->c_transport_data;
248 struct ib_device *dev = ic->i_cm_id->device;
249 struct ib_qp_init_attr attr;
250 struct rds_iw_device *rds_iwdev;
251 int ret;
252
253 /* rds_iw_add_one creates a rds_iw_device object per IB device,
254 * and allocates a protection domain, memory range and MR pool
255 * for each. If that fails for any reason, it will not register
256 * the rds_iwdev at all.
257 */
258 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
259 if (rds_iwdev == NULL) {
260 if (printk_ratelimit())
261 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
262 dev->name);
263 return -EOPNOTSUPP;
264 }
265
266 /* Protection domain and memory range */
267 ic->i_pd = rds_iwdev->pd;
268 ic->i_mr = rds_iwdev->mr;
269
270 ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
271 &ic->i_send_ring, rds_iw_send_cq_comp_handler,
272 &ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
273 conn);
274 if (ret < 0)
275 goto out;
276
277 ic->i_send_cq = attr.send_cq;
278 ic->i_recv_cq = attr.recv_cq;
279
280 /*
281 * XXX this can fail if max_*_wr is too large? Are we supposed
282 * to back off until we get a value that the hardware can support?
283 */
284 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
285 if (ret) {
286 rdsdebug("rdma_create_qp failed: %d\n", ret);
287 goto out;
288 }
289
290 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
291 ic->i_send_ring.w_nr *
292 sizeof(struct rds_header),
293 &ic->i_send_hdrs_dma, GFP_KERNEL);
294 if (ic->i_send_hdrs == NULL) {
295 ret = -ENOMEM;
296 rdsdebug("ib_dma_alloc_coherent send failed\n");
297 goto out;
298 }
299
300 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
301 ic->i_recv_ring.w_nr *
302 sizeof(struct rds_header),
303 &ic->i_recv_hdrs_dma, GFP_KERNEL);
304 if (ic->i_recv_hdrs == NULL) {
305 ret = -ENOMEM;
306 rdsdebug("ib_dma_alloc_coherent recv failed\n");
307 goto out;
308 }
309
310 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
311 &ic->i_ack_dma, GFP_KERNEL);
312 if (ic->i_ack == NULL) {
313 ret = -ENOMEM;
314 rdsdebug("ib_dma_alloc_coherent ack failed\n");
315 goto out;
316 }
317
318 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
319 if (ic->i_sends == NULL) {
320 ret = -ENOMEM;
321 rdsdebug("send allocation failed\n");
322 goto out;
323 }
324 rds_iw_send_init_ring(ic);
325
326 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
327 if (ic->i_recvs == NULL) {
328 ret = -ENOMEM;
329 rdsdebug("recv allocation failed\n");
330 goto out;
331 }
332
333 rds_iw_recv_init_ring(ic);
334 rds_iw_recv_init_ack(ic);
335
336 /* Post receive buffers - as a side effect, this will update
337 * the posted credit count. */
338 rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
339
340 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
341 ic->i_send_cq, ic->i_recv_cq);
342
343out:
344 return ret;
345}
346
347static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp)
348{
349 u16 common;
350 u32 version = 0;
351
352 /* rdma_cm private data is odd - when there is any private data in the
353 * request, we will be given a pretty large buffer without telling us the
354 * original size. The only way to tell the difference is by looking at
355 * the contents, which are initialized to zero.
356 * If the protocol version fields aren't set, this is a connection attempt
357 * from an older version. This could could be 3.0 or 2.0 - we can't tell.
358 * We really should have changed this for OFED 1.3 :-( */
359 if (dp->dp_protocol_major == 0)
360 return RDS_PROTOCOL_3_0;
361
362 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS;
363 if (dp->dp_protocol_major == 3 && common) {
364 version = RDS_PROTOCOL_3_0;
365 while ((common >>= 1) != 0)
366 version++;
367 } else if (printk_ratelimit()) {
368 printk(KERN_NOTICE "RDS: Connection from %pI4 using "
369 "incompatible protocol version %u.%u\n",
370 &dp->dp_saddr,
371 dp->dp_protocol_major,
372 dp->dp_protocol_minor);
373 }
374 return version;
375}
376
377int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
378 struct rdma_cm_event *event)
379{
380 const struct rds_iw_connect_private *dp = event->param.conn.private_data;
381 struct rds_iw_connect_private dp_rep;
382 struct rds_connection *conn = NULL;
383 struct rds_iw_connection *ic = NULL;
384 struct rdma_conn_param conn_param;
385 struct rds_iw_device *rds_iwdev;
386 u32 version;
387 int err, destroy = 1;
388
389 /* Check whether the remote protocol version matches ours. */
390 version = rds_iw_protocol_compatible(dp);
391 if (!version)
392 goto out;
393
394 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n",
395 &dp->dp_saddr, &dp->dp_daddr,
396 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
397
398 conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport,
399 GFP_KERNEL);
400 if (IS_ERR(conn)) {
401 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
402 conn = NULL;
403 goto out;
404 }
405
406 /*
407 * The connection request may occur while the
408 * previous connection exist, e.g. in case of failover.
409 * But as connections may be initiated simultaneously
410 * by both hosts, we have a random backoff mechanism -
411 * see the comment above rds_queue_reconnect()
412 */
413 mutex_lock(&conn->c_cm_lock);
414 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
415 if (rds_conn_state(conn) == RDS_CONN_UP) {
416 rdsdebug("incoming connect while connecting\n");
417 rds_conn_drop(conn);
418 rds_iw_stats_inc(s_iw_listen_closed_stale);
419 } else
420 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
421 /* Wait and see - our connect may still be succeeding */
422 rds_iw_stats_inc(s_iw_connect_raced);
423 }
424 mutex_unlock(&conn->c_cm_lock);
425 goto out;
426 }
427
428 ic = conn->c_transport_data;
429
430 rds_iw_set_protocol(conn, version);
431 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
432
433 /* If the peer gave us the last packet it saw, process this as if
434 * we had received a regular ACK. */
435 if (dp->dp_ack_seq)
436 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
437
438 BUG_ON(cm_id->context);
439 BUG_ON(ic->i_cm_id);
440
441 ic->i_cm_id = cm_id;
442 cm_id->context = conn;
443
444 rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client);
445 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
446
447 /* We got halfway through setting up the ib_connection, if we
448 * fail now, we have to take the long route out of this mess. */
449 destroy = 0;
450
451 err = rds_iw_setup_qp(conn);
452 if (err) {
453 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
454 goto out;
455 }
456
457 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
458
459 /* rdma_accept() calls rdma_reject() internally if it fails */
460 err = rdma_accept(cm_id, &conn_param);
461 mutex_unlock(&conn->c_cm_lock);
462 if (err) {
463 rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
464 goto out;
465 }
466
467 return 0;
468
469out:
470 rdma_reject(cm_id, NULL, 0);
471 return destroy;
472}
473
474
475int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
476{
477 struct rds_connection *conn = cm_id->context;
478 struct rds_iw_connection *ic = conn->c_transport_data;
479 struct rdma_conn_param conn_param;
480 struct rds_iw_connect_private dp;
481 int ret;
482
483 /* If the peer doesn't do protocol negotiation, we must
484 * default to RDSv3.0 */
485 rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0);
486 ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */
487
488 ret = rds_iw_setup_qp(conn);
489 if (ret) {
490 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
491 goto out;
492 }
493
494 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
495
496 ret = rdma_connect(cm_id, &conn_param);
497 if (ret)
498 rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
499
500out:
501 /* Beware - returning non-zero tells the rdma_cm to destroy
502 * the cm_id. We should certainly not do it as long as we still
503 * "own" the cm_id. */
504 if (ret) {
505 struct rds_iw_connection *ic = conn->c_transport_data;
506
507 if (ic->i_cm_id == cm_id)
508 ret = 0;
509 }
510 return ret;
511}
512
513int rds_iw_conn_connect(struct rds_connection *conn)
514{
515 struct rds_iw_connection *ic = conn->c_transport_data;
516 struct rds_iw_device *rds_iwdev;
517 struct sockaddr_in src, dest;
518 int ret;
519
520 /* XXX I wonder what affect the port space has */
521 /* delegate cm event handler to rdma_transport */
522 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
523 RDMA_PS_TCP);
524 if (IS_ERR(ic->i_cm_id)) {
525 ret = PTR_ERR(ic->i_cm_id);
526 ic->i_cm_id = NULL;
527 rdsdebug("rdma_create_id() failed: %d\n", ret);
528 goto out;
529 }
530
531 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
532
533 src.sin_family = AF_INET;
534 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
535 src.sin_port = (__force u16)htons(0);
536
537 /* First, bind to the local address and device. */
538 ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src);
539 if (ret) {
540 rdsdebug("rdma_bind_addr(%pI4) failed: %d\n",
541 &conn->c_laddr, ret);
542 rdma_destroy_id(ic->i_cm_id);
543 ic->i_cm_id = NULL;
544 goto out;
545 }
546
547 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
548 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
549
550 dest.sin_family = AF_INET;
551 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
552 dest.sin_port = (__force u16)htons(RDS_PORT);
553
554 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
555 (struct sockaddr *)&dest,
556 RDS_RDMA_RESOLVE_TIMEOUT_MS);
557 if (ret) {
558 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
559 ret);
560 rdma_destroy_id(ic->i_cm_id);
561 ic->i_cm_id = NULL;
562 }
563
564out:
565 return ret;
566}
567
568/*
569 * This is so careful about only cleaning up resources that were built up
570 * so that it can be called at any point during startup. In fact it
571 * can be called multiple times for a given connection.
572 */
573void rds_iw_conn_shutdown(struct rds_connection *conn)
574{
575 struct rds_iw_connection *ic = conn->c_transport_data;
576 int err = 0;
577 struct ib_qp_attr qp_attr;
578
579 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
580 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
581 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
582
583 if (ic->i_cm_id) {
584 struct ib_device *dev = ic->i_cm_id->device;
585
586 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
587 err = rdma_disconnect(ic->i_cm_id);
588 if (err) {
589 /* Actually this may happen quite frequently, when
590 * an outgoing connect raced with an incoming connect.
591 */
592 rdsdebug("rds_iw_conn_shutdown: failed to disconnect,"
593 " cm: %p err %d\n", ic->i_cm_id, err);
594 }
595
596 if (ic->i_cm_id->qp) {
597 qp_attr.qp_state = IB_QPS_ERR;
598 ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
599 }
600
601 wait_event(rds_iw_ring_empty_wait,
602 rds_iw_ring_empty(&ic->i_send_ring) &&
603 rds_iw_ring_empty(&ic->i_recv_ring));
604
605 if (ic->i_send_hdrs)
606 ib_dma_free_coherent(dev,
607 ic->i_send_ring.w_nr *
608 sizeof(struct rds_header),
609 ic->i_send_hdrs,
610 ic->i_send_hdrs_dma);
611
612 if (ic->i_recv_hdrs)
613 ib_dma_free_coherent(dev,
614 ic->i_recv_ring.w_nr *
615 sizeof(struct rds_header),
616 ic->i_recv_hdrs,
617 ic->i_recv_hdrs_dma);
618
619 if (ic->i_ack)
620 ib_dma_free_coherent(dev, sizeof(struct rds_header),
621 ic->i_ack, ic->i_ack_dma);
622
623 if (ic->i_sends)
624 rds_iw_send_clear_ring(ic);
625 if (ic->i_recvs)
626 rds_iw_recv_clear_ring(ic);
627
628 if (ic->i_cm_id->qp)
629 rdma_destroy_qp(ic->i_cm_id);
630 if (ic->i_send_cq)
631 ib_destroy_cq(ic->i_send_cq);
632 if (ic->i_recv_cq)
633 ib_destroy_cq(ic->i_recv_cq);
634
635 /*
636 * If associated with an rds_iw_device:
637 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list.
639 */
640 if (ic->rds_iwdev) {
641
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653
654 rdma_destroy_id(ic->i_cm_id);
655
656 ic->i_cm_id = NULL;
657 ic->i_pd = NULL;
658 ic->i_mr = NULL;
659 ic->i_send_cq = NULL;
660 ic->i_recv_cq = NULL;
661 ic->i_send_hdrs = NULL;
662 ic->i_recv_hdrs = NULL;
663 ic->i_ack = NULL;
664 }
665 BUG_ON(ic->rds_iwdev);
666
667 /* Clear pending transmit */
668 if (ic->i_rm) {
669 rds_message_put(ic->i_rm);
670 ic->i_rm = NULL;
671 }
672
673 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0);
676 ic->i_ack_recv = 0;
677
678 /* Clear flow control state */
679 ic->i_flowctl = 0;
680 atomic_set(&ic->i_credits, 0);
681
682 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
683 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
684
685 if (ic->i_iwinc) {
686 rds_inc_put(&ic->i_iwinc->ii_inc);
687 ic->i_iwinc = NULL;
688 }
689
690 vfree(ic->i_sends);
691 ic->i_sends = NULL;
692 vfree(ic->i_recvs);
693 ic->i_recvs = NULL;
694 rdsdebug("shutdown complete\n");
695}
696
697int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
698{
699 struct rds_iw_connection *ic;
700 unsigned long flags;
701
702 /* XXX too lazy? */
703 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
704 if (ic == NULL)
705 return -ENOMEM;
706
707 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex);
709
710 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they
712 * must be initialized before it can be called.
713 */
714 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
715 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
716
717 ic->conn = conn;
718 conn->c_transport_data = ic;
719
720 spin_lock_irqsave(&iw_nodev_conns_lock, flags);
721 list_add_tail(&ic->iw_node, &iw_nodev_conns);
722 spin_unlock_irqrestore(&iw_nodev_conns_lock, flags);
723
724
725 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
726 return 0;
727}
728
729void rds_iw_conn_free(void *arg)
730{
731 struct rds_iw_connection *ic = arg;
732 rdsdebug("ic %p\n", ic);
733 list_del(&ic->iw_node);
734 kfree(ic);
735}
736
/*
 * An error occurred on the connection: drop it, then print the
 * printf-style message described by @fmt and the arguments after it.
 */
void __rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...)
{
	va_list args;

	rds_conn_drop(conn);

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);
}