aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/connection.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds/connection.c')
-rw-r--r--net/rds/connection.c157
1 files changed, 114 insertions, 43 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7619b671ca28..870992e08cae 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -37,7 +37,6 @@
37 37
38#include "rds.h" 38#include "rds.h"
39#include "loop.h" 39#include "loop.h"
40#include "rdma.h"
41 40
42#define RDS_CONNECTION_HASH_BITS 12 41#define RDS_CONNECTION_HASH_BITS 12
43#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
63 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
64} while (0) 63} while (0)
65 64
66static inline int rds_conn_is_sending(struct rds_connection *conn) 65/* rcu read lock must be held or the connection spinlock */
67{
68 int ret = 0;
69
70 if (!mutex_trylock(&conn->c_send_lock))
71 ret = 1;
72 else
73 mutex_unlock(&conn->c_send_lock);
74
75 return ret;
76}
77
78static struct rds_connection *rds_conn_lookup(struct hlist_head *head, 66static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
79 __be32 laddr, __be32 faddr, 67 __be32 laddr, __be32 faddr,
80 struct rds_transport *trans) 68 struct rds_transport *trans)
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
82 struct rds_connection *conn, *ret = NULL; 70 struct rds_connection *conn, *ret = NULL;
83 struct hlist_node *pos; 71 struct hlist_node *pos;
84 72
85 hlist_for_each_entry(conn, pos, head, c_hash_node) { 73 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
86 if (conn->c_faddr == faddr && conn->c_laddr == laddr && 74 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
87 conn->c_trans == trans) { 75 conn->c_trans == trans) {
88 ret = conn; 76 ret = conn;
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
129{ 117{
130 struct rds_connection *conn, *parent = NULL; 118 struct rds_connection *conn, *parent = NULL;
131 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 119 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
120 struct rds_transport *loop_trans;
132 unsigned long flags; 121 unsigned long flags;
133 int ret; 122 int ret;
134 123
135 spin_lock_irqsave(&rds_conn_lock, flags); 124 rcu_read_lock();
136 conn = rds_conn_lookup(head, laddr, faddr, trans); 125 conn = rds_conn_lookup(head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 126 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
138 !is_outgoing) { 127 !is_outgoing) {
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
143 parent = conn; 132 parent = conn;
144 conn = parent->c_passive; 133 conn = parent->c_passive;
145 } 134 }
146 spin_unlock_irqrestore(&rds_conn_lock, flags); 135 rcu_read_unlock();
147 if (conn) 136 if (conn)
148 goto out; 137 goto out;
149 138
150 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 139 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
151 if (conn == NULL) { 140 if (!conn) {
152 conn = ERR_PTR(-ENOMEM); 141 conn = ERR_PTR(-ENOMEM);
153 goto out; 142 goto out;
154 } 143 }
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
159 spin_lock_init(&conn->c_lock); 148 spin_lock_init(&conn->c_lock);
160 conn->c_next_tx_seq = 1; 149 conn->c_next_tx_seq = 1;
161 150
162 mutex_init(&conn->c_send_lock); 151 init_waitqueue_head(&conn->c_waitq);
163 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
164 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
165 154
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
175 * can bind to the destination address then we'd rather the messages 164 * can bind to the destination address then we'd rather the messages
176 * flow through loopback rather than either transport. 165 * flow through loopback rather than either transport.
177 */ 166 */
178 if (rds_trans_get_preferred(faddr)) { 167 loop_trans = rds_trans_get_preferred(faddr);
168 if (loop_trans) {
169 rds_trans_put(loop_trans);
179 conn->c_loopback = 1; 170 conn->c_loopback = 1;
180 if (is_outgoing && trans->t_prefer_loopback) { 171 if (is_outgoing && trans->t_prefer_loopback) {
181 /* "outgoing" connection - and the transport 172 /* "outgoing" connection - and the transport
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
238 kmem_cache_free(rds_conn_slab, conn); 229 kmem_cache_free(rds_conn_slab, conn);
239 conn = found; 230 conn = found;
240 } else { 231 } else {
241 hlist_add_head(&conn->c_hash_node, head); 232 hlist_add_head_rcu(&conn->c_hash_node, head);
242 rds_cong_add_conn(conn); 233 rds_cong_add_conn(conn);
243 rds_conn_count++; 234 rds_conn_count++;
244 } 235 }
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
263} 254}
264EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 255EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
265 256
257void rds_conn_shutdown(struct rds_connection *conn)
258{
259 /* shut it down unless it's down already */
260 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
261 /*
262 * Quiesce the connection mgmt handlers before we start tearing
263 * things down. We don't hold the mutex for the entire
264 * duration of the shutdown operation, else we may be
265 * deadlocking with the CM handler. Instead, the CM event
266 * handler is supposed to check for state DISCONNECTING
267 */
268 mutex_lock(&conn->c_cm_lock);
269 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
270 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
271 rds_conn_error(conn, "shutdown called in state %d\n",
272 atomic_read(&conn->c_state));
273 mutex_unlock(&conn->c_cm_lock);
274 return;
275 }
276 mutex_unlock(&conn->c_cm_lock);
277
278 wait_event(conn->c_waitq,
279 !test_bit(RDS_IN_XMIT, &conn->c_flags));
280
281 conn->c_trans->conn_shutdown(conn);
282 rds_conn_reset(conn);
283
284 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
285 /* This can happen - eg when we're in the middle of tearing
286 * down the connection, and someone unloads the rds module.
287 * Quite reproduceable with loopback connections.
288 * Mostly harmless.
289 */
290 rds_conn_error(conn,
291 "%s: failed to transition to state DOWN, "
292 "current state is %d\n",
293 __func__,
294 atomic_read(&conn->c_state));
295 return;
296 }
297 }
298
299 /* Then reconnect if it's still live.
300 * The passive side of an IB loopback connection is never added
301 * to the conn hash, so we never trigger a reconnect on this
302 * conn - the reconnect is always triggered by the active peer. */
303 cancel_delayed_work_sync(&conn->c_conn_w);
304 rcu_read_lock();
305 if (!hlist_unhashed(&conn->c_hash_node)) {
306 rcu_read_unlock();
307 rds_queue_reconnect(conn);
308 } else {
309 rcu_read_unlock();
310 }
311}
312
313/*
314 * Stop and free a connection.
315 *
316 * This can only be used in very limited circumstances. It assumes that once
317 * the conn has been shutdown that no one else is referencing the connection.
318 * We can only ensure this in the rmmod path in the current code.
319 */
266void rds_conn_destroy(struct rds_connection *conn) 320void rds_conn_destroy(struct rds_connection *conn)
267{ 321{
268 struct rds_message *rm, *rtmp; 322 struct rds_message *rm, *rtmp;
323 unsigned long flags;
269 324
270 rdsdebug("freeing conn %p for %pI4 -> " 325 rdsdebug("freeing conn %p for %pI4 -> "
271 "%pI4\n", conn, &conn->c_laddr, 326 "%pI4\n", conn, &conn->c_laddr,
272 &conn->c_faddr); 327 &conn->c_faddr);
273 328
274 hlist_del_init(&conn->c_hash_node); 329 /* Ensure conn will not be scheduled for reconnect */
330 spin_lock_irq(&rds_conn_lock);
331 hlist_del_init_rcu(&conn->c_hash_node);
332 spin_unlock_irq(&rds_conn_lock);
333 synchronize_rcu();
275 334
276 /* wait for the rds thread to shut it down */ 335 /* shut the connection down */
277 atomic_set(&conn->c_state, RDS_CONN_ERROR); 336 rds_conn_drop(conn);
278 cancel_delayed_work(&conn->c_conn_w); 337 flush_work(&conn->c_down_w);
279 queue_work(rds_wq, &conn->c_down_w); 338
280 flush_workqueue(rds_wq); 339 /* make sure lingering queued work won't try to ref the conn */
340 cancel_delayed_work_sync(&conn->c_send_w);
341 cancel_delayed_work_sync(&conn->c_recv_w);
281 342
282 /* tear down queued messages */ 343 /* tear down queued messages */
283 list_for_each_entry_safe(rm, rtmp, 344 list_for_each_entry_safe(rm, rtmp,
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn)
302 BUG_ON(!list_empty(&conn->c_retrans)); 363 BUG_ON(!list_empty(&conn->c_retrans));
303 kmem_cache_free(rds_conn_slab, conn); 364 kmem_cache_free(rds_conn_slab, conn);
304 365
366 spin_lock_irqsave(&rds_conn_lock, flags);
305 rds_conn_count--; 367 rds_conn_count--;
368 spin_unlock_irqrestore(&rds_conn_lock, flags);
306} 369}
307EXPORT_SYMBOL_GPL(rds_conn_destroy); 370EXPORT_SYMBOL_GPL(rds_conn_destroy);
308 371
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
316 struct list_head *list; 379 struct list_head *list;
317 struct rds_connection *conn; 380 struct rds_connection *conn;
318 struct rds_message *rm; 381 struct rds_message *rm;
319 unsigned long flags;
320 unsigned int total = 0; 382 unsigned int total = 0;
383 unsigned long flags;
321 size_t i; 384 size_t i;
322 385
323 len /= sizeof(struct rds_info_message); 386 len /= sizeof(struct rds_info_message);
324 387
325 spin_lock_irqsave(&rds_conn_lock, flags); 388 rcu_read_lock();
326 389
327 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
328 i++, head++) { 391 i++, head++) {
329 hlist_for_each_entry(conn, pos, head, c_hash_node) { 392 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
330 if (want_send) 393 if (want_send)
331 list = &conn->c_send_queue; 394 list = &conn->c_send_queue;
332 else 395 else
333 list = &conn->c_retrans; 396 list = &conn->c_retrans;
334 397
335 spin_lock(&conn->c_lock); 398 spin_lock_irqsave(&conn->c_lock, flags);
336 399
337 /* XXX too lazy to maintain counts.. */ 400 /* XXX too lazy to maintain counts.. */
338 list_for_each_entry(rm, list, m_conn_item) { 401 list_for_each_entry(rm, list, m_conn_item) {
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
343 conn->c_faddr, 0); 406 conn->c_faddr, 0);
344 } 407 }
345 408
346 spin_unlock(&conn->c_lock); 409 spin_unlock_irqrestore(&conn->c_lock, flags);
347 } 410 }
348 } 411 }
349 412 rcu_read_unlock();
350 spin_unlock_irqrestore(&rds_conn_lock, flags);
351 413
352 lens->nr = total; 414 lens->nr = total;
353 lens->each = sizeof(struct rds_info_message); 415 lens->each = sizeof(struct rds_info_message);
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
377 uint64_t buffer[(item_len + 7) / 8]; 439 uint64_t buffer[(item_len + 7) / 8];
378 struct hlist_head *head; 440 struct hlist_head *head;
379 struct hlist_node *pos; 441 struct hlist_node *pos;
380 struct hlist_node *tmp;
381 struct rds_connection *conn; 442 struct rds_connection *conn;
382 unsigned long flags;
383 size_t i; 443 size_t i;
384 444
385 spin_lock_irqsave(&rds_conn_lock, flags); 445 rcu_read_lock();
386 446
387 lens->nr = 0; 447 lens->nr = 0;
388 lens->each = item_len; 448 lens->each = item_len;
389 449
390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 450 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
391 i++, head++) { 451 i++, head++) {
392 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { 452 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
393 453
394 /* XXX no c_lock usage.. */ 454 /* XXX no c_lock usage.. */
395 if (!visitor(conn, buffer)) 455 if (!visitor(conn, buffer))
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
405 lens->nr++; 465 lens->nr++;
406 } 466 }
407 } 467 }
408 468 rcu_read_unlock();
409 spin_unlock_irqrestore(&rds_conn_lock, flags);
410} 469}
411EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 470EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
412 471
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn,
423 sizeof(cinfo->transport)); 482 sizeof(cinfo->transport));
424 cinfo->flags = 0; 483 cinfo->flags = 0;
425 484
426 rds_conn_info_set(cinfo->flags, 485 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
427 rds_conn_is_sending(conn), SENDING); 486 SENDING);
428 /* XXX Future: return the state rather than these funky bits */ 487 /* XXX Future: return the state rather than these funky bits */
429 rds_conn_info_set(cinfo->flags, 488 rds_conn_info_set(cinfo->flags,
430 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, 489 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
444 sizeof(struct rds_info_connection)); 503 sizeof(struct rds_info_connection));
445} 504}
446 505
447int __init rds_conn_init(void) 506int rds_conn_init(void)
448{ 507{
449 rds_conn_slab = kmem_cache_create("rds_connection", 508 rds_conn_slab = kmem_cache_create("rds_connection",
450 sizeof(struct rds_connection), 509 sizeof(struct rds_connection),
451 0, 0, NULL); 510 0, 0, NULL);
452 if (rds_conn_slab == NULL) 511 if (!rds_conn_slab)
453 return -ENOMEM; 512 return -ENOMEM;
454 513
455 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 514 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn)
487EXPORT_SYMBOL_GPL(rds_conn_drop); 546EXPORT_SYMBOL_GPL(rds_conn_drop);
488 547
489/* 548/*
549 * If the connection is down, trigger a connect. We may have scheduled a
550 * delayed reconnect however - in this case we should not interfere.
551 */
552void rds_conn_connect_if_down(struct rds_connection *conn)
553{
554 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
555 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
556 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
557}
558EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
559
560/*
490 * An error occurred on the connection 561 * An error occurred on the connection
491 */ 562 */
492void 563void