diff options
Diffstat (limited to 'net/rds/connection.c')
-rw-r--r-- | net/rds/connection.c | 157 |
1 files changed, 114 insertions, 43 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c index 7619b671ca28..870992e08cae 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c | |||
@@ -37,7 +37,6 @@ | |||
37 | 37 | ||
38 | #include "rds.h" | 38 | #include "rds.h" |
39 | #include "loop.h" | 39 | #include "loop.h" |
40 | #include "rdma.h" | ||
41 | 40 | ||
42 | #define RDS_CONNECTION_HASH_BITS 12 | 41 | #define RDS_CONNECTION_HASH_BITS 12 |
43 | #define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) | 42 | #define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) |
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) | |||
63 | var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ | 62 | var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ |
64 | } while (0) | 63 | } while (0) |
65 | 64 | ||
66 | static inline int rds_conn_is_sending(struct rds_connection *conn) | 65 | /* rcu read lock must be held or the connection spinlock */ |
67 | { | ||
68 | int ret = 0; | ||
69 | |||
70 | if (!mutex_trylock(&conn->c_send_lock)) | ||
71 | ret = 1; | ||
72 | else | ||
73 | mutex_unlock(&conn->c_send_lock); | ||
74 | |||
75 | return ret; | ||
76 | } | ||
77 | |||
78 | static struct rds_connection *rds_conn_lookup(struct hlist_head *head, | 66 | static struct rds_connection *rds_conn_lookup(struct hlist_head *head, |
79 | __be32 laddr, __be32 faddr, | 67 | __be32 laddr, __be32 faddr, |
80 | struct rds_transport *trans) | 68 | struct rds_transport *trans) |
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head, | |||
82 | struct rds_connection *conn, *ret = NULL; | 70 | struct rds_connection *conn, *ret = NULL; |
83 | struct hlist_node *pos; | 71 | struct hlist_node *pos; |
84 | 72 | ||
85 | hlist_for_each_entry(conn, pos, head, c_hash_node) { | 73 | hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { |
86 | if (conn->c_faddr == faddr && conn->c_laddr == laddr && | 74 | if (conn->c_faddr == faddr && conn->c_laddr == laddr && |
87 | conn->c_trans == trans) { | 75 | conn->c_trans == trans) { |
88 | ret = conn; | 76 | ret = conn; |
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
129 | { | 117 | { |
130 | struct rds_connection *conn, *parent = NULL; | 118 | struct rds_connection *conn, *parent = NULL; |
131 | struct hlist_head *head = rds_conn_bucket(laddr, faddr); | 119 | struct hlist_head *head = rds_conn_bucket(laddr, faddr); |
120 | struct rds_transport *loop_trans; | ||
132 | unsigned long flags; | 121 | unsigned long flags; |
133 | int ret; | 122 | int ret; |
134 | 123 | ||
135 | spin_lock_irqsave(&rds_conn_lock, flags); | 124 | rcu_read_lock(); |
136 | conn = rds_conn_lookup(head, laddr, faddr, trans); | 125 | conn = rds_conn_lookup(head, laddr, faddr, trans); |
137 | if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && | 126 | if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && |
138 | !is_outgoing) { | 127 | !is_outgoing) { |
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
143 | parent = conn; | 132 | parent = conn; |
144 | conn = parent->c_passive; | 133 | conn = parent->c_passive; |
145 | } | 134 | } |
146 | spin_unlock_irqrestore(&rds_conn_lock, flags); | 135 | rcu_read_unlock(); |
147 | if (conn) | 136 | if (conn) |
148 | goto out; | 137 | goto out; |
149 | 138 | ||
150 | conn = kmem_cache_zalloc(rds_conn_slab, gfp); | 139 | conn = kmem_cache_zalloc(rds_conn_slab, gfp); |
151 | if (conn == NULL) { | 140 | if (!conn) { |
152 | conn = ERR_PTR(-ENOMEM); | 141 | conn = ERR_PTR(-ENOMEM); |
153 | goto out; | 142 | goto out; |
154 | } | 143 | } |
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
159 | spin_lock_init(&conn->c_lock); | 148 | spin_lock_init(&conn->c_lock); |
160 | conn->c_next_tx_seq = 1; | 149 | conn->c_next_tx_seq = 1; |
161 | 150 | ||
162 | mutex_init(&conn->c_send_lock); | 151 | init_waitqueue_head(&conn->c_waitq); |
163 | INIT_LIST_HEAD(&conn->c_send_queue); | 152 | INIT_LIST_HEAD(&conn->c_send_queue); |
164 | INIT_LIST_HEAD(&conn->c_retrans); | 153 | INIT_LIST_HEAD(&conn->c_retrans); |
165 | 154 | ||
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
175 | * can bind to the destination address then we'd rather the messages | 164 | * can bind to the destination address then we'd rather the messages |
176 | * flow through loopback rather than either transport. | 165 | * flow through loopback rather than either transport. |
177 | */ | 166 | */ |
178 | if (rds_trans_get_preferred(faddr)) { | 167 | loop_trans = rds_trans_get_preferred(faddr); |
168 | if (loop_trans) { | ||
169 | rds_trans_put(loop_trans); | ||
179 | conn->c_loopback = 1; | 170 | conn->c_loopback = 1; |
180 | if (is_outgoing && trans->t_prefer_loopback) { | 171 | if (is_outgoing && trans->t_prefer_loopback) { |
181 | /* "outgoing" connection - and the transport | 172 | /* "outgoing" connection - and the transport |
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | |||
238 | kmem_cache_free(rds_conn_slab, conn); | 229 | kmem_cache_free(rds_conn_slab, conn); |
239 | conn = found; | 230 | conn = found; |
240 | } else { | 231 | } else { |
241 | hlist_add_head(&conn->c_hash_node, head); | 232 | hlist_add_head_rcu(&conn->c_hash_node, head); |
242 | rds_cong_add_conn(conn); | 233 | rds_cong_add_conn(conn); |
243 | rds_conn_count++; | 234 | rds_conn_count++; |
244 | } | 235 | } |
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, | |||
263 | } | 254 | } |
264 | EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); | 255 | EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); |
265 | 256 | ||
257 | void rds_conn_shutdown(struct rds_connection *conn) | ||
258 | { | ||
259 | /* shut it down unless it's down already */ | ||
260 | if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) { | ||
261 | /* | ||
262 | * Quiesce the connection mgmt handlers before we start tearing | ||
263 | * things down. We don't hold the mutex for the entire | ||
264 | * duration of the shutdown operation, else we may be | ||
265 | * deadlocking with the CM handler. Instead, the CM event | ||
266 | * handler is supposed to check for state DISCONNECTING | ||
267 | */ | ||
268 | mutex_lock(&conn->c_cm_lock); | ||
269 | if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) | ||
270 | && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) { | ||
271 | rds_conn_error(conn, "shutdown called in state %d\n", | ||
272 | atomic_read(&conn->c_state)); | ||
273 | mutex_unlock(&conn->c_cm_lock); | ||
274 | return; | ||
275 | } | ||
276 | mutex_unlock(&conn->c_cm_lock); | ||
277 | |||
278 | wait_event(conn->c_waitq, | ||
279 | !test_bit(RDS_IN_XMIT, &conn->c_flags)); | ||
280 | |||
281 | conn->c_trans->conn_shutdown(conn); | ||
282 | rds_conn_reset(conn); | ||
283 | |||
284 | if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) { | ||
285 | /* This can happen - eg when we're in the middle of tearing | ||
286 | * down the connection, and someone unloads the rds module. | ||
287 | * Quite reproduceable with loopback connections. | ||
288 | * Mostly harmless. | ||
289 | */ | ||
290 | rds_conn_error(conn, | ||
291 | "%s: failed to transition to state DOWN, " | ||
292 | "current state is %d\n", | ||
293 | __func__, | ||
294 | atomic_read(&conn->c_state)); | ||
295 | return; | ||
296 | } | ||
297 | } | ||
298 | |||
299 | /* Then reconnect if it's still live. | ||
300 | * The passive side of an IB loopback connection is never added | ||
301 | * to the conn hash, so we never trigger a reconnect on this | ||
302 | * conn - the reconnect is always triggered by the active peer. */ | ||
303 | cancel_delayed_work_sync(&conn->c_conn_w); | ||
304 | rcu_read_lock(); | ||
305 | if (!hlist_unhashed(&conn->c_hash_node)) { | ||
306 | rcu_read_unlock(); | ||
307 | rds_queue_reconnect(conn); | ||
308 | } else { | ||
309 | rcu_read_unlock(); | ||
310 | } | ||
311 | } | ||
312 | |||
313 | /* | ||
314 | * Stop and free a connection. | ||
315 | * | ||
316 | * This can only be used in very limited circumstances. It assumes that once | ||
317 | * the conn has been shutdown that no one else is referencing the connection. | ||
318 | * We can only ensure this in the rmmod path in the current code. | ||
319 | */ | ||
266 | void rds_conn_destroy(struct rds_connection *conn) | 320 | void rds_conn_destroy(struct rds_connection *conn) |
267 | { | 321 | { |
268 | struct rds_message *rm, *rtmp; | 322 | struct rds_message *rm, *rtmp; |
323 | unsigned long flags; | ||
269 | 324 | ||
270 | rdsdebug("freeing conn %p for %pI4 -> " | 325 | rdsdebug("freeing conn %p for %pI4 -> " |
271 | "%pI4\n", conn, &conn->c_laddr, | 326 | "%pI4\n", conn, &conn->c_laddr, |
272 | &conn->c_faddr); | 327 | &conn->c_faddr); |
273 | 328 | ||
274 | hlist_del_init(&conn->c_hash_node); | 329 | /* Ensure conn will not be scheduled for reconnect */ |
330 | spin_lock_irq(&rds_conn_lock); | ||
331 | hlist_del_init_rcu(&conn->c_hash_node); | ||
332 | spin_unlock_irq(&rds_conn_lock); | ||
333 | synchronize_rcu(); | ||
275 | 334 | ||
276 | /* wait for the rds thread to shut it down */ | 335 | /* shut the connection down */ |
277 | atomic_set(&conn->c_state, RDS_CONN_ERROR); | 336 | rds_conn_drop(conn); |
278 | cancel_delayed_work(&conn->c_conn_w); | 337 | flush_work(&conn->c_down_w); |
279 | queue_work(rds_wq, &conn->c_down_w); | 338 | |
280 | flush_workqueue(rds_wq); | 339 | /* make sure lingering queued work won't try to ref the conn */ |
340 | cancel_delayed_work_sync(&conn->c_send_w); | ||
341 | cancel_delayed_work_sync(&conn->c_recv_w); | ||
281 | 342 | ||
282 | /* tear down queued messages */ | 343 | /* tear down queued messages */ |
283 | list_for_each_entry_safe(rm, rtmp, | 344 | list_for_each_entry_safe(rm, rtmp, |
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn) | |||
302 | BUG_ON(!list_empty(&conn->c_retrans)); | 363 | BUG_ON(!list_empty(&conn->c_retrans)); |
303 | kmem_cache_free(rds_conn_slab, conn); | 364 | kmem_cache_free(rds_conn_slab, conn); |
304 | 365 | ||
366 | spin_lock_irqsave(&rds_conn_lock, flags); | ||
305 | rds_conn_count--; | 367 | rds_conn_count--; |
368 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
306 | } | 369 | } |
307 | EXPORT_SYMBOL_GPL(rds_conn_destroy); | 370 | EXPORT_SYMBOL_GPL(rds_conn_destroy); |
308 | 371 | ||
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, | |||
316 | struct list_head *list; | 379 | struct list_head *list; |
317 | struct rds_connection *conn; | 380 | struct rds_connection *conn; |
318 | struct rds_message *rm; | 381 | struct rds_message *rm; |
319 | unsigned long flags; | ||
320 | unsigned int total = 0; | 382 | unsigned int total = 0; |
383 | unsigned long flags; | ||
321 | size_t i; | 384 | size_t i; |
322 | 385 | ||
323 | len /= sizeof(struct rds_info_message); | 386 | len /= sizeof(struct rds_info_message); |
324 | 387 | ||
325 | spin_lock_irqsave(&rds_conn_lock, flags); | 388 | rcu_read_lock(); |
326 | 389 | ||
327 | for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); | 390 | for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); |
328 | i++, head++) { | 391 | i++, head++) { |
329 | hlist_for_each_entry(conn, pos, head, c_hash_node) { | 392 | hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { |
330 | if (want_send) | 393 | if (want_send) |
331 | list = &conn->c_send_queue; | 394 | list = &conn->c_send_queue; |
332 | else | 395 | else |
333 | list = &conn->c_retrans; | 396 | list = &conn->c_retrans; |
334 | 397 | ||
335 | spin_lock(&conn->c_lock); | 398 | spin_lock_irqsave(&conn->c_lock, flags); |
336 | 399 | ||
337 | /* XXX too lazy to maintain counts.. */ | 400 | /* XXX too lazy to maintain counts.. */ |
338 | list_for_each_entry(rm, list, m_conn_item) { | 401 | list_for_each_entry(rm, list, m_conn_item) { |
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, | |||
343 | conn->c_faddr, 0); | 406 | conn->c_faddr, 0); |
344 | } | 407 | } |
345 | 408 | ||
346 | spin_unlock(&conn->c_lock); | 409 | spin_unlock_irqrestore(&conn->c_lock, flags); |
347 | } | 410 | } |
348 | } | 411 | } |
349 | 412 | rcu_read_unlock(); | |
350 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
351 | 413 | ||
352 | lens->nr = total; | 414 | lens->nr = total; |
353 | lens->each = sizeof(struct rds_info_message); | 415 | lens->each = sizeof(struct rds_info_message); |
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, | |||
377 | uint64_t buffer[(item_len + 7) / 8]; | 439 | uint64_t buffer[(item_len + 7) / 8]; |
378 | struct hlist_head *head; | 440 | struct hlist_head *head; |
379 | struct hlist_node *pos; | 441 | struct hlist_node *pos; |
380 | struct hlist_node *tmp; | ||
381 | struct rds_connection *conn; | 442 | struct rds_connection *conn; |
382 | unsigned long flags; | ||
383 | size_t i; | 443 | size_t i; |
384 | 444 | ||
385 | spin_lock_irqsave(&rds_conn_lock, flags); | 445 | rcu_read_lock(); |
386 | 446 | ||
387 | lens->nr = 0; | 447 | lens->nr = 0; |
388 | lens->each = item_len; | 448 | lens->each = item_len; |
389 | 449 | ||
390 | for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); | 450 | for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); |
391 | i++, head++) { | 451 | i++, head++) { |
392 | hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { | 452 | hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { |
393 | 453 | ||
394 | /* XXX no c_lock usage.. */ | 454 | /* XXX no c_lock usage.. */ |
395 | if (!visitor(conn, buffer)) | 455 | if (!visitor(conn, buffer)) |
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, | |||
405 | lens->nr++; | 465 | lens->nr++; |
406 | } | 466 | } |
407 | } | 467 | } |
408 | 468 | rcu_read_unlock(); | |
409 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
410 | } | 469 | } |
411 | EXPORT_SYMBOL_GPL(rds_for_each_conn_info); | 470 | EXPORT_SYMBOL_GPL(rds_for_each_conn_info); |
412 | 471 | ||
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn, | |||
423 | sizeof(cinfo->transport)); | 482 | sizeof(cinfo->transport)); |
424 | cinfo->flags = 0; | 483 | cinfo->flags = 0; |
425 | 484 | ||
426 | rds_conn_info_set(cinfo->flags, | 485 | rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags), |
427 | rds_conn_is_sending(conn), SENDING); | 486 | SENDING); |
428 | /* XXX Future: return the state rather than these funky bits */ | 487 | /* XXX Future: return the state rather than these funky bits */ |
429 | rds_conn_info_set(cinfo->flags, | 488 | rds_conn_info_set(cinfo->flags, |
430 | atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, | 489 | atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, |
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len, | |||
444 | sizeof(struct rds_info_connection)); | 503 | sizeof(struct rds_info_connection)); |
445 | } | 504 | } |
446 | 505 | ||
447 | int __init rds_conn_init(void) | 506 | int rds_conn_init(void) |
448 | { | 507 | { |
449 | rds_conn_slab = kmem_cache_create("rds_connection", | 508 | rds_conn_slab = kmem_cache_create("rds_connection", |
450 | sizeof(struct rds_connection), | 509 | sizeof(struct rds_connection), |
451 | 0, 0, NULL); | 510 | 0, 0, NULL); |
452 | if (rds_conn_slab == NULL) | 511 | if (!rds_conn_slab) |
453 | return -ENOMEM; | 512 | return -ENOMEM; |
454 | 513 | ||
455 | rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); | 514 | rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); |
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn) | |||
487 | EXPORT_SYMBOL_GPL(rds_conn_drop); | 546 | EXPORT_SYMBOL_GPL(rds_conn_drop); |
488 | 547 | ||
489 | /* | 548 | /* |
549 | * If the connection is down, trigger a connect. We may have scheduled a | ||
550 | * delayed reconnect however - in this case we should not interfere. | ||
551 | */ | ||
552 | void rds_conn_connect_if_down(struct rds_connection *conn) | ||
553 | { | ||
554 | if (rds_conn_state(conn) == RDS_CONN_DOWN && | ||
555 | !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) | ||
556 | queue_delayed_work(rds_wq, &conn->c_conn_w, 0); | ||
557 | } | ||
558 | EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); | ||
559 | |||
560 | /* | ||
490 | * An error occurred on the connection | 561 | * An error occurred on the connection |
491 | */ | 562 | */ |
492 | void | 563 | void |