author		Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 17:31:10 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 17:31:10 -0500
commit		b2fe5fa68642860e7de76167c3111623aa0d5de1 (patch)
tree		b7f9b89b7039ecefbc35fe3c8e73a6ff972641dd /net/rds
parent		a103950e0dd2058df5e8a8d4a915707bdcf205f0 (diff)
parent		a54667f6728c2714a400f3c884727da74b6d1717 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Significantly shrink the core networking routing structures. Result of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf
 2) Add netdevsim driver for testing various offloads, from Jakub Kicinski.
 3) Support cross-chip FDB operations in DSA, from Vivien Didelot.
 4) Add a 2nd listener hash table for TCP, similar to what was done for UDP. From Martin KaFai Lau.
 5) Add eBPF based queue selection to tun, from Jason Wang.
 6) Lockless qdisc support, from John Fastabend.
 7) SCTP stream interleave support, from Xin Long.
 8) Smoother TCP receive autotuning, from Eric Dumazet.
 9) Lots of erspan tunneling enhancements, from William Tu.
10) Add true function call support to BPF, from Alexei Starovoitov.
11) Add explicit support for GRO HW offloading, from Michael Chan.
12) Support extack generation in more netlink subsystems. From Alexander Aring, Quentin Monnet, and Jakub Kicinski.
13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From Russell King.
14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso.
15) Many improvements and simplifications to the NFP driver bpf JIT, from Jakub Kicinski.
16) Support for ipv6 non-equal cost multipath routing, from Ido Schimmel.
17) Add resource abstraction to devlink, from Arkadi Sharshevsky.
18) Packet scheduler classifier shared filter block support, from Jiri Pirko.
19) Avoid locking in act_csum, from Davide Caratti.
20) devinet_ioctl() simplifications from Al Viro.
21) More TCP bpf improvements from Lawrence Brakmo.
22) Add support for onlink ipv6 route flag, similar to ipv4, from David Ahern.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits)
  tls: Add support for encryption using async offload accelerator
  ip6mr: fix stale iterator
  net/sched: kconfig: Remove blank help texts
  openvswitch: meter: Use 64-bit arithmetic instead of 32-bit
  tcp_nv: fix potential integer overflow in tcpnv_acked
  r8169: fix RTL8168EP take too long to complete driver initialization.
  qmi_wwan: Add support for Quectel EP06
  rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK
  ipmr: Fix ptrdiff_t print formatting
  ibmvnic: Wait for device response when changing MAC
  qlcnic: fix deadlock bug
  tcp: release sk_frag.page in tcp_disconnect
  ipv4: Get the address of interface correctly.
  net_sched: gen_estimator: fix lockdep splat
  net: macb: Handle HRESP error
  net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring
  ipv6: addrconf: break critical section in addrconf_verify_rtnl()
  ipv6: change route cache aging logic
  i40e/i40evf: Update DESC_NEEDED value to reflect larger value
  bnxt_en: cleanup DIM work on device shutdown
  ...
Diffstat (limited to 'net/rds')
-rw-r--r--   net/rds/bind.c          1
-rw-r--r--   net/rds/cong.c         10
-rw-r--r--   net/rds/connection.c   27
-rw-r--r--   net/rds/rds.h          10
-rw-r--r--   net/rds/send.c         37
-rw-r--r--   net/rds/tcp.c          81
-rw-r--r--   net/rds/tcp.h           1
-rw-r--r--   net/rds/tcp_connect.c   2
-rw-r--r--   net/rds/tcp_recv.c      8
-rw-r--r--   net/rds/tcp_send.c      5
-rw-r--r--   net/rds/threads.c      20
11 files changed, 128 insertions, 74 deletions
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 75d43dc8e96b..5aa3a64aa4f0 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 				 rs, &addr, (int)ntohs(*port));
 			break;
 		} else {
+			rs->rs_bound_addr = 0;
 			rds_sock_put(rs);
 			ret = -ENOMEM;
 			break;
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8398fee7c866..8d19fd25dce3 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 	spin_lock_irqsave(&rds_cong_lock, flags);
 
 	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
-		if (!test_and_set_bit(0, &conn->c_map_queued)) {
+		struct rds_conn_path *cp = &conn->c_path[0];
+
+		rcu_read_lock();
+		if (!test_and_set_bit(0, &conn->c_map_queued) &&
+		    !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
 			rds_stats_inc(s_cong_update_queued);
 			/* We cannot inline the call to rds_send_xmit() here
 			 * for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 			 * therefore trigger warnings.
 			 * Defer the xmit to rds_send_worker() instead.
 			 */
-			queue_delayed_work(rds_wq,
-					   &conn->c_path[0].cp_send_w, 0);
+			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_irqrestore(&rds_cong_lock, flags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ee2d5d68b78..b10c0ef36d8d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -230,8 +230,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
 	rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
 		 conn, &laddr, &faddr,
-		 trans->t_name ? trans->t_name : "[unknown]",
-		 is_outgoing ? "(outgoing)" : "");
+		 strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
+		 "[unknown]", is_outgoing ? "(outgoing)" : "");
 
 	/*
 	 * Since we ran without holding the conn lock, someone could
@@ -382,10 +382,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 {
 	struct rds_message *rm, *rtmp;
 
+	set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);
+
 	if (!cp->cp_transport_data)
 		return;
 
 	/* make sure lingering queued work won't try to ref the conn */
+	synchronize_rcu();
 	cancel_delayed_work_sync(&cp->cp_send_w);
 	cancel_delayed_work_sync(&cp->cp_recv_w);
 
@@ -403,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 	if (cp->cp_xmit_rm)
 		rds_message_put(cp->cp_xmit_rm);
 
+	WARN_ON(delayed_work_pending(&cp->cp_send_w));
+	WARN_ON(delayed_work_pending(&cp->cp_recv_w));
+	WARN_ON(delayed_work_pending(&cp->cp_conn_w));
+	WARN_ON(work_pending(&cp->cp_down_w));
+
 	cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
 }
 
@@ -424,7 +432,6 @@ void rds_conn_destroy(struct rds_connection *conn)
 		 "%pI4\n", conn, &conn->c_laddr,
 		 &conn->c_faddr);
 
-	conn->c_destroy_in_prog = 1;
 	/* Ensure conn will not be scheduled for reconnect */
 	spin_lock_irq(&rds_conn_lock);
 	hlist_del_init_rcu(&conn->c_hash_node);
@@ -445,7 +452,6 @@ void rds_conn_destroy(struct rds_connection *conn)
 	 */
 	rds_cong_remove_conn(conn);
 
-	put_net(conn->c_net);
 	kfree(conn->c_path);
 	kmem_cache_free(rds_conn_slab, conn);
 
@@ -684,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
 	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
 
-	if (!destroy && cp->cp_conn->c_destroy_in_prog)
+	rcu_read_lock();
+	if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		rcu_read_unlock();
 		return;
-
+	}
 	queue_work(rds_wq, &cp->cp_down_w);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 
@@ -704,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
  */
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
 {
+	rcu_read_lock();
+	if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		rcu_read_unlock();
+		return;
+	}
 	if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
 	    !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
 		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c349c71babff..374ae83b60d4 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -88,6 +88,7 @@ enum {
 #define RDS_RECONNECT_PENDING	1
 #define RDS_IN_XMIT		2
 #define RDS_RECV_REFILL		3
+#define	RDS_DESTROY_PENDING	4
 
 /* Max number of multipaths per RDS connection. Must be a power of 2 */
 #define	RDS_MPATH_WORKERS	8
@@ -139,8 +140,7 @@ struct rds_connection {
 	__be32			c_faddr;
 	unsigned int		c_loopback:1,
 				c_ping_triggered:1,
-				c_destroy_in_prog:1,
-				c_pad_to_32:29;
+				c_pad_to_32:30;
 	int			c_npaths;
 	struct rds_connection	*c_passive;
 	struct rds_transport	*c_trans;
@@ -150,7 +150,7 @@ struct rds_connection {
 
 	/* Protocol version */
 	unsigned int		c_version;
-	struct net		*c_net;
+	possible_net_t		c_net;
 
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
@@ -165,13 +165,13 @@ struct rds_connection {
 static inline
 struct net *rds_conn_net(struct rds_connection *conn)
 {
-	return conn->c_net;
+	return read_pnet(&conn->c_net);
 }
 
 static inline
 void rds_conn_net_set(struct rds_connection *conn, struct net *net)
 {
-	conn->c_net = get_net(net);
+	write_pnet(&conn->c_net, net);
 }
 
 #define RDS_FLAG_CONG_BITMAP	0x01
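With the rds.h change above, the connection no longer pins its namespace: the refcounted struct net *c_net (paired with the get_net()/put_net() calls removed from connection.c) becomes a possible_net_t read and written through read_pnet()/write_pnet(), which store a plain pointer and compile down to nothing when CONFIG_NET_NS is disabled. A minimal sketch of the same accessor pattern on a hypothetical struct (demo_conn, demo_conn_net, demo_conn_net_set):

#include <net/net_namespace.h>

struct demo_conn {
	possible_net_t		d_net;	/* no reference held on the netns */
};

static inline void demo_conn_net_set(struct demo_conn *dc, struct net *net)
{
	write_pnet(&dc->d_net, net);	/* unlike get_net(), takes no reference */
}

static inline struct net *demo_conn_net(struct demo_conn *dc)
{
	return read_pnet(&dc->d_net);	/* returns &init_net when CONFIG_NET_NS=n */
}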
diff --git a/net/rds/send.c b/net/rds/send.c
index f72466c63f0c..d3e32d1f3c7d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,6 +162,12 @@ restart:
 		goto out;
 	}
 
+	if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		release_in_xmit(cp);
+		ret = -ENETUNREACH; /* dont requeue send work */
+		goto out;
+	}
+
 	/*
 	 * we record the send generation after doing the xmit acquire.
 	 * if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ over_batch:
 			 !list_empty(&cp->cp_send_queue)) && !raced) {
 		if (batch_count < send_batch_count)
 			goto restart;
-		queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+		rcu_read_lock();
+		if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+			ret = -ENETUNREACH;
+		else
+			queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+		rcu_read_unlock();
 	} else if (raced) {
 		rds_stats_inc(s_send_lock_queue_raced);
 	}
@@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	else
 		cpath = &conn->c_path[0];
 
+	if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
 	rds_conn_path_connect_if_down(cpath);
 
 	ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	rds_stats_inc(s_send_queued);
 
 	ret = rds_send_xmit(cpath);
-	if (ret == -ENOMEM || ret == -EAGAIN)
-		queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
-
+	if (ret == -ENOMEM || ret == -EAGAIN) {
+		ret = 0;
+		rcu_read_lock();
+		if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
+			ret = -ENETUNREACH;
+		else
+			queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
+		rcu_read_unlock();
+	}
+	if (ret)
+		goto out;
 	rds_message_put(rm);
 	return payload_len;
 
@@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
 	rds_stats_inc(s_send_pong);
 
 	/* schedule the send work on rds_wq */
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+	rcu_read_lock();
+	if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+	rcu_read_unlock();
 
 	rds_message_put(rm);
 	return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ab7356e0ba83..9920d2f84eff 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -271,16 +271,33 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 	return -EADDRNOTAVAIL;
 }
 
+static void rds_tcp_conn_free(void *arg)
+{
+	struct rds_tcp_connection *tc = arg;
+	unsigned long flags;
+
+	rdsdebug("freeing tc %p\n", tc);
+
+	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+	if (!tc->t_tcp_node_detached)
+		list_del(&tc->t_tcp_node);
+	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
+	kmem_cache_free(rds_tcp_conn_slab, tc);
+}
+
 static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 {
 	struct rds_tcp_connection *tc;
-	int i;
+	int i, j;
+	int ret = 0;
 
 	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
 		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
-		if (!tc)
-			return -ENOMEM;
-
+		if (!tc) {
+			ret = -ENOMEM;
+			break;
+		}
 		mutex_init(&tc->t_conn_path_lock);
 		tc->t_sock = NULL;
 		tc->t_tinc = NULL;
@@ -291,26 +308,17 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 		tc->t_cpath = &conn->c_path[i];
 
 		spin_lock_irq(&rds_tcp_conn_lock);
+		tc->t_tcp_node_detached = false;
 		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
 		spin_unlock_irq(&rds_tcp_conn_lock);
 		rdsdebug("rds_conn_path [%d] tc %p\n", i,
 			 conn->c_path[i].cp_transport_data);
 	}
-
-	return 0;
-}
-
-static void rds_tcp_conn_free(void *arg)
-{
-	struct rds_tcp_connection *tc = arg;
-	unsigned long flags;
-	rdsdebug("freeing tc %p\n", tc);
-
-	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
-	list_del(&tc->t_tcp_node);
-	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
-
-	kmem_cache_free(rds_tcp_conn_slab, tc);
+	if (ret) {
+		for (j = 0; j < i; j++)
+			rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
+	}
+	return ret;
 }
 
 static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
@@ -496,27 +504,6 @@ static struct pernet_operations rds_tcp_net_ops = {
 	.size = sizeof(struct rds_tcp_net),
 };
 
-/* explicitly send a RST on each socket, thereby releasing any socket refcnts
- * that may otherwise hold up netns deletion.
- */
-static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
-{
-	struct rds_conn_path *cp;
-	struct rds_tcp_connection *tc;
-	int i;
-	struct sock *sk;
-
-	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
-		cp = &conn->c_path[i];
-		tc = cp->cp_transport_data;
-		if (!tc->t_sock)
-			continue;
-		sk = tc->t_sock->sk;
-		sk->sk_prot->disconnect(sk, 0);
-		tcp_done(sk);
-	}
-}
-
 static void rds_tcp_kill_sock(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
@@ -528,18 +515,20 @@ static void rds_tcp_kill_sock(struct net *net)
 	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = tc->t_cpath->cp_conn->c_net;
+		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
 		if (net != c_net || !tc->t_sock)
 			continue;
-		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
 			list_move_tail(&tc->t_tcp_node, &tmp_list);
+		} else {
+			list_del(&tc->t_tcp_node);
+			tc->t_tcp_node_detached = true;
+		}
 	}
 	spin_unlock_irq(&rds_tcp_conn_lock);
-	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
-		rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
 		rds_conn_destroy(tc->t_cpath->cp_conn);
-	}
 }
 
 void *rds_tcp_listen_sock_def_readable(struct net *net)
@@ -587,7 +576,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 
 	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = tc->t_cpath->cp_conn->c_net;
+		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
 		if (net != c_net || !tc->t_sock)
 			continue;
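The rds_tcp_conn_alloc() hunk above also changes the failure path: instead of returning -ENOMEM as soon as one per-path allocation fails, it breaks out of the loop and frees the paths that were already set up, so nothing leaks. A stand-alone sketch of that unwind shape, with hypothetical names (demo_alloc_paths, DEMO_NPATHS):

#include <linux/slab.h>
#include <linux/errno.h>

#define DEMO_NPATHS 8	/* stands in for RDS_MPATH_WORKERS */

static int demo_alloc_paths(void *paths[DEMO_NPATHS], gfp_t gfp)
{
	int i, j;
	int ret = 0;

	for (i = 0; i < DEMO_NPATHS; i++) {
		paths[i] = kzalloc(64, gfp);
		if (!paths[i]) {
			ret = -ENOMEM;
			break;
		}
	}
	if (ret) {
		/* unwind: free only what was allocated before the failure */
		for (j = 0; j < i; j++)
			kfree(paths[j]);
	}
	return ret;
}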
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 864ca7d8f019..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
 struct rds_tcp_connection {
 
 	struct list_head	t_tcp_node;
+	bool			t_tcp_node_detached;
 	struct rds_conn_path	*t_cpath;
 	/* t_conn_path_lock synchronizes the connection establishment between
 	 * rds_tcp_accept_one and rds_tcp_conn_path_connect
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 46f74dad0e16..534c67aeb20f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 		  cp->cp_conn, tc, sock);
 
 	if (sock) {
-		if (cp->cp_conn->c_destroy_in_prog)
+		if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
 			rds_tcp_set_linger(sock);
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
 		lock_sock(sock->sk);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e006ef8e6d40..dd707b9e73e5 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
-		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+		rcu_read_lock();
+		if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+			queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+		rcu_read_unlock();
+	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 9b76e0fa1722..16f65744d984 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	rcu_read_lock();
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+	    !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
 		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+	rcu_read_unlock();
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index f121daa402c8..eb76db1360b0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
 
 	cp->cp_reconnect_jiffies = 0;
 	set_bit(0, &cp->cp_conn->c_map_queued);
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
-	queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	rcu_read_lock();
+	if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_connect_path_complete);
 
@@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	if (cp->cp_reconnect_jiffies == 0) {
 		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_lock();
+		if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+			queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
 		 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
 		 conn, &conn->c_laddr, &conn->c_faddr);
-	queue_delayed_work(rds_wq, &cp->cp_conn_w,
-			   rand % cp->cp_reconnect_jiffies);
+	rcu_read_lock();
+	if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+		queue_delayed_work(rds_wq, &cp->cp_conn_w,
+				   rand % cp->cp_reconnect_jiffies);
+	rcu_read_unlock();
 
 	cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
 				       rds_sysctl_reconnect_max_jiffies);