summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sowmini Varadhan <sowmini.varadhan@oracle.com> 2018-01-04 09:53:00 -0500
committer: David S. Miller <davem@davemloft.net> 2018-01-05 13:39:18 -0500
commit: 3db6e0d172c94bd9953a1347c55ffb64b1d2e74f (patch)
tree: 67f61aa4784da3fba56aa43e3626d3f3b6562c78
parent: c90ecbfaf50d2d7db25c531d9169be7e47435f3f (diff)
rds: use RCU to synchronize work-enqueue with connection teardown
rds_sendmsg() can enqueue work on cp_send_w from process context, but it should not enqueue this work if connection teardown has commenced (else we risk enqueuing work after rds_conn_path_destroy() has assumed that all work has been cancelled/flushed). Similarly, some other functions like rds_cong_queue_updates() and rds_tcp_data_ready() are called in softirq context, and may end up enqueuing work on rds_wq after rds_conn_path_destroy() has assumed that all workqs are quiesced. Check the RDS_DESTROY_PENDING bit and use RCU synchronization to avoid all these races.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/rds/cong.c10
-rw-r--r--net/rds/connection.c21
-rw-r--r--net/rds/send.c37
-rw-r--r--net/rds/tcp_recv.c8
-rw-r--r--net/rds/tcp_send.c5
-rw-r--r--net/rds/threads.c20
6 files changed, 81 insertions, 20 deletions
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8398fee7c866..8d19fd25dce3 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
219 spin_lock_irqsave(&rds_cong_lock, flags); 219 spin_lock_irqsave(&rds_cong_lock, flags);
220 220
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 struct rds_conn_path *cp = &conn->c_path[0];
223
224 rcu_read_lock();
225 if (!test_and_set_bit(0, &conn->c_map_queued) &&
226 !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
223 rds_stats_inc(s_cong_update_queued); 227 rds_stats_inc(s_cong_update_queued);
224 /* We cannot inline the call to rds_send_xmit() here 228 /* We cannot inline the call to rds_send_xmit() here
225 * for two reasons (both pertaining to a TCP transport): 229 * for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
235 * therefore trigger warnings. 239 * therefore trigger warnings.
236 * Defer the xmit to rds_send_worker() instead. 240 * Defer the xmit to rds_send_worker() instead.
237 */ 241 */
238 queue_delayed_work(rds_wq, 242 queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
239 &conn->c_path[0].cp_send_w, 0);
240 } 243 }
244 rcu_read_unlock();
241 } 245 }
242 246
243 spin_unlock_irqrestore(&rds_cong_lock, flags); 247 spin_unlock_irqrestore(&rds_cong_lock, flags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 1eed197e694f..b10c0ef36d8d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -366,8 +366,6 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
366 * to the conn hash, so we never trigger a reconnect on this 366 * to the conn hash, so we never trigger a reconnect on this
367 * conn - the reconnect is always triggered by the active peer. */ 367 * conn - the reconnect is always triggered by the active peer. */
368 cancel_delayed_work_sync(&cp->cp_conn_w); 368 cancel_delayed_work_sync(&cp->cp_conn_w);
369 if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
370 return;
371 rcu_read_lock(); 369 rcu_read_lock();
372 if (!hlist_unhashed(&conn->c_hash_node)) { 370 if (!hlist_unhashed(&conn->c_hash_node)) {
373 rcu_read_unlock(); 371 rcu_read_unlock();
@@ -390,6 +388,7 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
390 return; 388 return;
391 389
392 /* make sure lingering queued work won't try to ref the conn */ 390 /* make sure lingering queued work won't try to ref the conn */
391 synchronize_rcu();
393 cancel_delayed_work_sync(&cp->cp_send_w); 392 cancel_delayed_work_sync(&cp->cp_send_w);
394 cancel_delayed_work_sync(&cp->cp_recv_w); 393 cancel_delayed_work_sync(&cp->cp_recv_w);
395 394
@@ -407,6 +406,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
407 if (cp->cp_xmit_rm) 406 if (cp->cp_xmit_rm)
408 rds_message_put(cp->cp_xmit_rm); 407 rds_message_put(cp->cp_xmit_rm);
409 408
409 WARN_ON(delayed_work_pending(&cp->cp_send_w));
410 WARN_ON(delayed_work_pending(&cp->cp_recv_w));
411 WARN_ON(delayed_work_pending(&cp->cp_conn_w));
412 WARN_ON(work_pending(&cp->cp_down_w));
413
410 cp->cp_conn->c_trans->conn_free(cp->cp_transport_data); 414 cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
411} 415}
412 416
@@ -686,10 +690,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
686{ 690{
687 atomic_set(&cp->cp_state, RDS_CONN_ERROR); 691 atomic_set(&cp->cp_state, RDS_CONN_ERROR);
688 692
689 if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) 693 rcu_read_lock();
694 if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
695 rcu_read_unlock();
690 return; 696 return;
691 697 }
692 queue_work(rds_wq, &cp->cp_down_w); 698 queue_work(rds_wq, &cp->cp_down_w);
699 rcu_read_unlock();
693} 700}
694EXPORT_SYMBOL_GPL(rds_conn_path_drop); 701EXPORT_SYMBOL_GPL(rds_conn_path_drop);
695 702
@@ -706,9 +713,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
706 */ 713 */
707void rds_conn_path_connect_if_down(struct rds_conn_path *cp) 714void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
708{ 715{
716 rcu_read_lock();
717 if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
718 rcu_read_unlock();
719 return;
720 }
709 if (rds_conn_path_state(cp) == RDS_CONN_DOWN && 721 if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
710 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) 722 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
711 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); 723 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
724 rcu_read_unlock();
712} 725}
713EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); 726EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
714 727
diff --git a/net/rds/send.c b/net/rds/send.c
index f72466c63f0c..d3e32d1f3c7d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,6 +162,12 @@ restart:
162 goto out; 162 goto out;
163 } 163 }
164 164
165 if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
166 release_in_xmit(cp);
167 ret = -ENETUNREACH; /* dont requeue send work */
168 goto out;
169 }
170
165 /* 171 /*
166 * we record the send generation after doing the xmit acquire. 172 * we record the send generation after doing the xmit acquire.
167 * if someone else manages to jump in and do some work, we'll use 173 * if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ over_batch:
437 !list_empty(&cp->cp_send_queue)) && !raced) { 443 !list_empty(&cp->cp_send_queue)) && !raced) {
438 if (batch_count < send_batch_count) 444 if (batch_count < send_batch_count)
439 goto restart; 445 goto restart;
440 queue_delayed_work(rds_wq, &cp->cp_send_w, 1); 446 rcu_read_lock();
447 if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
448 ret = -ENETUNREACH;
449 else
450 queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
451 rcu_read_unlock();
441 } else if (raced) { 452 } else if (raced) {
442 rds_stats_inc(s_send_lock_queue_raced); 453 rds_stats_inc(s_send_lock_queue_raced);
443 } 454 }
@@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1151 else 1162 else
1152 cpath = &conn->c_path[0]; 1163 cpath = &conn->c_path[0];
1153 1164
1165 if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
1166 ret = -EAGAIN;
1167 goto out;
1168 }
1169
1154 rds_conn_path_connect_if_down(cpath); 1170 rds_conn_path_connect_if_down(cpath);
1155 1171
1156 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1172 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
1190 rds_stats_inc(s_send_queued); 1206 rds_stats_inc(s_send_queued);
1191 1207
1192 ret = rds_send_xmit(cpath); 1208 ret = rds_send_xmit(cpath);
1193 if (ret == -ENOMEM || ret == -EAGAIN) 1209 if (ret == -ENOMEM || ret == -EAGAIN) {
1194 queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); 1210 ret = 0;
1195 1211 rcu_read_lock();
1212 if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
1213 ret = -ENETUNREACH;
1214 else
1215 queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
1216 rcu_read_unlock();
1217 }
1218 if (ret)
1219 goto out;
1196 rds_message_put(rm); 1220 rds_message_put(rm);
1197 return payload_len; 1221 return payload_len;
1198 1222
@@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
1270 rds_stats_inc(s_send_pong); 1294 rds_stats_inc(s_send_pong);
1271 1295
1272 /* schedule the send work on rds_wq */ 1296 /* schedule the send work on rds_wq */
1273 queue_delayed_work(rds_wq, &cp->cp_send_w, 1); 1297 rcu_read_lock();
1298 if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
1299 queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
1300 rcu_read_unlock();
1274 1301
1275 rds_message_put(rm); 1302 rds_message_put(rm);
1276 return 0; 1303 return 0;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e006ef8e6d40..dd707b9e73e5 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
321 ready = tc->t_orig_data_ready; 321 ready = tc->t_orig_data_ready;
322 rds_tcp_stats_inc(s_tcp_data_ready_calls); 322 rds_tcp_stats_inc(s_tcp_data_ready_calls);
323 323
324 if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) 324 if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
325 queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); 325 rcu_read_lock();
326 if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
327 queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
328 rcu_read_unlock();
329 }
326out: 330out:
327 read_unlock_bh(&sk->sk_callback_lock); 331 read_unlock_bh(&sk->sk_callback_lock);
328 ready(sk); 332 ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index dc860d1bb608..73c74763ca72 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
202 tc->t_last_seen_una = rds_tcp_snd_una(tc); 202 tc->t_last_seen_una = rds_tcp_snd_una(tc);
203 rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); 203 rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
204 204
205 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) 205 rcu_read_lock();
206 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
207 !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
206 queue_delayed_work(rds_wq, &cp->cp_send_w, 0); 208 queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
209 rcu_read_unlock();
207 210
208out: 211out:
209 read_unlock_bh(&sk->sk_callback_lock); 212 read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index f121daa402c8..eb76db1360b0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
87 87
88 cp->cp_reconnect_jiffies = 0; 88 cp->cp_reconnect_jiffies = 0;
89 set_bit(0, &cp->cp_conn->c_map_queued); 89 set_bit(0, &cp->cp_conn->c_map_queued);
90 queue_delayed_work(rds_wq, &cp->cp_send_w, 0); 90 rcu_read_lock();
91 queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); 91 if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
92 queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
93 queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
94 }
95 rcu_read_unlock();
92} 96}
93EXPORT_SYMBOL_GPL(rds_connect_path_complete); 97EXPORT_SYMBOL_GPL(rds_connect_path_complete);
94 98
@@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
133 set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); 137 set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
134 if (cp->cp_reconnect_jiffies == 0) { 138 if (cp->cp_reconnect_jiffies == 0) {
135 cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; 139 cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
136 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); 140 rcu_read_lock();
141 if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
142 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
143 rcu_read_unlock();
137 return; 144 return;
138 } 145 }
139 146
@@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
141 rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", 148 rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
142 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, 149 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
143 conn, &conn->c_laddr, &conn->c_faddr); 150 conn, &conn->c_laddr, &conn->c_faddr);
144 queue_delayed_work(rds_wq, &cp->cp_conn_w, 151 rcu_read_lock();
145 rand % cp->cp_reconnect_jiffies); 152 if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
153 queue_delayed_work(rds_wq, &cp->cp_conn_w,
154 rand % cp->cp_reconnect_jiffies);
155 rcu_read_unlock();
146 156
147 cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, 157 cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
148 rds_sysctl_reconnect_max_jiffies); 158 rds_sysctl_reconnect_max_jiffies);