aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorSowmini Varadhan <sowmini.varadhan@oracle.com>2016-06-04 17:00:00 -0400
committerDavid S. Miller <davem@davemloft.net>2016-06-07 18:10:15 -0400
commit9c79440e2c5e2518879f1599270f64c3ddda3baf (patch)
tree69cbc888d7d91fe1e3c197443f608e35d54a4c30 /net
parent0b6f760cff04a7cdfafc3ec6915e91fed0533d8d (diff)
RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()
The send path needs to be quiesced before resetting callbacks from rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves this using the c_state and RDS_IN_XMIT bit following the pattern used by rds_conn_shutdown(). However this leaves the possibility of a race window as shown in the sequence below take t_conn_lock in rds_tcp_conn_connect send outgoing syn to peer drop t_conn_lock in rds_tcp_conn_connect incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads call rds_tcp_reset_callbacks [.. race-window where incoming syn-ack can cause the conn to be marked UP from rds_tcp_state_change ..] lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to null As soon as the conn is marked UP in the race-window above, rds_send_xmit() threads will proceed to rds_tcp_xmit and may encounter a null-pointer deref on the t_sock. Given that rds_tcp_state_change() is invoked in softirq context, whereas rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT after lock_sock could result in a deadlock with tcp_sendmsg, this commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which will prevent a transition to RDS_CONN_UP from rds_tcp_state_change(). Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/tcp.c14
-rw-r--r--net/rds/tcp_connect.c2
-rw-r--r--net/rds/tcp_listen.c7
-rw-r--r--net/rds/threads.c10
5 files changed, 27 insertions, 8 deletions
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 80256b08eac0..387df5f32e49 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -74,6 +74,7 @@ enum {
74 RDS_CONN_CONNECTING, 74 RDS_CONN_CONNECTING,
75 RDS_CONN_DISCONNECTING, 75 RDS_CONN_DISCONNECTING,
76 RDS_CONN_UP, 76 RDS_CONN_UP,
77 RDS_CONN_RESETTING,
77 RDS_CONN_ERROR, 78 RDS_CONN_ERROR,
78}; 79};
79 80
@@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *);
813void rds_shutdown_worker(struct work_struct *); 814void rds_shutdown_worker(struct work_struct *);
814void rds_send_worker(struct work_struct *); 815void rds_send_worker(struct work_struct *);
815void rds_recv_worker(struct work_struct *); 816void rds_recv_worker(struct work_struct *);
817void rds_connect_path_complete(struct rds_connection *conn, int curr);
816void rds_connect_complete(struct rds_connection *conn); 818void rds_connect_complete(struct rds_connection *conn);
817 819
818/* transport.c */ 820/* transport.c */
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 7ab1b41ffc88..74ee126a6fe6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock,
148 * potentially have transitioned to the RDS_CONN_UP state, 148 * potentially have transitioned to the RDS_CONN_UP state,
149 * so we must quiesce any send threads before resetting 149 * so we must quiesce any send threads before resetting
150 * c_transport_data. We quiesce these threads by setting 150 * c_transport_data. We quiesce these threads by setting
151 * cp_state to something other than RDS_CONN_UP, and then 151 * c_state to something other than RDS_CONN_UP, and then
152 * waiting for any existing threads in rds_send_xmit to 152 * waiting for any existing threads in rds_send_xmit to
153 * complete release_in_xmit(). (Subsequent threads entering 153 * complete release_in_xmit(). (Subsequent threads entering
154 * rds_send_xmit() will bail on !rds_conn_up(). 154 * rds_send_xmit() will bail on !rds_conn_up().
155 *
156 * However an incoming syn-ack at this point would end up
157 * marking the conn as RDS_CONN_UP, and would again permit
158 * rds_send_xmi() threads through, so ideally we would
159 * synchronize on RDS_CONN_UP after lock_sock(), but cannot
160 * do that: waiting on !RDS_IN_XMIT after lock_sock() may
161 * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
162 * would not get set. As a result, we set c_state to
163 * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
164 * cannot mark rds_conn_path_up() in the window before lock_sock()
155 */ 165 */
166 atomic_set(&conn->c_state, RDS_CONN_RESETTING);
167 wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
156 lock_sock(osock->sk); 168 lock_sock(osock->sk);
157 /* reset receive side state for rds_tcp_data_recv() for osock */ 169 /* reset receive side state for rds_tcp_data_recv() for osock */
158 if (tc->t_tinc) { 170 if (tc->t_tinc) {
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index fb82e0a0bf89..fba13d0305fb 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk)
60 case TCP_SYN_RECV: 60 case TCP_SYN_RECV:
61 break; 61 break;
62 case TCP_ESTABLISHED: 62 case TCP_ESTABLISHED:
63 rds_connect_complete(conn); 63 rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
64 break; 64 break;
65 case TCP_CLOSE_WAIT: 65 case TCP_CLOSE_WAIT:
66 case TCP_CLOSE: 66 case TCP_CLOSE:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index d9fe53675d95..686b1d03a558 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock)
135 !conn->c_outgoing) { 135 !conn->c_outgoing) {
136 goto rst_nsk; 136 goto rst_nsk;
137 } else { 137 } else {
138 atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
139 wait_event(conn->c_waitq,
140 !test_bit(RDS_IN_XMIT, &conn->c_flags));
141 rds_tcp_reset_callbacks(new_sock, conn); 138 rds_tcp_reset_callbacks(new_sock, conn);
142 conn->c_outgoing = 0; 139 conn->c_outgoing = 0;
140 /* rds_connect_path_complete() marks RDS_CONN_UP */
141 rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
143 } 142 }
144 } else { 143 } else {
145 rds_tcp_set_callbacks(new_sock, conn); 144 rds_tcp_set_callbacks(new_sock, conn);
145 rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
146 } 146 }
147 rds_connect_complete(conn); /* marks RDS_CONN_UP */
148 new_sock = NULL; 147 new_sock = NULL;
149 ret = 0; 148 ret = 0;
150 goto out; 149 goto out;
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 454aa6d23327..4a323045719b 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -71,9 +71,9 @@
71struct workqueue_struct *rds_wq; 71struct workqueue_struct *rds_wq;
72EXPORT_SYMBOL_GPL(rds_wq); 72EXPORT_SYMBOL_GPL(rds_wq);
73 73
74void rds_connect_complete(struct rds_connection *conn) 74void rds_connect_path_complete(struct rds_connection *conn, int curr)
75{ 75{
76 if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { 76 if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
77 printk(KERN_WARNING "%s: Cannot transition to state UP, " 77 printk(KERN_WARNING "%s: Cannot transition to state UP, "
78 "current state is %d\n", 78 "current state is %d\n",
79 __func__, 79 __func__,
@@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn)
90 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 90 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
91 queue_delayed_work(rds_wq, &conn->c_recv_w, 0); 91 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
92} 92}
93EXPORT_SYMBOL_GPL(rds_connect_path_complete);
94
95void rds_connect_complete(struct rds_connection *conn)
96{
97 rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
98}
93EXPORT_SYMBOL_GPL(rds_connect_complete); 99EXPORT_SYMBOL_GPL(rds_connect_complete);
94 100
95/* 101/*