diff options
author | Sowmini Varadhan <sowmini.varadhan@oracle.com> | 2016-06-04 17:00:00 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-07 18:10:15 -0400 |
commit | 9c79440e2c5e2518879f1599270f64c3ddda3baf (patch) | |
tree | 69cbc888d7d91fe1e3c197443f608e35d54a4c30 /net | |
parent | 0b6f760cff04a7cdfafc3ec6915e91fed0533d8d (diff) |
RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()
The send path needs to be quiesced before resetting callbacks from
rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize
rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves
this using the c_state and RDS_IN_XMIT bit following the pattern
used by rds_conn_shutdown(). However this leaves the possibility
of a race window as shown in the sequence below
take t_conn_lock in rds_tcp_conn_connect
send outgoing syn to peer
drop t_conn_lock in rds_tcp_conn_connect
incoming from peer triggers rds_tcp_accept_one, conn is
marked CONNECTING
wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads
call rds_tcp_reset_callbacks
[.. race-window where incoming syn-ack can cause the conn
to be marked UP from rds_tcp_state_change ..]
lock_sock called from rds_tcp_reset_callbacks, and we set
t_sock to null
As soon as the conn is marked UP in the race-window above, rds_send_xmit()
threads will proceed to rds_tcp_xmit and may encounter a null-pointer
deref on the t_sock.
Given that rds_tcp_state_change() is invoked in softirq context, whereas
rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT
after lock_sock could result in a deadlock with tcp_sendmsg, this
commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which
will prevent a transition to RDS_CONN_UP from rds_tcp_state_change().
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/rds/rds.h | 2 | ||||
-rw-r--r-- | net/rds/tcp.c | 14 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 2 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 7 | ||||
-rw-r--r-- | net/rds/threads.c | 10 |
5 files changed, 27 insertions, 8 deletions
diff --git a/net/rds/rds.h b/net/rds/rds.h index 80256b08eac0..387df5f32e49 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h | |||
@@ -74,6 +74,7 @@ enum { | |||
74 | RDS_CONN_CONNECTING, | 74 | RDS_CONN_CONNECTING, |
75 | RDS_CONN_DISCONNECTING, | 75 | RDS_CONN_DISCONNECTING, |
76 | RDS_CONN_UP, | 76 | RDS_CONN_UP, |
77 | RDS_CONN_RESETTING, | ||
77 | RDS_CONN_ERROR, | 78 | RDS_CONN_ERROR, |
78 | }; | 79 | }; |
79 | 80 | ||
@@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *); | |||
813 | void rds_shutdown_worker(struct work_struct *); | 814 | void rds_shutdown_worker(struct work_struct *); |
814 | void rds_send_worker(struct work_struct *); | 815 | void rds_send_worker(struct work_struct *); |
815 | void rds_recv_worker(struct work_struct *); | 816 | void rds_recv_worker(struct work_struct *); |
817 | void rds_connect_path_complete(struct rds_connection *conn, int curr); | ||
816 | void rds_connect_complete(struct rds_connection *conn); | 818 | void rds_connect_complete(struct rds_connection *conn); |
817 | 819 | ||
818 | /* transport.c */ | 820 | /* transport.c */ |
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 7ab1b41ffc88..74ee126a6fe6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c | |||
@@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock, | |||
148 | * potentially have transitioned to the RDS_CONN_UP state, | 148 | * potentially have transitioned to the RDS_CONN_UP state, |
149 | * so we must quiesce any send threads before resetting | 149 | * so we must quiesce any send threads before resetting |
150 | * c_transport_data. We quiesce these threads by setting | 150 | * c_transport_data. We quiesce these threads by setting |
151 | * cp_state to something other than RDS_CONN_UP, and then | 151 | * c_state to something other than RDS_CONN_UP, and then |
152 | * waiting for any existing threads in rds_send_xmit to | 152 | * waiting for any existing threads in rds_send_xmit to |
153 | * complete release_in_xmit(). (Subsequent threads entering | 153 | * complete release_in_xmit(). (Subsequent threads entering |
154 | * rds_send_xmit() will bail on !rds_conn_up(). | 154 | * rds_send_xmit() will bail on !rds_conn_up(). |
155 | * | ||
156 | * However an incoming syn-ack at this point would end up | ||
157 | * marking the conn as RDS_CONN_UP, and would again permit | ||
158 | * rds_send_xmi() threads through, so ideally we would | ||
159 | * synchronize on RDS_CONN_UP after lock_sock(), but cannot | ||
160 | * do that: waiting on !RDS_IN_XMIT after lock_sock() may | ||
161 | * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT | ||
162 | * would not get set. As a result, we set c_state to | ||
163 | * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change | ||
164 | * cannot mark rds_conn_path_up() in the window before lock_sock() | ||
155 | */ | 165 | */ |
166 | atomic_set(&conn->c_state, RDS_CONN_RESETTING); | ||
167 | wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); | ||
156 | lock_sock(osock->sk); | 168 | lock_sock(osock->sk); |
157 | /* reset receive side state for rds_tcp_data_recv() for osock */ | 169 | /* reset receive side state for rds_tcp_data_recv() for osock */ |
158 | if (tc->t_tinc) { | 170 | if (tc->t_tinc) { |
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fb82e0a0bf89..fba13d0305fb 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c | |||
@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk) | |||
60 | case TCP_SYN_RECV: | 60 | case TCP_SYN_RECV: |
61 | break; | 61 | break; |
62 | case TCP_ESTABLISHED: | 62 | case TCP_ESTABLISHED: |
63 | rds_connect_complete(conn); | 63 | rds_connect_path_complete(conn, RDS_CONN_CONNECTING); |
64 | break; | 64 | break; |
65 | case TCP_CLOSE_WAIT: | 65 | case TCP_CLOSE_WAIT: |
66 | case TCP_CLOSE: | 66 | case TCP_CLOSE: |
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d9fe53675d95..686b1d03a558 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c | |||
@@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock) | |||
135 | !conn->c_outgoing) { | 135 | !conn->c_outgoing) { |
136 | goto rst_nsk; | 136 | goto rst_nsk; |
137 | } else { | 137 | } else { |
138 | atomic_set(&conn->c_state, RDS_CONN_CONNECTING); | ||
139 | wait_event(conn->c_waitq, | ||
140 | !test_bit(RDS_IN_XMIT, &conn->c_flags)); | ||
141 | rds_tcp_reset_callbacks(new_sock, conn); | 138 | rds_tcp_reset_callbacks(new_sock, conn); |
142 | conn->c_outgoing = 0; | 139 | conn->c_outgoing = 0; |
140 | /* rds_connect_path_complete() marks RDS_CONN_UP */ | ||
141 | rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); | ||
143 | } | 142 | } |
144 | } else { | 143 | } else { |
145 | rds_tcp_set_callbacks(new_sock, conn); | 144 | rds_tcp_set_callbacks(new_sock, conn); |
145 | rds_connect_path_complete(conn, RDS_CONN_CONNECTING); | ||
146 | } | 146 | } |
147 | rds_connect_complete(conn); /* marks RDS_CONN_UP */ | ||
148 | new_sock = NULL; | 147 | new_sock = NULL; |
149 | ret = 0; | 148 | ret = 0; |
150 | goto out; | 149 | goto out; |
diff --git a/net/rds/threads.c b/net/rds/threads.c index 454aa6d23327..4a323045719b 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c | |||
@@ -71,9 +71,9 @@ | |||
71 | struct workqueue_struct *rds_wq; | 71 | struct workqueue_struct *rds_wq; |
72 | EXPORT_SYMBOL_GPL(rds_wq); | 72 | EXPORT_SYMBOL_GPL(rds_wq); |
73 | 73 | ||
74 | void rds_connect_complete(struct rds_connection *conn) | 74 | void rds_connect_path_complete(struct rds_connection *conn, int curr) |
75 | { | 75 | { |
76 | if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { | 76 | if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) { |
77 | printk(KERN_WARNING "%s: Cannot transition to state UP, " | 77 | printk(KERN_WARNING "%s: Cannot transition to state UP, " |
78 | "current state is %d\n", | 78 | "current state is %d\n", |
79 | __func__, | 79 | __func__, |
@@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn) | |||
90 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | 90 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); |
91 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | 91 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); |
92 | } | 92 | } |
93 | EXPORT_SYMBOL_GPL(rds_connect_path_complete); | ||
94 | |||
95 | void rds_connect_complete(struct rds_connection *conn) | ||
96 | { | ||
97 | rds_connect_path_complete(conn, RDS_CONN_CONNECTING); | ||
98 | } | ||
93 | EXPORT_SYMBOL_GPL(rds_connect_complete); | 99 | EXPORT_SYMBOL_GPL(rds_connect_complete); |
94 | 100 | ||
95 | /* | 101 | /* |