aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Sowmini Varadhan <sowmini.varadhan@oracle.com> 2016-06-04 16:59:58 -0400
committer David S. Miller <davem@davemloft.net> 2016-06-07 18:10:15 -0400
commit 335b48d980f631fbc5b233cbb3625ac0c86d67cb (patch)
tree c09223b2cdbe5b6a9d876ad07a9ee29cebcb8588 /net
parent 80e509db54c81247b32fcb75bb1730fc789b893d (diff)
RDS: TCP: Add/use rds_tcp_reset_callbacks to reset tcp socket safely
When rds_tcp_accept_one() has to replace the existing tcp socket with a newer tcp socket (duelling-syn resolution), it must lock_sock() to suppress the rds_tcp_data_recv() path while callbacks are being changed. Also, existing RDS datagram reassembly state must be reset, so that the next datagram on the new socket does not have corrupted state. Similarly, when resetting the newly accepted socket, appropriate locks and synchronization are needed. This commit ensures correct synchronization by invoking kernel_sock_shutdown to reset a newly accepted sock, and by taking appropriate lock_sock()s (for old and new sockets) when resetting existing callbacks.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/rds/tcp.c 65
-rw-r--r-- net/rds/tcp.h 1
-rw-r--r-- net/rds/tcp_listen.c 13
3 files changed, 67 insertions, 12 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 86187dad1440..8faa0b1ae39d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -126,9 +126,68 @@ void rds_tcp_restore_callbacks(struct socket *sock,
126} 126}
127 127
128/* 128/*
129 * This is the only path that sets tc->t_sock. Send and receive trust that 129 * rds_tcp_reset_callbacks() switches the connection to the new sock and
130 * it is set. The RDS_CONN_UP bit protects those paths from being 130 * releases the existing tc->t_sock.
131 * called while it isn't set. 131 *
132 * The only functions that set tc->t_sock are rds_tcp_set_callbacks
133 * and rds_tcp_reset_callbacks. Send and receive trust that
134 * it is set. The absence of RDS_CONN_UP bit protects those paths
135 * from being called while it isn't set.
136 */
137void rds_tcp_reset_callbacks(struct socket *sock,
138 struct rds_connection *conn)
139{
140 struct rds_tcp_connection *tc = conn->c_transport_data;
141 struct socket *osock = tc->t_sock;
142
143 if (!osock)
144 goto newsock;
145
146 /* Need to resolve a duelling SYN between peers.
147 * We have an outstanding SYN to this peer, which may
148 * potentially have transitioned to the RDS_CONN_UP state,
149 * so we must quiesce any send threads before resetting
150 * c_transport_data. We quiesce these threads by setting
151 * c_state to something other than RDS_CONN_UP, and then
152 * waiting for any existing threads in rds_send_xmit to
153 * complete release_in_xmit(). (Subsequent threads entering
154 * rds_send_xmit() will bail on !rds_conn_up().)
155 */
156 lock_sock(osock->sk);
157 /* reset receive side state for rds_tcp_data_recv() for osock */
158 if (tc->t_tinc) {
159 rds_inc_put(&tc->t_tinc->ti_inc);
160 tc->t_tinc = NULL;
161 }
162 tc->t_tinc_hdr_rem = sizeof(struct rds_header);
163 tc->t_tinc_data_rem = 0;
164 tc->t_sock = NULL;
165
166 write_lock_bh(&osock->sk->sk_callback_lock);
167
168 osock->sk->sk_user_data = NULL;
169 osock->sk->sk_data_ready = tc->t_orig_data_ready;
170 osock->sk->sk_write_space = tc->t_orig_write_space;
171 osock->sk->sk_state_change = tc->t_orig_state_change;
172 write_unlock_bh(&osock->sk->sk_callback_lock);
173 release_sock(osock->sk);
174 sock_release(osock);
175newsock:
176 lock_sock(sock->sk);
177 write_lock_bh(&sock->sk->sk_callback_lock);
178 tc->t_sock = sock;
179 sock->sk->sk_user_data = conn;
180 sock->sk->sk_data_ready = rds_tcp_data_ready;
181 sock->sk->sk_write_space = rds_tcp_write_space;
182 sock->sk->sk_state_change = rds_tcp_state_change;
183
184 write_unlock_bh(&sock->sk->sk_callback_lock);
185 release_sock(sock->sk);
186}
187
188/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments
189 * above rds_tcp_reset_callbacks for notes about synchronization
190 * with data path
132 */ 191 */
133void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) 192void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
134{ 193{
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 41c228300525..ec0602b0dc24 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -50,6 +50,7 @@ struct rds_tcp_statistics {
50void rds_tcp_tune(struct socket *sock); 50void rds_tcp_tune(struct socket *sock);
51void rds_tcp_nonagle(struct socket *sock); 51void rds_tcp_nonagle(struct socket *sock);
52void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); 52void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
53void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn);
53void rds_tcp_restore_callbacks(struct socket *sock, 54void rds_tcp_restore_callbacks(struct socket *sock,
54 struct rds_tcp_connection *tc); 55 struct rds_tcp_connection *tc);
55u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); 56u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 4bf4befe5066..d9fe53675d95 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock)
78 struct inet_sock *inet; 78 struct inet_sock *inet;
79 struct rds_tcp_connection *rs_tcp = NULL; 79 struct rds_tcp_connection *rs_tcp = NULL;
80 int conn_state; 80 int conn_state;
81 struct sock *nsk;
82 81
83 if (!sock) /* module unload or netns delete in progress */ 82 if (!sock) /* module unload or netns delete in progress */
84 return -ENETUNREACH; 83 return -ENETUNREACH;
@@ -139,23 +138,19 @@ int rds_tcp_accept_one(struct socket *sock)
139 atomic_set(&conn->c_state, RDS_CONN_CONNECTING); 138 atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
140 wait_event(conn->c_waitq, 139 wait_event(conn->c_waitq,
141 !test_bit(RDS_IN_XMIT, &conn->c_flags)); 140 !test_bit(RDS_IN_XMIT, &conn->c_flags));
142 rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); 141 rds_tcp_reset_callbacks(new_sock, conn);
143 conn->c_outgoing = 0; 142 conn->c_outgoing = 0;
144 } 143 }
144 } else {
145 rds_tcp_set_callbacks(new_sock, conn);
145 } 146 }
146 rds_tcp_set_callbacks(new_sock, conn);
147 rds_connect_complete(conn); /* marks RDS_CONN_UP */ 147 rds_connect_complete(conn); /* marks RDS_CONN_UP */
148 new_sock = NULL; 148 new_sock = NULL;
149 ret = 0; 149 ret = 0;
150 goto out; 150 goto out;
151rst_nsk: 151rst_nsk:
152 /* reset the newly returned accept sock and bail */ 152 /* reset the newly returned accept sock and bail */
153 nsk = new_sock->sk; 153 kernel_sock_shutdown(new_sock, SHUT_RDWR);
154 rds_tcp_stats_inc(s_tcp_listen_closed_stale);
155 nsk->sk_user_data = NULL;
156 nsk->sk_prot->disconnect(nsk, 0);
157 tcp_done(nsk);
158 new_sock = NULL;
159 ret = 0; 154 ret = 0;
160out: 155out:
161 if (rs_tcp) 156 if (rs_tcp)