author     David S. Miller <davem@davemloft.net>  2015-10-16 03:52:27 -0400
committer  David S. Miller <davem@davemloft.net>  2015-10-16 03:52:27 -0400
commit     a302afe980ca6b25b2bae0b5bc816fe1dc1bb039
tree       bf2c3efa6f72141e1824bcd075fcfcc250475476
parent     47ea0325337b166c1c8695119aa6e83cdc035ef5
parent     ebb516af60e18258aac8e80bbe068740ef1579ed
Merge branch 'robust_listener'
Eric Dumazet says:

====================
tcp/dccp: make our listener code more robust

This patch series addresses request socket leaks and the listener
dismantle phase. It survives a stress test with listeners being
added/removed quite randomly.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
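[Editor's note, not part of the commit: the series below moves the request-socket
reference release out of the individual branches of the ICMP error handlers and
into a single reqsk_put() at function exit, and adds a drop-and-put helper for
the receive paths. A minimal sketch of the resulting contract, in kernel style;
handle_req_err() is a hypothetical stand-in for the tcp_req_err()/dccp_req_err()
changes in the diff:

/* Hypothetical caller modeled on tcp_req_err() as changed below.
 * The lookup that produced req took one reference; exactly one
 * reqsk_put() now pairs with it on every exit path, so neither
 * branch can leak the request socket.
 */
static void handle_req_err(struct net *net, struct request_sock *req,
			   bool seq_ok)
{
	if (!seq_ok)
		/* out-of-window ICMP: just account it */
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
	else
		/* still in SYN_RECV/RESPOND: remove the req silently */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
	reqsk_put(req);	/* single release point for the lookup's reference */
}

The same drop-then-put pairing is what inet_csk_reqsk_queue_drop_and_put()
packages for the rcv paths, where the req must be dropped and released before
the socket lookup is retried.]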
Diffstat:
-rw-r--r--  include/net/inet_connection_sock.h  10
-rw-r--r--  include/net/request_sock.h           19
-rw-r--r--  net/dccp/ipv4.c                       4
-rw-r--r--  net/dccp/ipv6.c                       2
-rw-r--r--  net/ipv4/inet_connection_sock.c      81
-rw-r--r--  net/ipv4/tcp_ipv4.c                   6
-rw-r--r--  net/ipv6/tcp_ipv6.c                   2

7 files changed, 67 insertions(+), 57 deletions(-)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fd645c49e71e..63615709839d 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -268,13 +268,8 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 					    struct sock *newsk,
 					    const struct request_sock *req);
 
-static inline void inet_csk_reqsk_queue_add(struct sock *sk,
-					    struct request_sock *req,
-					    struct sock *child)
-{
-	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-}
-
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child);
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 
@@ -299,6 +294,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
 }
 
 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
 
 void inet_csk_destroy_sock(struct sock *sk);
 void inet_csk_prepare_forced_close(struct sock *sk);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 2e73748956d5..a0dde04eb178 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -186,25 +186,6 @@ static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
 	return queue->rskq_accept_head == NULL;
 }
 
-static inline void reqsk_queue_add(struct request_sock_queue *queue,
-				   struct request_sock *req,
-				   struct sock *parent,
-				   struct sock *child)
-{
-	spin_lock(&queue->rskq_lock);
-	req->sk = child;
-	sk_acceptq_added(parent);
-
-	if (queue->rskq_accept_head == NULL)
-		queue->rskq_accept_head = req;
-	else
-		queue->rskq_accept_tail->dl_next = req;
-
-	queue->rskq_accept_tail = req;
-	req->dl_next = NULL;
-	spin_unlock(&queue->rskq_lock);
-}
-
 static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
 						      struct sock *parent)
 {
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0dcf1963b323..59bc180b02d8 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
 
 	if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-		reqsk_put(req);
 	} else {
 		/*
 		 * Still in RESPOND, just remove it silently.
@@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
 		 */
 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
 	}
+	reqsk_put(req);
 }
 EXPORT_SYMBOL(dccp_req_err);
 
@@ -828,7 +828,7 @@ lookup:
 		if (likely(sk->sk_state == DCCP_LISTEN)) {
 			nsk = dccp_check_req(sk, skb, req);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 68831931b1fe..d9cc731f2619 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -686,7 +686,7 @@ lookup:
 		if (likely(sk->sk_state == DCCP_LISTEN)) {
 			nsk = dccp_check_req(sk, skb, req);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ba9ec9a0d0ce..8430bc8ccd58 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -546,6 +546,13 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
 
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
+{
+	inet_csk_reqsk_queue_drop(sk, req);
+	reqsk_put(req);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
+
 static void reqsk_timer_handler(unsigned long data)
 {
 	struct request_sock *req = (struct request_sock *)data;
@@ -608,8 +615,7 @@ static void reqsk_timer_handler(unsigned long data)
 		return;
 	}
 drop:
-	inet_csk_reqsk_queue_drop(sk_listener, req);
-	reqsk_put(req);
+	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
 }
 
 static void reqsk_queue_hash_req(struct request_sock *req,
@@ -758,6 +764,53 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 
+static void inet_child_forget(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+	sock_orphan(child);
+
+	percpu_counter_inc(sk->sk_prot->orphan_count);
+
+	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
+		BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+		BUG_ON(sk != req->rsk_listener);
+
+		/* Paranoid, to prevent race condition if
+		 * an inbound pkt destined for child is
+		 * blocked by sock lock in tcp_v4_rcv().
+		 * Also to satisfy an assertion in
+		 * tcp_v4_destroy_sock().
+		 */
+		tcp_sk(child)->fastopen_rsk = NULL;
+	}
+	inet_csk_destroy_sock(child);
+	reqsk_put(req);
+}
+
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+			      struct sock *child)
+{
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+	spin_lock(&queue->rskq_lock);
+	if (unlikely(sk->sk_state != TCP_LISTEN)) {
+		inet_child_forget(sk, req, child);
+	} else {
+		req->sk = child;
+		req->dl_next = NULL;
+		if (queue->rskq_accept_head == NULL)
+			queue->rskq_accept_head = req;
+		else
+			queue->rskq_accept_tail->dl_next = req;
+		queue->rskq_accept_tail = req;
+		sk_acceptq_added(sk);
+	}
+	spin_unlock(&queue->rskq_lock);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+
 /*
  * This routine closes sockets which have been at least partially
  * opened, but not yet accepted.
@@ -784,31 +837,11 @@ void inet_csk_listen_stop(struct sock *sk)
 		WARN_ON(sock_owned_by_user(child));
 		sock_hold(child);
 
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		percpu_counter_inc(sk->sk_prot->orphan_count);
-
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
-			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != req->rsk_listener);
-
-			/* Paranoid, to prevent race condition if
-			 * an inbound pkt destined for child is
-			 * blocked by sock lock in tcp_v4_rcv().
-			 * Also to satisfy an assertion in
-			 * tcp_v4_destroy_sock().
-			 */
-			tcp_sk(child)->fastopen_rsk = NULL;
-		}
-		inet_csk_destroy_sock(child);
-
+		inet_child_forget(sk, req, child);
 		bh_unlock_sock(child);
 		local_bh_enable();
 		sock_put(child);
 
-		reqsk_put(req);
 		cond_resched();
 	}
 	if (queue->fastopenq.rskq_rst_head) {
@@ -823,7 +856,7 @@ void inet_csk_listen_stop(struct sock *sk)
 			req = next;
 		}
 	}
-	WARN_ON(sk->sk_ack_backlog);
+	WARN_ON_ONCE(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1ff0923df715..9c68cf3762c4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq)
 
 	if (seq != tcp_rsk(req)->snt_isn) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-		reqsk_put(req);
 	} else {
 		/*
 		 * Still in SYN_RECV, just remove it silently.
@@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq)
 		 * created socket, and POSIX does not want network
 		 * errors returned from accept().
 		 */
-		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
 	}
+	reqsk_put(req);
 }
 EXPORT_SYMBOL(tcp_req_err);
 
@@ -1591,7 +1591,7 @@ process:
 		if (likely(sk->sk_state == TCP_LISTEN)) {
 			nsk = tcp_check_req(sk, skb, req, false);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7ce1c57199d1..acb06f86f372 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1386,7 +1386,7 @@ process:
 		if (likely(sk->sk_state == TCP_LISTEN)) {
 			nsk = tcp_check_req(sk, skb, req, false);
 		} else {
-			inet_csk_reqsk_queue_drop(sk, req);
+			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
 		if (!nsk) {