diff options
author | David S. Miller <davem@davemloft.net> | 2015-10-16 03:52:27 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-16 03:52:27 -0400 |
commit | a302afe980ca6b25b2bae0b5bc816fe1dc1bb039 (patch) | |
tree | bf2c3efa6f72141e1824bcd075fcfcc250475476 | |
parent | 47ea0325337b166c1c8695119aa6e83cdc035ef5 (diff) | |
parent | ebb516af60e18258aac8e80bbe068740ef1579ed (diff) |
Merge branch 'robust_listener'
Eric Dumazet says:
====================
tcp/dccp: make our listener code more robust
This patch series fixes request socket leaks and makes the listener
dismantle phase more robust. It survives a stress test in which listeners
are added and removed at random.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_connection_sock.h | 10 | ||||
-rw-r--r-- | include/net/request_sock.h | 19 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 4 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 81 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 |
7 files changed, 67 insertions, 57 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fd645c49e71e..63615709839d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h | |||
@@ -268,13 +268,8 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, | |||
268 | struct sock *newsk, | 268 | struct sock *newsk, |
269 | const struct request_sock *req); | 269 | const struct request_sock *req); |
270 | 270 | ||
271 | static inline void inet_csk_reqsk_queue_add(struct sock *sk, | 271 | void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, |
272 | struct request_sock *req, | 272 | struct sock *child); |
273 | struct sock *child) | ||
274 | { | ||
275 | reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); | ||
276 | } | ||
277 | |||
278 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | 273 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, |
279 | unsigned long timeout); | 274 | unsigned long timeout); |
280 | 275 | ||
@@ -299,6 +294,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) | |||
299 | } | 294 | } |
300 | 295 | ||
301 | void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); | 296 | void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); |
297 | void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); | ||
302 | 298 | ||
303 | void inet_csk_destroy_sock(struct sock *sk); | 299 | void inet_csk_destroy_sock(struct sock *sk); |
304 | void inet_csk_prepare_forced_close(struct sock *sk); | 300 | void inet_csk_prepare_forced_close(struct sock *sk); |
diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 2e73748956d5..a0dde04eb178 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h | |||
@@ -186,25 +186,6 @@ static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) | |||
186 | return queue->rskq_accept_head == NULL; | 186 | return queue->rskq_accept_head == NULL; |
187 | } | 187 | } |
188 | 188 | ||
189 | static inline void reqsk_queue_add(struct request_sock_queue *queue, | ||
190 | struct request_sock *req, | ||
191 | struct sock *parent, | ||
192 | struct sock *child) | ||
193 | { | ||
194 | spin_lock(&queue->rskq_lock); | ||
195 | req->sk = child; | ||
196 | sk_acceptq_added(parent); | ||
197 | |||
198 | if (queue->rskq_accept_head == NULL) | ||
199 | queue->rskq_accept_head = req; | ||
200 | else | ||
201 | queue->rskq_accept_tail->dl_next = req; | ||
202 | |||
203 | queue->rskq_accept_tail = req; | ||
204 | req->dl_next = NULL; | ||
205 | spin_unlock(&queue->rskq_lock); | ||
206 | } | ||
207 | |||
208 | static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, | 189 | static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, |
209 | struct sock *parent) | 190 | struct sock *parent) |
210 | { | 191 | { |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0dcf1963b323..59bc180b02d8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq) | |||
208 | 208 | ||
209 | if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { | 209 | if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { |
210 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); | 210 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); |
211 | reqsk_put(req); | ||
212 | } else { | 211 | } else { |
213 | /* | 212 | /* |
214 | * Still in RESPOND, just remove it silently. | 213 | * Still in RESPOND, just remove it silently. |
@@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq) | |||
218 | */ | 217 | */ |
219 | inet_csk_reqsk_queue_drop(req->rsk_listener, req); | 218 | inet_csk_reqsk_queue_drop(req->rsk_listener, req); |
220 | } | 219 | } |
220 | reqsk_put(req); | ||
221 | } | 221 | } |
222 | EXPORT_SYMBOL(dccp_req_err); | 222 | EXPORT_SYMBOL(dccp_req_err); |
223 | 223 | ||
@@ -828,7 +828,7 @@ lookup: | |||
828 | if (likely(sk->sk_state == DCCP_LISTEN)) { | 828 | if (likely(sk->sk_state == DCCP_LISTEN)) { |
829 | nsk = dccp_check_req(sk, skb, req); | 829 | nsk = dccp_check_req(sk, skb, req); |
830 | } else { | 830 | } else { |
831 | inet_csk_reqsk_queue_drop(sk, req); | 831 | inet_csk_reqsk_queue_drop_and_put(sk, req); |
832 | goto lookup; | 832 | goto lookup; |
833 | } | 833 | } |
834 | if (!nsk) { | 834 | if (!nsk) { |
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 68831931b1fe..d9cc731f2619 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -686,7 +686,7 @@ lookup: | |||
686 | if (likely(sk->sk_state == DCCP_LISTEN)) { | 686 | if (likely(sk->sk_state == DCCP_LISTEN)) { |
687 | nsk = dccp_check_req(sk, skb, req); | 687 | nsk = dccp_check_req(sk, skb, req); |
688 | } else { | 688 | } else { |
689 | inet_csk_reqsk_queue_drop(sk, req); | 689 | inet_csk_reqsk_queue_drop_and_put(sk, req); |
690 | goto lookup; | 690 | goto lookup; |
691 | } | 691 | } |
692 | if (!nsk) { | 692 | if (!nsk) { |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ba9ec9a0d0ce..8430bc8ccd58 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -546,6 +546,13 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) | |||
546 | } | 546 | } |
547 | EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); | 547 | EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); |
548 | 548 | ||
549 | void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) | ||
550 | { | ||
551 | inet_csk_reqsk_queue_drop(sk, req); | ||
552 | reqsk_put(req); | ||
553 | } | ||
554 | EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); | ||
555 | |||
549 | static void reqsk_timer_handler(unsigned long data) | 556 | static void reqsk_timer_handler(unsigned long data) |
550 | { | 557 | { |
551 | struct request_sock *req = (struct request_sock *)data; | 558 | struct request_sock *req = (struct request_sock *)data; |
@@ -608,8 +615,7 @@ static void reqsk_timer_handler(unsigned long data) | |||
608 | return; | 615 | return; |
609 | } | 616 | } |
610 | drop: | 617 | drop: |
611 | inet_csk_reqsk_queue_drop(sk_listener, req); | 618 | inet_csk_reqsk_queue_drop_and_put(sk_listener, req); |
612 | reqsk_put(req); | ||
613 | } | 619 | } |
614 | 620 | ||
615 | static void reqsk_queue_hash_req(struct request_sock *req, | 621 | static void reqsk_queue_hash_req(struct request_sock *req, |
@@ -758,6 +764,53 @@ int inet_csk_listen_start(struct sock *sk, int backlog) | |||
758 | } | 764 | } |
759 | EXPORT_SYMBOL_GPL(inet_csk_listen_start); | 765 | EXPORT_SYMBOL_GPL(inet_csk_listen_start); |
760 | 766 | ||
767 | static void inet_child_forget(struct sock *sk, struct request_sock *req, | ||
768 | struct sock *child) | ||
769 | { | ||
770 | sk->sk_prot->disconnect(child, O_NONBLOCK); | ||
771 | |||
772 | sock_orphan(child); | ||
773 | |||
774 | percpu_counter_inc(sk->sk_prot->orphan_count); | ||
775 | |||
776 | if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { | ||
777 | BUG_ON(tcp_sk(child)->fastopen_rsk != req); | ||
778 | BUG_ON(sk != req->rsk_listener); | ||
779 | |||
780 | /* Paranoid, to prevent race condition if | ||
781 | * an inbound pkt destined for child is | ||
782 | * blocked by sock lock in tcp_v4_rcv(). | ||
783 | * Also to satisfy an assertion in | ||
784 | * tcp_v4_destroy_sock(). | ||
785 | */ | ||
786 | tcp_sk(child)->fastopen_rsk = NULL; | ||
787 | } | ||
788 | inet_csk_destroy_sock(child); | ||
789 | reqsk_put(req); | ||
790 | } | ||
791 | |||
792 | void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, | ||
793 | struct sock *child) | ||
794 | { | ||
795 | struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; | ||
796 | |||
797 | spin_lock(&queue->rskq_lock); | ||
798 | if (unlikely(sk->sk_state != TCP_LISTEN)) { | ||
799 | inet_child_forget(sk, req, child); | ||
800 | } else { | ||
801 | req->sk = child; | ||
802 | req->dl_next = NULL; | ||
803 | if (queue->rskq_accept_head == NULL) | ||
804 | queue->rskq_accept_head = req; | ||
805 | else | ||
806 | queue->rskq_accept_tail->dl_next = req; | ||
807 | queue->rskq_accept_tail = req; | ||
808 | sk_acceptq_added(sk); | ||
809 | } | ||
810 | spin_unlock(&queue->rskq_lock); | ||
811 | } | ||
812 | EXPORT_SYMBOL(inet_csk_reqsk_queue_add); | ||
813 | |||
761 | /* | 814 | /* |
762 | * This routine closes sockets which have been at least partially | 815 | * This routine closes sockets which have been at least partially |
763 | * opened, but not yet accepted. | 816 | * opened, but not yet accepted. |
@@ -784,31 +837,11 @@ void inet_csk_listen_stop(struct sock *sk) | |||
784 | WARN_ON(sock_owned_by_user(child)); | 837 | WARN_ON(sock_owned_by_user(child)); |
785 | sock_hold(child); | 838 | sock_hold(child); |
786 | 839 | ||
787 | sk->sk_prot->disconnect(child, O_NONBLOCK); | 840 | inet_child_forget(sk, req, child); |
788 | |||
789 | sock_orphan(child); | ||
790 | |||
791 | percpu_counter_inc(sk->sk_prot->orphan_count); | ||
792 | |||
793 | if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { | ||
794 | BUG_ON(tcp_sk(child)->fastopen_rsk != req); | ||
795 | BUG_ON(sk != req->rsk_listener); | ||
796 | |||
797 | /* Paranoid, to prevent race condition if | ||
798 | * an inbound pkt destined for child is | ||
799 | * blocked by sock lock in tcp_v4_rcv(). | ||
800 | * Also to satisfy an assertion in | ||
801 | * tcp_v4_destroy_sock(). | ||
802 | */ | ||
803 | tcp_sk(child)->fastopen_rsk = NULL; | ||
804 | } | ||
805 | inet_csk_destroy_sock(child); | ||
806 | |||
807 | bh_unlock_sock(child); | 841 | bh_unlock_sock(child); |
808 | local_bh_enable(); | 842 | local_bh_enable(); |
809 | sock_put(child); | 843 | sock_put(child); |
810 | 844 | ||
811 | reqsk_put(req); | ||
812 | cond_resched(); | 845 | cond_resched(); |
813 | } | 846 | } |
814 | if (queue->fastopenq.rskq_rst_head) { | 847 | if (queue->fastopenq.rskq_rst_head) { |
@@ -823,7 +856,7 @@ void inet_csk_listen_stop(struct sock *sk) | |||
823 | req = next; | 856 | req = next; |
824 | } | 857 | } |
825 | } | 858 | } |
826 | WARN_ON(sk->sk_ack_backlog); | 859 | WARN_ON_ONCE(sk->sk_ack_backlog); |
827 | } | 860 | } |
828 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); | 861 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); |
829 | 862 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1ff0923df715..9c68cf3762c4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq) | |||
324 | 324 | ||
325 | if (seq != tcp_rsk(req)->snt_isn) { | 325 | if (seq != tcp_rsk(req)->snt_isn) { |
326 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); | 326 | NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); |
327 | reqsk_put(req); | ||
328 | } else { | 327 | } else { |
329 | /* | 328 | /* |
330 | * Still in SYN_RECV, just remove it silently. | 329 | * Still in SYN_RECV, just remove it silently. |
@@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq) | |||
332 | * created socket, and POSIX does not want network | 331 | * created socket, and POSIX does not want network |
333 | * errors returned from accept(). | 332 | * errors returned from accept(). |
334 | */ | 333 | */ |
335 | NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); | ||
336 | inet_csk_reqsk_queue_drop(req->rsk_listener, req); | 334 | inet_csk_reqsk_queue_drop(req->rsk_listener, req); |
335 | NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); | ||
337 | } | 336 | } |
337 | reqsk_put(req); | ||
338 | } | 338 | } |
339 | EXPORT_SYMBOL(tcp_req_err); | 339 | EXPORT_SYMBOL(tcp_req_err); |
340 | 340 | ||
@@ -1591,7 +1591,7 @@ process: | |||
1591 | if (likely(sk->sk_state == TCP_LISTEN)) { | 1591 | if (likely(sk->sk_state == TCP_LISTEN)) { |
1592 | nsk = tcp_check_req(sk, skb, req, false); | 1592 | nsk = tcp_check_req(sk, skb, req, false); |
1593 | } else { | 1593 | } else { |
1594 | inet_csk_reqsk_queue_drop(sk, req); | 1594 | inet_csk_reqsk_queue_drop_and_put(sk, req); |
1595 | goto lookup; | 1595 | goto lookup; |
1596 | } | 1596 | } |
1597 | if (!nsk) { | 1597 | if (!nsk) { |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7ce1c57199d1..acb06f86f372 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -1386,7 +1386,7 @@ process: | |||
1386 | if (likely(sk->sk_state == TCP_LISTEN)) { | 1386 | if (likely(sk->sk_state == TCP_LISTEN)) { |
1387 | nsk = tcp_check_req(sk, skb, req, false); | 1387 | nsk = tcp_check_req(sk, skb, req, false); |
1388 | } else { | 1388 | } else { |
1389 | inet_csk_reqsk_queue_drop(sk, req); | 1389 | inet_csk_reqsk_queue_drop_and_put(sk, req); |
1390 | goto lookup; | 1390 | goto lookup; |
1391 | } | 1391 | } |
1392 | if (!nsk) { | 1392 | if (!nsk) { |