path: root/net/ipv4
author    Eric Dumazet <edumazet@google.com>    2015-04-23 13:42:39 -0400
committer David S. Miller <davem@davemloft.net>    2015-04-24 11:06:48 -0400
commit    845704a535e9b3c76448f52af1b70e4422ea03fd (patch)
tree      3a14cdcb01442f0f4d61a3161a9ac508bbdb3732 /net/ipv4
parent    e4b6c30375e83b92d9c3e9b9d853417e8cc74006 (diff)
tcp: avoid looping in tcp_send_fin()
Presence of an unbound loop in tcp_send_fin() had always been hard to explain when analyzing crash dumps involving gigantic dying processes with millions of sockets.

Let's try a different strategy: in case of memory pressure, try to add the FIN flag to the last packet in the write queue, even if that packet was already sent. The TCP stack will be able to deliver this FIN after a timeout event. Note that because this FIN is delivered by a retransmit, it also carries a Push flag given our current implementation.

By checking sk_under_memory_pressure(), we anticipate that cooking many FIN packets might deplete tcp memory.

In case we could not allocate a packet, even with a __GFP_WAIT allocation, then not sending a FIN seems quite reasonable if it allows us to get rid of this socket, free memory, and not block the process from eventually doing other useful work.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
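The decision flow described above can be condensed into a small user-space model for clarity. This is an illustrative sketch only, not the kernel code: fin_decision(), its parameters, and the enum values are invented names for this example and do not exist in the kernel.

/* Illustrative user-space model of the FIN-sending decision described in
 * the commit message. All identifiers here are invented for the sketch.
 */
#include <stdbool.h>
#include <stdio.h>

enum fin_action {
	FIN_ON_UNSENT_TAIL,	/* tack FIN onto the not-yet-sent tail skb */
	FIN_ON_SENT_TAIL,	/* tack FIN onto an already-sent tail skb; a later
				 * retransmit (after a timeout) delivers it */
	FIN_NEW_SKB,		/* allocate a dedicated FIN packet */
	FIN_NONE,		/* no memory and nothing to coalesce with: give up */
};

static enum fin_action fin_decision(bool have_tail, bool tail_unsent,
				    bool mem_pressure, bool alloc_ok)
{
	if (have_tail && (tail_unsent || mem_pressure))
		return tail_unsent ? FIN_ON_UNSENT_TAIL : FIN_ON_SENT_TAIL;
	if (alloc_ok)
		return FIN_NEW_SKB;
	/* Allocation failed even though the caller may block: fall back to
	 * coalescing with a sent tail skb if one exists, otherwise close
	 * the flow without a FIN rather than looping forever.
	 */
	return have_tail ? FIN_ON_SENT_TAIL : FIN_NONE;
}

int main(void)
{
	/* Under memory pressure with an already-sent tail skb, the FIN is
	 * coalesced and will ride a later retransmit.
	 */
	if (fin_decision(true, false, true, false) == FIN_ON_SENT_TAIL)
		puts("FIN coalesced onto sent tail skb");
	return 0;
}

The key design point, visible in the diff below, is that a FIN coalesced onto an already-sent skb is not transmitted immediately; tp->snd_nxt is advanced so the retransmit machinery naturally delivers it after a timeout.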
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/tcp_output.c  50
1 file changed, 29 insertions(+), 21 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2ade67b7cdb0..a369e8a70b2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2814,7 +2814,8 @@ begin_fwd:
 
 /* We allow to exceed memory limits for FIN packets to expedite
  * connection tear down and (memory) recovery.
- * Otherwise tcp_send_fin() could loop forever.
+ * Otherwise tcp_send_fin() could be tempted to either delay FIN
+ * or even be forced to close flow without any FIN.
  */
 static void sk_forced_wmem_schedule(struct sock *sk, int size)
 {
@@ -2827,33 +2828,40 @@ static void sk_forced_wmem_schedule(struct sock *sk, int size)
 	sk_memory_allocated_add(sk, amt, &status);
 }
 
-/* Send a fin. The caller locks the socket for us. This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
  */
 void tcp_send_fin(struct sock *sk)
 {
+	struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_write_queue_tail(sk);
-	int mss_now;
 
-	/* Optimization, tack on the FIN if we have a queue of
-	 * unsent frames. But be careful about outgoing SACKS
-	 * and IP options.
+	/* Optimization, tack on the FIN if we have one skb in write queue and
+	 * this skb was not yet sent, or we are under memory pressure.
+	 * Note: in the latter case, FIN packet will be sent after a timeout,
+	 * as TCP stack thinks it has already been transmitted.
 	 */
-	mss_now = tcp_current_mss(sk);
-
-	if (tcp_send_head(sk)) {
-		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
-		TCP_SKB_CB(skb)->end_seq++;
+	if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+coalesce:
+		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
+		if (!tcp_send_head(sk)) {
+			/* This means tskb was already sent.
+			 * Pretend we included the FIN on previous transmit.
+			 * We need to set tp->snd_nxt to the value it would have
+			 * if FIN had been sent. This is because retransmit path
+			 * does not change tp->snd_nxt.
+			 */
+			tp->snd_nxt++;
+			return;
+		}
 	} else {
-		/* Socket is locked, keep trying until memory is available. */
-		for (;;) {
-			skb = alloc_skb_fclone(MAX_TCP_HEADER,
-					       sk->sk_allocation);
-			if (skb)
-				break;
-			yield();
+		skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+		if (unlikely(!skb)) {
+			if (tskb)
+				goto coalesce;
+			return;
 		}
 		skb_reserve(skb, MAX_TCP_HEADER);
 		sk_forced_wmem_schedule(sk, skb->truesize);
@@ -2862,7 +2870,7 @@ void tcp_send_fin(struct sock *sk)
				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
 }
 
 /* We get here when a process closes a file descriptor (either due to