diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 36 |
1 files changed, 26 insertions, 10 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8d4eac793700..3af21296d967 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c | |||
@@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb) | |||
839 | { | 839 | { |
840 | struct sock *sk = skb->sk; | 840 | struct sock *sk = skb->sk; |
841 | struct tcp_sock *tp = tcp_sk(sk); | 841 | struct tcp_sock *tp = tcp_sk(sk); |
842 | int wmem; | ||
843 | |||
844 | /* Keep one reference on sk_wmem_alloc. | ||
845 | * Will be released by sk_free() from here or tcp_tasklet_func() | ||
846 | */ | ||
847 | wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); | ||
848 | |||
849 | /* If this softirq is serviced by ksoftirqd, we are likely under stress. | ||
850 | * Wait until our queues (qdisc + devices) are drained. | ||
851 | * This gives : | ||
852 | * - less callbacks to tcp_write_xmit(), reducing stress (batches) | ||
853 | * - chance for incoming ACK (processed by another cpu maybe) | ||
854 | * to migrate this flow (skb->ooo_okay will be eventually set) | ||
855 | */ | ||
856 | if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) | ||
857 | goto out; | ||
842 | 858 | ||
843 | if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && | 859 | if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && |
844 | !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { | 860 | !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { |
845 | unsigned long flags; | 861 | unsigned long flags; |
846 | struct tsq_tasklet *tsq; | 862 | struct tsq_tasklet *tsq; |
847 | 863 | ||
848 | /* Keep a ref on socket. | ||
849 | * This last ref will be released in tcp_tasklet_func() | ||
850 | */ | ||
851 | atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); | ||
852 | |||
853 | /* queue this socket to tasklet queue */ | 864 | /* queue this socket to tasklet queue */ |
854 | local_irq_save(flags); | 865 | local_irq_save(flags); |
855 | tsq = &__get_cpu_var(tsq_tasklet); | 866 | tsq = this_cpu_ptr(&tsq_tasklet); |
856 | list_add(&tp->tsq_node, &tsq->head); | 867 | list_add(&tp->tsq_node, &tsq->head); |
857 | tasklet_schedule(&tsq->tasklet); | 868 | tasklet_schedule(&tsq->tasklet); |
858 | local_irq_restore(flags); | 869 | local_irq_restore(flags); |
859 | } else { | 870 | return; |
860 | sock_wfree(skb); | ||
861 | } | 871 | } |
872 | out: | ||
873 | sk_free(sk); | ||
862 | } | 874 | } |
863 | 875 | ||
864 | /* This routine actually transmits TCP packets queued in by | 876 | /* This routine actually transmits TCP packets queued in by |
@@ -914,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
914 | tcp_ca_event(sk, CA_EVENT_TX_START); | 926 | tcp_ca_event(sk, CA_EVENT_TX_START); |
915 | 927 | ||
916 | /* if no packet is in qdisc/device queue, then allow XPS to select | 928 | /* if no packet is in qdisc/device queue, then allow XPS to select |
917 | * another queue. | 929 | * another queue. We can be called from tcp_tsq_handler() |
930 | * which holds one reference to sk_wmem_alloc. | ||
931 | * | ||
932 | * TODO: Ideally, in-flight pure ACK packets should not matter here. | ||
933 | * One way to get this would be to set skb->truesize = 2 on them. | ||
918 | */ | 934 | */ |
919 | skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; | 935 | skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); |
920 | 936 | ||
921 | skb_push(skb, tcp_header_size); | 937 | skb_push(skb, tcp_header_size); |
922 | skb_reset_transport_header(skb); | 938 | skb_reset_transport_header(skb); |