diff options
author | Eric Dumazet <edumazet@google.com> | 2016-05-02 13:56:27 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-03 16:02:36 -0400 |
commit | 1d2077ac0165c0d173a2255e37cf4dc5033d92c7 (patch) | |
tree | fc18bebb7e5ffe2109fd61c56230c5f47d5f313b | |
parent | e34b1638d02bef8c3278af30ee73077c5babc082 (diff) |
net: add __sock_wfree() helper
Hosts sending a lot of ACK packets exhibit a high sock_wfree() cost
because of a cache line miss when testing SOCK_USE_WRITE_QUEUE.
We could move this flag close to sk_wmem_alloc, but it is better
to perform the atomic_sub_and_test() on a clean cache line,
as it avoids one extra bus transaction.
skb_orphan_partial() can also have a fast track for packets that either
are TCP pure ACKs or have already gone through skb_orphan_partial().
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sock.h | 1 | ||||
-rw-r--r-- | net/core/sock.c | 24 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 |
3 files changed, 26 insertions, 1 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 1dbb1f9f7c1b..45f5b492c658 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -1445,6 +1445,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); | |||
1445 | 1445 | ||
1446 | struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, | 1446 | struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, |
1447 | gfp_t priority); | 1447 | gfp_t priority); |
1448 | void __sock_wfree(struct sk_buff *skb); | ||
1448 | void sock_wfree(struct sk_buff *skb); | 1449 | void sock_wfree(struct sk_buff *skb); |
1449 | void skb_orphan_partial(struct sk_buff *skb); | 1450 | void skb_orphan_partial(struct sk_buff *skb); |
1450 | void sock_rfree(struct sk_buff *skb); | 1451 | void sock_rfree(struct sk_buff *skb); |
diff --git a/net/core/sock.c b/net/core/sock.c index f615e9391170..08bf97eceeb3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1655,6 +1655,17 @@ void sock_wfree(struct sk_buff *skb) | |||
1655 | } | 1655 | } |
1656 | EXPORT_SYMBOL(sock_wfree); | 1656 | EXPORT_SYMBOL(sock_wfree); |
1657 | 1657 | ||
1658 | /* This variant of sock_wfree() is used by TCP, | ||
1659 | * since it sets SOCK_USE_WRITE_QUEUE. | ||
1660 | */ | ||
1661 | void __sock_wfree(struct sk_buff *skb) | ||
1662 | { | ||
1663 | struct sock *sk = skb->sk; | ||
1664 | |||
1665 | if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) | ||
1666 | __sk_free(sk); | ||
1667 | } | ||
1668 | |||
1658 | void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) | 1669 | void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) |
1659 | { | 1670 | { |
1660 | skb_orphan(skb); | 1671 | skb_orphan(skb); |
@@ -1677,8 +1688,21 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) | |||
1677 | } | 1688 | } |
1678 | EXPORT_SYMBOL(skb_set_owner_w); | 1689 | EXPORT_SYMBOL(skb_set_owner_w); |
1679 | 1690 | ||
1691 | /* This helper is used by netem, as it can hold packets in its | ||
1692 | * delay queue. We want to allow the owner socket to send more | ||
1693 | * packets, as if they were already TX completed by a typical driver. | ||
1694 | * But we also want to keep skb->sk set because some packet schedulers | ||
1695 | * rely on it (sch_fq for example). So we set skb->truesize to a small | ||
1696 | * amount (1) and decrease sk_wmem_alloc accordingly. | ||
1697 | */ | ||
1680 | void skb_orphan_partial(struct sk_buff *skb) | 1698 | void skb_orphan_partial(struct sk_buff *skb) |
1681 | { | 1699 | { |
1700 | /* If this skb is a TCP pure ACK or already went here, | ||
1701 | * we have nothing to do. 2 is already a very small truesize. | ||
1702 | */ | ||
1703 | if (skb->truesize <= 2) | ||
1704 | return; | ||
1705 | |||
1682 | /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, | 1706 | /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, |
1683 | * so we do not completely orphan skb, but transfert all | 1707 | * so we do not completely orphan skb, but transfert all |
1684 | * accounted bytes but one, to avoid unexpected reorders. | 1708 | * accounted bytes but one, to avoid unexpected reorders. |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 25d527922b18..8daefd8b1b49 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -949,7 +949,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
949 | 949 | ||
950 | skb_orphan(skb); | 950 | skb_orphan(skb); |
951 | skb->sk = sk; | 951 | skb->sk = sk; |
952 | skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; | 952 | skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; |
953 | skb_set_hash_from_sk(skb, sk); | 953 | skb_set_hash_from_sk(skb, sk); |
954 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); | 954 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); |
955 | 955 | ||