about summary refs log tree commit diff stats
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
authorWillem de Bruijn <willemb@google.com>2018-11-30 15:32:40 -0500
committerDavid S. Miller <davem@davemloft.net>2018-12-03 18:58:32 -0500
commit52900d22288e7d45846037e1db277c665bbc40db (patch)
tree8e69a5639d94cd4774bfc4645f5f0397abfb8cb8 /net/ipv4/ip_output.c
parentb5947e5d1e710c35ea281247bd27e6975250285c (diff)
udp: elide zerocopy operation in hot path
With MSG_ZEROCOPY, each skb holds a reference to a struct ubuf_info. Release of its last reference triggers a completion notification.

The TCP stack in tcp_sendmsg_locked holds an extra ref independent of the skbs, because it can build, send and free skbs within its loop, possibly reaching refcount zero and freeing the ubuf_info too soon. The UDP stack currently also takes this extra ref, but does not need it as all skbs are sent after return from __ip(6)_append_data.

Avoid the extra refcount_inc and refcount_dec_and_test, and generally the sock_zerocopy_put in the common path, by passing the initial reference to the first skb.

This approach is taken instead of initializing the refcount to 0, as that would generate error "refcount_t: increment on 0" on the next skb_zcopy_set.

Changes
  v3 -> v4
    - Move skb_zcopy_set below the only kfree_skb that might cause a premature uarg destroy before skb_zerocopy_put_abort
    - Move the entire skb_shinfo assignment block, to keep that cacheline access in one place

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c22
1 file changed, 11 insertions, 11 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6f843aff628c..78f028bdad30 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -881,8 +881,8 @@ static int __ip_append_data(struct sock *sk,
881 int csummode = CHECKSUM_NONE; 881 int csummode = CHECKSUM_NONE;
882 struct rtable *rt = (struct rtable *)cork->dst; 882 struct rtable *rt = (struct rtable *)cork->dst;
883 unsigned int wmem_alloc_delta = 0; 883 unsigned int wmem_alloc_delta = 0;
884 bool paged, extra_uref;
884 u32 tskey = 0; 885 u32 tskey = 0;
885 bool paged;
886 886
887 skb = skb_peek_tail(queue); 887 skb = skb_peek_tail(queue);
888 888
@@ -921,12 +921,13 @@ static int __ip_append_data(struct sock *sk,
921 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); 921 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
922 if (!uarg) 922 if (!uarg)
923 return -ENOBUFS; 923 return -ENOBUFS;
924 extra_uref = true;
924 if (rt->dst.dev->features & NETIF_F_SG && 925 if (rt->dst.dev->features & NETIF_F_SG &&
925 csummode == CHECKSUM_PARTIAL) { 926 csummode == CHECKSUM_PARTIAL) {
926 paged = true; 927 paged = true;
927 } else { 928 } else {
928 uarg->zerocopy = 0; 929 uarg->zerocopy = 0;
929 skb_zcopy_set(skb, uarg); 930 skb_zcopy_set(skb, uarg, &extra_uref);
930 } 931 }
931 } 932 }
932 933
@@ -1015,13 +1016,6 @@ alloc_new_skb:
1015 skb->csum = 0; 1016 skb->csum = 0;
1016 skb_reserve(skb, hh_len); 1017 skb_reserve(skb, hh_len);
1017 1018
1018 /* only the initial fragment is time stamped */
1019 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1020 cork->tx_flags = 0;
1021 skb_shinfo(skb)->tskey = tskey;
1022 tskey = 0;
1023 skb_zcopy_set(skb, uarg);
1024
1025 /* 1019 /*
1026 * Find where to start putting bytes. 1020 * Find where to start putting bytes.
1027 */ 1021 */
@@ -1054,6 +1048,13 @@ alloc_new_skb:
1054 exthdrlen = 0; 1048 exthdrlen = 0;
1055 csummode = CHECKSUM_NONE; 1049 csummode = CHECKSUM_NONE;
1056 1050
1051 /* only the initial fragment is time stamped */
1052 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1053 cork->tx_flags = 0;
1054 skb_shinfo(skb)->tskey = tskey;
1055 tskey = 0;
1056 skb_zcopy_set(skb, uarg, &extra_uref);
1057
1057 if ((flags & MSG_CONFIRM) && !skb_prev) 1058 if ((flags & MSG_CONFIRM) && !skb_prev)
1058 skb_set_dst_pending_confirm(skb, 1); 1059 skb_set_dst_pending_confirm(skb, 1);
1059 1060
@@ -1124,13 +1125,12 @@ alloc_new_skb:
1124 1125
1125 if (wmem_alloc_delta) 1126 if (wmem_alloc_delta)
1126 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1127 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1127 sock_zerocopy_put(uarg);
1128 return 0; 1128 return 0;
1129 1129
1130error_efault: 1130error_efault:
1131 err = -EFAULT; 1131 err = -EFAULT;
1132error: 1132error:
1133 sock_zerocopy_put_abort(uarg); 1133 sock_zerocopy_put_abort(uarg, extra_uref);
1134 cork->length -= length; 1134 cork->length -= length;
1135 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 1135 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1136 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 1136 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);