diff options
-rw-r--r-- | include/linux/skbuff.h | 1 | ||||
-rw-r--r-- | net/core/skbuff.c | 6 | ||||
-rw-r--r-- | net/core/sock.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 23 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 23 |
5 files changed, 55 insertions, 3 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 73902acf2b71..04f52e719571 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -485,6 +485,7 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg); | |||
485 | 485 | ||
486 | void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); | 486 | void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); |
487 | 487 | ||
488 | int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); | ||
488 | int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, | 489 | int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, |
489 | struct msghdr *msg, int len, | 490 | struct msghdr *msg, int len, |
490 | struct ubuf_info *uarg); | 491 | struct ubuf_info *uarg); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3c814565ed7c..1350901c5cb8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -1105,6 +1105,12 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); | |||
1105 | extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, | 1105 | extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, |
1106 | struct iov_iter *from, size_t length); | 1106 | struct iov_iter *from, size_t length); |
1107 | 1107 | ||
1108 | int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) | ||
1109 | { | ||
1110 | return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); | ||
1111 | } | ||
1112 | EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); | ||
1113 | |||
1108 | int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, | 1114 | int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, |
1109 | struct msghdr *msg, int len, | 1115 | struct msghdr *msg, int len, |
1110 | struct ubuf_info *uarg) | 1116 | struct ubuf_info *uarg) |
diff --git a/net/core/sock.c b/net/core/sock.c index 6d7e189e3cd9..f5bb89785e47 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1018,7 +1018,10 @@ set_rcvbuf: | |||
1018 | 1018 | ||
1019 | case SO_ZEROCOPY: | 1019 | case SO_ZEROCOPY: |
1020 | if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { | 1020 | if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { |
1021 | if (sk->sk_protocol != IPPROTO_TCP) | 1021 | if (!((sk->sk_type == SOCK_STREAM && |
1022 | sk->sk_protocol == IPPROTO_TCP) || | ||
1023 | (sk->sk_type == SOCK_DGRAM && | ||
1024 | sk->sk_protocol == IPPROTO_UDP))) | ||
1022 | ret = -ENOTSUPP; | 1025 | ret = -ENOTSUPP; |
1023 | } else if (sk->sk_family != PF_RDS) { | 1026 | } else if (sk->sk_family != PF_RDS) { |
1024 | ret = -ENOTSUPP; | 1027 | ret = -ENOTSUPP; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5dbec21856f4..6f843aff628c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -867,6 +867,7 @@ static int __ip_append_data(struct sock *sk, | |||
867 | unsigned int flags) | 867 | unsigned int flags) |
868 | { | 868 | { |
869 | struct inet_sock *inet = inet_sk(sk); | 869 | struct inet_sock *inet = inet_sk(sk); |
870 | struct ubuf_info *uarg = NULL; | ||
870 | struct sk_buff *skb; | 871 | struct sk_buff *skb; |
871 | 872 | ||
872 | struct ip_options *opt = cork->opt; | 873 | struct ip_options *opt = cork->opt; |
@@ -916,6 +917,19 @@ static int __ip_append_data(struct sock *sk, | |||
916 | (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) | 917 | (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) |
917 | csummode = CHECKSUM_PARTIAL; | 918 | csummode = CHECKSUM_PARTIAL; |
918 | 919 | ||
920 | if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { | ||
921 | uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); | ||
922 | if (!uarg) | ||
923 | return -ENOBUFS; | ||
924 | if (rt->dst.dev->features & NETIF_F_SG && | ||
925 | csummode == CHECKSUM_PARTIAL) { | ||
926 | paged = true; | ||
927 | } else { | ||
928 | uarg->zerocopy = 0; | ||
929 | skb_zcopy_set(skb, uarg); | ||
930 | } | ||
931 | } | ||
932 | |||
919 | cork->length += length; | 933 | cork->length += length; |
920 | 934 | ||
921 | /* So, what's going on in the loop below? | 935 | /* So, what's going on in the loop below? |
@@ -1006,6 +1020,7 @@ alloc_new_skb: | |||
1006 | cork->tx_flags = 0; | 1020 | cork->tx_flags = 0; |
1007 | skb_shinfo(skb)->tskey = tskey; | 1021 | skb_shinfo(skb)->tskey = tskey; |
1008 | tskey = 0; | 1022 | tskey = 0; |
1023 | skb_zcopy_set(skb, uarg); | ||
1009 | 1024 | ||
1010 | /* | 1025 | /* |
1011 | * Find where to start putting bytes. | 1026 | * Find where to start putting bytes. |
@@ -1068,7 +1083,7 @@ alloc_new_skb: | |||
1068 | err = -EFAULT; | 1083 | err = -EFAULT; |
1069 | goto error; | 1084 | goto error; |
1070 | } | 1085 | } |
1071 | } else { | 1086 | } else if (!uarg || !uarg->zerocopy) { |
1072 | int i = skb_shinfo(skb)->nr_frags; | 1087 | int i = skb_shinfo(skb)->nr_frags; |
1073 | 1088 | ||
1074 | err = -ENOMEM; | 1089 | err = -ENOMEM; |
@@ -1098,6 +1113,10 @@ alloc_new_skb: | |||
1098 | skb->data_len += copy; | 1113 | skb->data_len += copy; |
1099 | skb->truesize += copy; | 1114 | skb->truesize += copy; |
1100 | wmem_alloc_delta += copy; | 1115 | wmem_alloc_delta += copy; |
1116 | } else { | ||
1117 | err = skb_zerocopy_iter_dgram(skb, from, copy); | ||
1118 | if (err < 0) | ||
1119 | goto error; | ||
1101 | } | 1120 | } |
1102 | offset += copy; | 1121 | offset += copy; |
1103 | length -= copy; | 1122 | length -= copy; |
@@ -1105,11 +1124,13 @@ alloc_new_skb: | |||
1105 | 1124 | ||
1106 | if (wmem_alloc_delta) | 1125 | if (wmem_alloc_delta) |
1107 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); | 1126 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); |
1127 | sock_zerocopy_put(uarg); | ||
1108 | return 0; | 1128 | return 0; |
1109 | 1129 | ||
1110 | error_efault: | 1130 | error_efault: |
1111 | err = -EFAULT; | 1131 | err = -EFAULT; |
1112 | error: | 1132 | error: |
1133 | sock_zerocopy_put_abort(uarg); | ||
1113 | cork->length -= length; | 1134 | cork->length -= length; |
1114 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1135 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
1115 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); | 1136 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 827a3f5ff3bb..7df04d20a91f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -1245,6 +1245,7 @@ static int __ip6_append_data(struct sock *sk, | |||
1245 | { | 1245 | { |
1246 | struct sk_buff *skb, *skb_prev = NULL; | 1246 | struct sk_buff *skb, *skb_prev = NULL; |
1247 | unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; | 1247 | unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; |
1248 | struct ubuf_info *uarg = NULL; | ||
1248 | int exthdrlen = 0; | 1249 | int exthdrlen = 0; |
1249 | int dst_exthdrlen = 0; | 1250 | int dst_exthdrlen = 0; |
1250 | int hh_len; | 1251 | int hh_len; |
@@ -1322,6 +1323,19 @@ emsgsize: | |||
1322 | rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) | 1323 | rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) |
1323 | csummode = CHECKSUM_PARTIAL; | 1324 | csummode = CHECKSUM_PARTIAL; |
1324 | 1325 | ||
1326 | if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { | ||
1327 | uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); | ||
1328 | if (!uarg) | ||
1329 | return -ENOBUFS; | ||
1330 | if (rt->dst.dev->features & NETIF_F_SG && | ||
1331 | csummode == CHECKSUM_PARTIAL) { | ||
1332 | paged = true; | ||
1333 | } else { | ||
1334 | uarg->zerocopy = 0; | ||
1335 | skb_zcopy_set(skb, uarg); | ||
1336 | } | ||
1337 | } | ||
1338 | |||
1325 | /* | 1339 | /* |
1326 | * Let's try using as much space as possible. | 1340 | * Let's try using as much space as possible. |
1327 | * Use MTU if total length of the message fits into the MTU. | 1341 | * Use MTU if total length of the message fits into the MTU. |
@@ -1445,6 +1459,7 @@ alloc_new_skb: | |||
1445 | cork->tx_flags = 0; | 1459 | cork->tx_flags = 0; |
1446 | skb_shinfo(skb)->tskey = tskey; | 1460 | skb_shinfo(skb)->tskey = tskey; |
1447 | tskey = 0; | 1461 | tskey = 0; |
1462 | skb_zcopy_set(skb, uarg); | ||
1448 | 1463 | ||
1449 | /* | 1464 | /* |
1450 | * Find where to start putting bytes | 1465 | * Find where to start putting bytes |
@@ -1506,7 +1521,7 @@ alloc_new_skb: | |||
1506 | err = -EFAULT; | 1521 | err = -EFAULT; |
1507 | goto error; | 1522 | goto error; |
1508 | } | 1523 | } |
1509 | } else { | 1524 | } else if (!uarg || !uarg->zerocopy) { |
1510 | int i = skb_shinfo(skb)->nr_frags; | 1525 | int i = skb_shinfo(skb)->nr_frags; |
1511 | 1526 | ||
1512 | err = -ENOMEM; | 1527 | err = -ENOMEM; |
@@ -1536,6 +1551,10 @@ alloc_new_skb: | |||
1536 | skb->data_len += copy; | 1551 | skb->data_len += copy; |
1537 | skb->truesize += copy; | 1552 | skb->truesize += copy; |
1538 | wmem_alloc_delta += copy; | 1553 | wmem_alloc_delta += copy; |
1554 | } else { | ||
1555 | err = skb_zerocopy_iter_dgram(skb, from, copy); | ||
1556 | if (err < 0) | ||
1557 | goto error; | ||
1539 | } | 1558 | } |
1540 | offset += copy; | 1559 | offset += copy; |
1541 | length -= copy; | 1560 | length -= copy; |
@@ -1543,11 +1562,13 @@ alloc_new_skb: | |||
1543 | 1562 | ||
1544 | if (wmem_alloc_delta) | 1563 | if (wmem_alloc_delta) |
1545 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); | 1564 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); |
1565 | sock_zerocopy_put(uarg); | ||
1546 | return 0; | 1566 | return 0; |
1547 | 1567 | ||
1548 | error_efault: | 1568 | error_efault: |
1549 | err = -EFAULT; | 1569 | err = -EFAULT; |
1550 | error: | 1570 | error: |
1571 | sock_zerocopy_put_abort(uarg); | ||
1551 | cork->length -= length; | 1572 | cork->length -= length; |
1552 | IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); | 1573 | IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); |
1553 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); | 1574 | refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); |