 drivers/net/tun.c      | 211
 drivers/vhost/net.c    |  13
 include/linux/if_tun.h |  17
 3 files changed, 208 insertions(+), 33 deletions(-)
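
The change below multiplexes two pointer types through the tun device's tx_ring: ordinary sk_buff pointers, and xdp_buff pointers tagged by setting bit 0 (TUN_XDP_FLAG). Both objects are at least word-aligned, so bit 0 of a valid pointer is always zero and can safely carry the type. The standalone sketch below is not kernel code; mock_skb and mock_xdp are illustrative stand-ins. It only demonstrates the same encode/test/decode idea that tun_xdp_to_ptr(), tun_is_xdp_buff() and tun_ptr_to_xdp() implement.

#include <stdint.h>
#include <stdio.h>

#define XDP_FLAG 0x1UL	/* mirrors TUN_XDP_FLAG: bit 0 marks an XDP buffer */

/* Stand-ins for struct sk_buff / struct xdp_buff; any alignment >= 2 keeps bit 0 free. */
struct mock_skb { int len; };
struct mock_xdp { int len; };

static int is_xdp(void *ptr)       { return (uintptr_t)ptr & XDP_FLAG; }
static void *xdp_to_ptr(void *xdp) { return (void *)((uintptr_t)xdp | XDP_FLAG); }
static void *ptr_to_xdp(void *ptr) { return (void *)((uintptr_t)ptr & ~XDP_FLAG); }

int main(void)
{
	struct mock_skb skb = { .len = 64 };
	struct mock_xdp xdp = { .len = 128 };
	void *ring[2] = { &skb, xdp_to_ptr(&xdp) };	/* what producers would enqueue */
	int i;

	for (i = 0; i < 2; i++) {
		if (is_xdp(ring[i]))
			printf("xdp buff, len %d\n", ((struct mock_xdp *)ptr_to_xdp(ring[i]))->len);
		else
			printf("skb, len %d\n", ((struct mock_skb *)ring[i])->len);
	}
	return 0;
}
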
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b8e39c6d2a5f..2fba3be5719e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -241,6 +241,24 @@ struct tun_struct {
 	struct tun_steering_prog __rcu *steering_prog;
 };
 
+bool tun_is_xdp_buff(void *ptr)
+{
+	return (unsigned long)ptr & TUN_XDP_FLAG;
+}
+EXPORT_SYMBOL(tun_is_xdp_buff);
+
+void *tun_xdp_to_ptr(void *ptr)
+{
+	return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
+}
+EXPORT_SYMBOL(tun_xdp_to_ptr);
+
+void *tun_ptr_to_xdp(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+}
+EXPORT_SYMBOL(tun_ptr_to_xdp);
+
 static int tun_napi_receive(struct napi_struct *napi, int budget)
 {
 	struct tun_file *tfile = container_of(napi, struct tun_file, napi);
@@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 	return tun;
 }
 
+static void tun_ptr_free(void *ptr)
+{
+	if (!ptr)
+		return;
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		put_page(virt_to_head_page(xdp->data));
+	} else {
+		__skb_array_destroy_skb(ptr);
+	}
+}
+
 static void tun_queue_purge(struct tun_file *tfile)
 {
-	struct sk_buff *skb;
+	void *ptr;
 
-	while ((skb = ptr_ring_consume(&tfile->tx_ring)) != NULL)
-		kfree_skb(skb);
+	while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
+		tun_ptr_free(ptr);
 
 	skb_queue_purge(&tfile->sk.sk_write_queue);
 	skb_queue_purge(&tfile->sk.sk_error_queue);
@@ -689,8 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 		unregister_netdevice(tun->dev);
 	}
 	if (tun) {
-		ptr_ring_cleanup(&tfile->tx_ring,
-				 __skb_array_destroy_skb);
+		ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
 		xdp_rxq_info_unreg(&tfile->xdp_rxq);
 	}
 	sock_put(&tfile->sk);
@@ -1222,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
+static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct xdp_buff *buff = xdp->data_hard_start;
+	int headroom = xdp->data - xdp->data_hard_start;
+	struct tun_file *tfile;
+	u32 numqueues;
+	int ret = 0;
+
+	/* Ensure headroom is available and buff is properly aligned */
+	if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
+		return -ENOSPC;
+
+	*buff = *xdp;
+
+	rcu_read_lock();
+
+	numqueues = READ_ONCE(tun->numqueues);
+	if (!numqueues) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+					    numqueues]);
+	/* Encode the XDP flag into the lowest bit so the consumer can
+	 * distinguish an XDP buffer from an sk_buff.
+	 */
+	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
+		this_cpu_inc(tun->pcpu_stats->tx_dropped);
+		ret = -ENOSPC;
+	}
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static void tun_xdp_flush(struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile;
+	u32 numqueues;
+
+	rcu_read_lock();
+
+	numqueues = READ_ONCE(tun->numqueues);
+	if (!numqueues)
+		goto out;
+
+	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+					    numqueues]);
+	/* Notify and wake up reader process */
+	if (tfile->flags & TUN_FASYNC)
+		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+
+out:
+	rcu_read_unlock();
+}
+
 static const struct net_device_ops tap_netdev_ops = {
 	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
@@ -1239,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_set_rx_headroom	= tun_set_headroom,
 	.ndo_get_stats64	= tun_net_get_stats64,
 	.ndo_bpf		= tun_xdp,
+	.ndo_xdp_xmit		= tun_xdp_xmit,
+	.ndo_xdp_flush		= tun_xdp_flush,
 };
 
 static void tun_flow_init(struct tun_struct *tun)
@@ -1863,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	return result;
 }
 
+static ssize_t tun_put_user_xdp(struct tun_struct *tun,
+				struct tun_file *tfile,
+				struct xdp_buff *xdp,
+				struct iov_iter *iter)
+{
+	int vnet_hdr_sz = 0;
+	size_t size = xdp->data_end - xdp->data;
+	struct tun_pcpu_stats *stats;
+	size_t ret;
+
+	if (tun->flags & IFF_VNET_HDR) {
+		struct virtio_net_hdr gso = { 0 };
+
+		vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+		if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
+			return -EINVAL;
+		if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
+			     sizeof(gso)))
+			return -EFAULT;
+		iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+	}
+
+	ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+
+	stats = get_cpu_ptr(tun->pcpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->tx_packets++;
+	stats->tx_bytes += ret;
+	u64_stats_update_end(&stats->syncp);
+	put_cpu_ptr(tun->pcpu_stats);
+
+	return ret;
+}
+
 /* Put packet to the user space buffer */
 static ssize_t tun_put_user(struct tun_struct *tun,
 			    struct tun_file *tfile,
@@ -1960,15 +2087,14 @@ done:
 	return total;
 }
 
-static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
-				     int *err)
+static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct sk_buff *skb = NULL;
+	void *ptr = NULL;
 	int error = 0;
 
-	skb = ptr_ring_consume(&tfile->tx_ring);
-	if (skb)
+	ptr = ptr_ring_consume(&tfile->tx_ring);
+	if (ptr)
 		goto out;
 	if (noblock) {
 		error = -EAGAIN;
@@ -1979,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 	current->state = TASK_INTERRUPTIBLE;
 
 	while (1) {
-		skb = ptr_ring_consume(&tfile->tx_ring);
-		if (skb)
+		ptr = ptr_ring_consume(&tfile->tx_ring);
+		if (ptr)
 			break;
 		if (signal_pending(current)) {
 			error = -ERESTARTSYS;
@@ -1999,12 +2125,12 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 
 out:
 	*err = error;
-	return skb;
+	return ptr;
 }
 
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   struct iov_iter *to,
-			   int noblock, struct sk_buff *skb)
+			   int noblock, void *ptr)
 {
 	ssize_t ret;
 	int err;
@@ -2012,23 +2138,31 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
 	if (!iov_iter_count(to)) {
-		if (skb)
-			kfree_skb(skb);
+		tun_ptr_free(ptr);
 		return 0;
 	}
 
-	if (!skb) {
+	if (!ptr) {
 		/* Read frames from ring */
-		skb = tun_ring_recv(tfile, noblock, &err);
-		if (!skb)
+		ptr = tun_ring_recv(tfile, noblock, &err);
+		if (!ptr)
 			return err;
 	}
 
-	ret = tun_put_user(tun, tfile, skb, to);
-	if (unlikely(ret < 0))
-		kfree_skb(skb);
-	else
-		consume_skb(skb);
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		ret = tun_put_user_xdp(tun, tfile, xdp, to);
+		put_page(virt_to_head_page(xdp->data));
+	} else {
+		struct sk_buff *skb = ptr;
+
+		ret = tun_put_user(tun, tfile, skb, to);
+		if (unlikely(ret < 0))
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
+	}
 
 	return ret;
 }
@@ -2165,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
-	struct sk_buff *skb = m->msg_control;
+	void *ptr = m->msg_control;
 	int ret;
 
 	if (!tun) {
 		ret = -EBADFD;
-		goto out_free_skb;
+		goto out_free;
 	}
 
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
@@ -2182,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
 	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
@@ -2193,12 +2327,25 @@ out:
 
 out_put_tun:
 	tun_put(tun);
-out_free_skb:
-	if (skb)
-		kfree_skb(skb);
+out_free:
+	tun_ptr_free(ptr);
 	return ret;
 }
 
+static int tun_ptr_peek_len(void *ptr)
+{
+	if (likely(ptr)) {
+		if (tun_is_xdp_buff(ptr)) {
+			struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+			return xdp->data_end - xdp->data;
+		}
+		return __skb_array_len_with_tag(ptr);
+	} else {
+		return 0;
+	}
+}
+
 static int tun_peek_len(struct socket *sock)
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
@@ -2209,7 +2356,7 @@ static int tun_peek_len(struct socket *sock)
 	if (!tun)
 		return 0;
 
-	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, __skb_array_len_with_tag);
+	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
 	tun_put(tun);
 
 	return ret;
@@ -3132,7 +3279,7 @@ static int tun_queue_resize(struct tun_struct *tun)
 
 	ret = ptr_ring_resize_multiple(rings, n,
 				       dev->tx_queue_len, GFP_KERNEL,
-				       __skb_array_destroy_skb);
+				       tun_ptr_free);
 
 	kfree(rings);
 	return ret;
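
A note on tun_xdp_xmit() above: the caller's xdp_buff descriptor lives on the caller's stack, so it cannot be queued directly. Instead the descriptor is copied into the packet's own headroom (*buff = *xdp, with buff pointing at data_hard_start), which ties its lifetime to the page that is eventually released with put_page(); the headroom check guarantees there is room for it. Below is a minimal, self-contained sketch of that "stash the descriptor in the headroom" pattern, assuming a simplified desc type in place of struct xdp_buff. The reader side (tun_put_user_xdp() and tun_ptr_free()) later recovers the page with virt_to_head_page(xdp->data) once the payload has been copied out or dropped.

#include <stddef.h>
#include <string.h>

struct desc {			/* simplified stand-in for struct xdp_buff */
	void *data;		/* start of payload */
	void *data_end;		/* end of payload */
	void *data_hard_start;	/* start of the buffer, i.e. start of headroom */
};

/* Copy the (stack-allocated) descriptor into the buffer's own headroom so a
 * pointer to it can outlive the caller; returns NULL if the headroom is too
 * small, mirroring the -ENOSPC check in tun_xdp_xmit().
 */
static struct desc *stash_desc(const struct desc *src)
{
	size_t headroom = (char *)src->data - (char *)src->data_hard_start;

	if (headroom < sizeof(struct desc))
		return NULL;

	memcpy(src->data_hard_start, src, sizeof(struct desc));
	return src->data_hard_start;
}
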
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c31655548da2..a5a1db647635 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -175,6 +175,17 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
 	}
 }
 
+static int vhost_net_buf_peek_len(void *ptr)
+{
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		return xdp->data_end - xdp->data;
+	}
+
+	return __skb_array_len_with_tag(ptr);
+}
+
 static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_net_buf *rxq = &nvq->rxq;
@@ -186,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
 		return 0;
 
 out:
-	return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
+	return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
 }
 
 static void vhost_net_buf_init(struct vhost_net_buf *rxq)
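
vhost_net sizes its virtio receive descriptors by peeking at the length of the entry at the head of the ring, so the peek path must now cope with both entry types: for a tagged xdp_buff the payload length is data_end - data, while an sk_buff still goes through __skb_array_len_with_tag(), exactly as tun_ptr_peek_len() does on the tun side. A standalone sketch of that length derivation, using illustrative mock types rather than the kernel structs:

#include <stdint.h>
#include <stdio.h>

#define XDP_FLAG 0x1UL				/* mirrors TUN_XDP_FLAG */

struct mock_skb { int len; };			/* stand-in for struct sk_buff */
struct mock_xdp { char *data, *data_end; };	/* stand-in for struct xdp_buff */

/* Length of a ring entry, whichever type it is; mirrors the shape of
 * tun_ptr_peek_len() and vhost_net_buf_peek_len().
 */
static int peek_len(void *ptr)
{
	if (!ptr)
		return 0;
	if ((uintptr_t)ptr & XDP_FLAG) {
		struct mock_xdp *xdp = (void *)((uintptr_t)ptr & ~XDP_FLAG);

		return (int)(xdp->data_end - xdp->data);
	}
	return ((struct mock_skb *)ptr)->len;
}

int main(void)
{
	char payload[256];
	struct mock_xdp xdp = { .data = payload, .data_end = payload + 60 };
	struct mock_skb skb = { .len = 1500 };

	printf("%d %d\n", peek_len((void *)((uintptr_t)&xdp | XDP_FLAG)), peek_len(&skb));
	return 0;
}
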
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index bdee9b83baf6..08e66827ad8e 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -17,9 +17,14 @@
 
 #include <uapi/linux/if_tun.h>
 
+#define TUN_XDP_FLAG 0x1UL
+
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+bool tun_is_xdp_buff(void *ptr);
+void *tun_xdp_to_ptr(void *ptr);
+void *tun_ptr_to_xdp(void *ptr);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -33,5 +38,17 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline bool tun_is_xdp_buff(void *ptr)
+{
+	return false;
+}
+static inline void *tun_xdp_to_ptr(void *ptr)
+{
+	return NULL;
+}
+static inline void *tun_ptr_to_xdp(void *ptr)
+{
+	return NULL;
+}
 #endif /* CONFIG_TUN */
 #endif /* __IF_TUN_H */
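
Finally, on the if_tun.h fallbacks: when the tun driver is not built, the header must still satisfy callers such as vhost_net, so it supplies no-op stubs. These have to be static inline (as in the hunk above); plain function definitions in a header would be emitted in every translation unit that includes it and fail at link time with multiple-definition errors. A minimal illustration of the declare-or-stub header pattern, with hypothetical names (FEATURE_FOO, foo_is_special):

#ifndef FOO_H
#define FOO_H

#include <stdbool.h>

#ifdef FEATURE_FOO
/* Real implementation lives in foo.c and is only built when the feature is on. */
bool foo_is_special(void *ptr);
#else
/* Compiled-out fallback: static inline keeps the stub local to each includer
 * and lets the compiler drop dead branches in callers.
 */
static inline bool foo_is_special(void *ptr)
{
	(void)ptr;
	return false;
}
#endif

#endif /* FOO_H */
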