author    Magnus Karlsson <magnus.karlsson@intel.com>    2018-06-04 08:05:57 -0400
committer Daniel Borkmann <daniel@iogearbox.net>         2018-06-05 09:48:34 -0400
commit    ac98d8aab61baf785eb8f099b36daf34fc76a70e
tree      c0fa347892f50786cd516e5eb4396abf69bebb0d
parent    e3760c7e50ac6cdf1188fec44938dd7e6e6eef61
xsk: wire upp Tx zero-copy functions
Here we add the functionality required to support zero-copy Tx, and also
expose various zero-copy related functions for the netdevs.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'net/xdp')
-rw-r--r--  net/xdp/xdp_umem.c   29
-rw-r--r--  net/xdp/xdp_umem.h    8
-rw-r--r--  net/xdp/xsk.c        70
-rw-r--r--  net/xdp/xsk_queue.h  32
4 files changed, 128 insertions, 11 deletions
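For orientation, below is a driver-side sketch (illustrative only, not part of this patch) of how a netdev that implements ndo_xsk_async_xmit might use the functions exported here: xsk_umem_consume_tx() in its transmit path, and xsk_umem_complete_tx() together with xsk_umem_consume_tx_done() in its Tx completion path. struct my_dev and my_dev_post_frame() are hypothetical placeholders for the driver's private state and its hardware ring programming.

/* Illustrative sketch only -- not part of this patch.  struct my_dev and
 * my_dev_post_frame() are hypothetical placeholders.
 */
static void my_dev_xsk_xmit(struct my_dev *priv, struct xdp_umem *umem,
                            u32 budget)
{
        dma_addr_t dma;
        u32 len, sent = 0;

        /* Each successful call returns a DMA address and length for one
         * Tx descriptor and lazily reserves a completion ring entry.
         */
        while (sent < budget && xsk_umem_consume_tx(umem, &dma, &len)) {
                my_dev_post_frame(priv, dma, len);      /* driver specific */
                sent++;
        }
}

static void my_dev_xsk_clean_tx(struct my_dev *priv, struct xdp_umem *umem,
                                u32 completed)
{
        /* Once the hardware has sent the frames, publish the reserved
         * completion ring entries and wake sockets blocked on Tx space.
         */
        xsk_umem_complete_tx(umem, completed);
        xsk_umem_consume_tx_done(umem);
}

The split mirrors the lazy producer update added to xsk_queue.h below: xsk_umem_consume_tx() only advances the completion ring's producer head, and the flush performed by xsk_umem_complete_tx() makes the entries visible to user space once the frames have actually gone out.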
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index f729d79b8d91..7eb4948a38d2 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -17,6 +17,29 @@
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&umem->xsk_list_lock, flags);
+        list_add_rcu(&xs->list, &umem->xsk_list);
+        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+}
+
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+        unsigned long flags;
+
+        if (xs->dev) {
+                spin_lock_irqsave(&umem->xsk_list_lock, flags);
+                list_del_rcu(&xs->list);
+                spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+
+                if (umem->zc)
+                        synchronize_net();
+        }
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                         u32 queue_id, u16 flags)
 {
@@ -35,7 +58,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 
         dev_hold(dev);
 
-        if (dev->netdev_ops->ndo_bpf) {
+        if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
                 bpf.command = XDP_QUERY_XSK_UMEM;
 
                 rtnl_lock();
@@ -70,7 +93,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
         return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
 }
 
-void xdp_umem_clear_dev(struct xdp_umem *umem)
+static void xdp_umem_clear_dev(struct xdp_umem *umem)
 {
         struct netdev_bpf bpf;
         int err;
@@ -283,6 +306,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
         umem->npgs = size / PAGE_SIZE;
         umem->pgs = NULL;
         umem->user = NULL;
+        INIT_LIST_HEAD(&umem->xsk_list);
+        spin_lock_init(&umem->xsk_list_lock);
 
         refcount_set(&umem->users, 1);
 
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 674508a32a4d..f11560334f88 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -13,12 +13,18 @@ static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
         return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
 }
 
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+        return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                         u32 queue_id, u16 flags);
-void xdp_umem_clear_dev(struct xdp_umem *umem);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ab64bd8260ea..ddca4bf1cfc8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -21,6 +21,7 @@
 #include <linux/uaccess.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
+#include <linux/rculist.h>
 #include <net/xdp_sock.h>
 #include <net/xdp.h>
 
@@ -138,6 +139,59 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
         return err;
 }
 
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+        xskq_produce_flush_addr_n(umem->cq, nb_entries);
+}
+EXPORT_SYMBOL(xsk_umem_complete_tx);
+
+void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+        struct xdp_sock *xs;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+                xs->sk.sk_write_space(&xs->sk);
+        }
+        rcu_read_unlock();
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+{
+        struct xdp_desc desc;
+        struct xdp_sock *xs;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+                if (!xskq_peek_desc(xs->tx, &desc))
+                        continue;
+
+                if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+                        goto out;
+
+                *dma = xdp_umem_get_dma(umem, desc.addr);
+                *len = desc.len;
+
+                xskq_discard_desc(xs->tx);
+                rcu_read_unlock();
+                return true;
+        }
+
+out:
+        rcu_read_unlock();
+        return false;
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx);
+
+static int xsk_zc_xmit(struct sock *sk)
+{
+        struct xdp_sock *xs = xdp_sk(sk);
+        struct net_device *dev = xs->dev;
+
+        return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+}
+
 static void xsk_destruct_skb(struct sk_buff *skb)
 {
         u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
@@ -151,7 +205,6 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
                             size_t total_len)
 {
-        bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
         u32 max_batch = TX_BATCH_SIZE;
         struct xdp_sock *xs = xdp_sk(sk);
         bool sent_frame = false;
@@ -161,8 +214,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 
         if (unlikely(!xs->tx))
                 return -ENOBUFS;
-        if (need_wait)
-                return -EOPNOTSUPP;
 
         mutex_lock(&xs->mutex);
 
@@ -192,7 +243,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
                         goto out;
                 }
 
-                skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+                skb = sock_alloc_send_skb(sk, len, 1, &err);
                 if (unlikely(!skb)) {
                         err = -EAGAIN;
                         goto out;
@@ -235,6 +286,7 @@ out:
 
 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
+        bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
         struct sock *sk = sock->sk;
         struct xdp_sock *xs = xdp_sk(sk);
 
@@ -242,8 +294,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
                 return -ENXIO;
         if (unlikely(!(xs->dev->flags & IFF_UP)))
                 return -ENETDOWN;
+        if (need_wait)
+                return -EOPNOTSUPP;
 
-        return xsk_generic_xmit(sk, m, total_len);
+        return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
 }
 
 static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -419,10 +473,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
         }
 
         xs->dev = dev;
-        xs->queue_id = sxdp->sxdp_queue_id;
-
+        xs->zc = xs->umem->zc;
+        xs->queue_id = qid;
         xskq_set_umem(xs->rx, &xs->umem->props);
         xskq_set_umem(xs->tx, &xs->umem->props);
+        xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
         if (err)
@@ -660,6 +715,7 @@ static void xsk_destruct(struct sock *sk)
 
         xskq_destroy(xs->rx);
         xskq_destroy(xs->tx);
+        xdp_del_sk_umem(xs->umem, xs);
         xdp_put_umem(xs->umem);
 
         sk_refcnt_debug_dec(sk);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 5246ed420a16..ef6a6f0ec949 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -11,6 +11,7 @@
 #include <net/xdp_sock.h>
 
 #define RX_BATCH_SIZE 16
+#define LAZY_UPDATE_THRESHOLD 128
 
 struct xdp_ring {
         u32 producer ____cacheline_aligned_in_smp;
@@ -61,9 +62,14 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
         return (entries > dcnt) ? dcnt : entries;
 }
 
+static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
+{
+        return q->nentries - (producer - q->cons_tail);
+}
+
 static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 {
-        u32 free_entries = q->nentries - (producer - q->cons_tail);
+        u32 free_entries = xskq_nb_free_lazy(q, producer);
 
         if (free_entries >= dcnt)
                 return free_entries;
@@ -123,6 +129,9 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 {
         struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 
+        if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+                return -ENOSPC;
+
         ring->desc[q->prod_tail++ & q->ring_mask] = addr;
 
         /* Order producer and data */
@@ -132,6 +141,27 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
         return 0;
 }
 
+static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
+{
+        struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+        if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
+                return -ENOSPC;
+
+        ring->desc[q->prod_head++ & q->ring_mask] = addr;
+        return 0;
+}
+
+static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
+                                             u32 nb_entries)
+{
+        /* Order producer and data */
+        smp_wmb();
+
+        q->prod_tail += nb_entries;
+        WRITE_ONCE(q->ring->producer, q->prod_tail);
+}
+
 static inline int xskq_reserve_addr(struct xsk_queue *q)
 {
         if (xskq_nb_free(q, q->prod_head, 1) == 0)