author     Magnus Karlsson <magnus.karlsson@intel.com>    2018-06-04 08:05:57 -0400
committer  Daniel Borkmann <daniel@iogearbox.net>         2018-06-05 09:48:34 -0400
commit     ac98d8aab61baf785eb8f099b36daf34fc76a70e (patch)
tree       c0fa347892f50786cd516e5eb4396abf69bebb0d /net/xdp
parent     e3760c7e50ac6cdf1188fec44938dd7e6e6eef61 (diff)
xsk: wire up Tx zero-copy functions
Here we add the functionality required to support zero-copy Tx, and
also expose various zero-copy related functions to the netdevs.
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
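
[Editor's note] The three helpers exported below (xsk_umem_consume_tx(),
xsk_umem_complete_tx() and xsk_umem_consume_tx_done()), together with the
ndo_xsk_async_xmit hook the patch starts requiring, form the driver-facing
side of zero-copy Tx. A minimal sketch of how a driver might wire them up
follows; struct my_dev, my_post_tx() and the function names are
hypothetical, and the split into an xmit path and a completion path is an
assumption about driver structure, not something this patch mandates.

/* Sketch only: hypothetical driver glue for the API exported by this
 * patch. Only the xsk_umem_* calls come from net/xdp/xsk.c below.
 */
#include <net/xdp_sock.h>

/* Transmit path, e.g. invoked via the driver's ndo_xsk_async_xmit:
 * drain descriptors from the Tx rings of all sockets bound to this
 * umem and post them to hardware. xsk_umem_consume_tx() hands back a
 * DMA address and length, and lazily reserves a completion-ring slot
 * so a completion for every accepted descriptor is guaranteed to fit.
 */
static void my_xsk_xmit(struct my_dev *priv, struct xdp_umem *umem)
{
	dma_addr_t dma;
	u32 len;

	while (xsk_umem_consume_tx(umem, &dma, &len))
		my_post_tx(priv, dma, len);
}

/* Completion path, e.g. from the driver's Tx-clean NAPI routine, once
 * hardware reports that nb_done frames have been sent.
 */
static void my_xsk_tx_done(struct my_dev *priv, struct xdp_umem *umem,
			   u32 nb_done)
{
	/* Publish the lazily reserved completion entries to user space. */
	xsk_umem_complete_tx(umem, nb_done);
	/* Wake sockets blocked waiting for Tx completions. */
	xsk_umem_consume_tx_done(umem);
}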
Diffstat (limited to 'net/xdp')
-rw-r--r--  net/xdp/xdp_umem.c  | 29
-rw-r--r--  net/xdp/xdp_umem.h  |  8
-rw-r--r--  net/xdp/xsk.c       | 70
-rw-r--r--  net/xdp/xsk_queue.h | 32
4 files changed, 128 insertions(+), 11 deletions(-)
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index f729d79b8d91..7eb4948a38d2 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -17,6 +17,29 @@
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&umem->xsk_list_lock, flags);
+	list_add_rcu(&xs->list, &umem->xsk_list);
+	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+}
+
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+	unsigned long flags;
+
+	if (xs->dev) {
+		spin_lock_irqsave(&umem->xsk_list_lock, flags);
+		list_del_rcu(&xs->list);
+		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+
+		if (umem->zc)
+			synchronize_net();
+	}
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u32 queue_id, u16 flags)
 {
@@ -35,7 +58,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 
 	dev_hold(dev);
 
-	if (dev->netdev_ops->ndo_bpf) {
+	if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
 		bpf.command = XDP_QUERY_XSK_UMEM;
 
 		rtnl_lock();
@@ -70,7 +93,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
 }
 
-void xdp_umem_clear_dev(struct xdp_umem *umem)
+static void xdp_umem_clear_dev(struct xdp_umem *umem)
 {
 	struct netdev_bpf bpf;
 	int err;
@@ -283,6 +306,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
 	umem->user = NULL;
+	INIT_LIST_HEAD(&umem->xsk_list);
+	spin_lock_init(&umem->xsk_list_lock);
 
 	refcount_set(&umem->users, 1);
 
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 674508a32a4d..f11560334f88 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -13,12 +13,18 @@ static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
 }
 
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u32 queue_id, u16 flags);
-void xdp_umem_clear_dev(struct xdp_umem *umem);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
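
[Editor's note] The new xdp_umem_get_dma() mirrors the existing
xdp_umem_get_data(): the 64-bit umem address is split into a page index
(addr >> PAGE_SHIFT) and an in-page offset (addr & (PAGE_SIZE - 1)). As a
worked example (values illustrative, assuming 4 KiB pages): addr = 0x3010
gives page index 3 and offset 0x10, so the result is
umem->pages[3].dma + 0x10.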
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ab64bd8260ea..ddca4bf1cfc8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -21,6 +21,7 @@
 #include <linux/uaccess.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
+#include <linux/rculist.h>
 #include <net/xdp_sock.h>
 #include <net/xdp.h>
 
@@ -138,6 +139,59 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return err;
 }
 
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+	xskq_produce_flush_addr_n(umem->cq, nb_entries);
+}
+EXPORT_SYMBOL(xsk_umem_complete_tx);
+
+void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+	struct xdp_sock *xs;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		xs->sk.sk_write_space(&xs->sk);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+{
+	struct xdp_desc desc;
+	struct xdp_sock *xs;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		if (!xskq_peek_desc(xs->tx, &desc))
+			continue;
+
+		if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+			goto out;
+
+		*dma = xdp_umem_get_dma(umem, desc.addr);
+		*len = desc.len;
+
+		xskq_discard_desc(xs->tx);
+		rcu_read_unlock();
+		return true;
+	}
+
+out:
+	rcu_read_unlock();
+	return false;
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx);
+
+static int xsk_zc_xmit(struct sock *sk)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+	struct net_device *dev = xs->dev;
+
+	return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+}
+
 static void xsk_destruct_skb(struct sk_buff *skb)
 {
 	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
@@ -151,7 +205,6 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 			    size_t total_len)
 {
-	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
 	u32 max_batch = TX_BATCH_SIZE;
 	struct xdp_sock *xs = xdp_sk(sk);
 	bool sent_frame = false;
@@ -161,8 +214,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 
 	if (unlikely(!xs->tx))
 		return -ENOBUFS;
-	if (need_wait)
-		return -EOPNOTSUPP;
 
 	mutex_lock(&xs->mutex);
 
@@ -192,7 +243,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 		goto out;
 	}
 
-	skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+	skb = sock_alloc_send_skb(sk, len, 1, &err);
 	if (unlikely(!skb)) {
 		err = -EAGAIN;
 		goto out;
@@ -235,6 +286,7 @@ out:
 
 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
+	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 
@@ -242,8 +294,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		return -ENXIO;
 	if (unlikely(!(xs->dev->flags & IFF_UP)))
 		return -ENETDOWN;
+	if (need_wait)
+		return -EOPNOTSUPP;
 
-	return xsk_generic_xmit(sk, m, total_len);
+	return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
 }
 
 static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -419,10 +473,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	}
 
 	xs->dev = dev;
-	xs->queue_id = sxdp->sxdp_queue_id;
-
+	xs->zc = xs->umem->zc;
+	xs->queue_id = qid;
 	xskq_set_umem(xs->rx, &xs->umem->props);
 	xskq_set_umem(xs->tx, &xs->umem->props);
+	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
 	if (err)
@@ -660,6 +715,7 @@ static void xsk_destruct(struct sock *sk)
 
 	xskq_destroy(xs->rx);
 	xskq_destroy(xs->tx);
+	xdp_del_sk_umem(xs->umem, xs);
 	xdp_put_umem(xs->umem);
 
 	sk_refcnt_debug_dec(sk);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 5246ed420a16..ef6a6f0ec949 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -11,6 +11,7 @@
 #include <net/xdp_sock.h>
 
 #define RX_BATCH_SIZE 16
+#define LAZY_UPDATE_THRESHOLD 128
 
 struct xdp_ring {
 	u32 producer ____cacheline_aligned_in_smp;
@@ -61,9 +62,14 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
 	return (entries > dcnt) ? dcnt : entries;
 }
 
+static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
+{
+	return q->nentries - (producer - q->cons_tail);
+}
+
 static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 {
-	u32 free_entries = q->nentries - (producer - q->cons_tail);
+	u32 free_entries = xskq_nb_free_lazy(q, producer);
 
 	if (free_entries >= dcnt)
 		return free_entries;
@@ -123,6 +129,9 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 {
 	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 
+	if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+		return -ENOSPC;
+
 	ring->desc[q->prod_tail++ & q->ring_mask] = addr;
 
 	/* Order producer and data */
@@ -132,6 +141,27 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 	return 0;
 }
 
+static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
+{
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+	if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
+		return -ENOSPC;
+
+	ring->desc[q->prod_head++ & q->ring_mask] = addr;
+	return 0;
+}
+
+static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
+					     u32 nb_entries)
+{
+	/* Order producer and data */
+	smp_wmb();
+
+	q->prod_tail += nb_entries;
+	WRITE_ONCE(q->ring->producer, q->prod_tail);
+}
+
 static inline int xskq_reserve_addr(struct xsk_queue *q)
 {
 	if (xskq_nb_free(q, q->prod_head, 1) == 0)
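
[Editor's note] On the lazy producer pair added above:
xskq_produce_addr_lazy() stages an entry at q->prod_head without
publishing it, and xskq_produce_flush_addr_n() later issues a single
smp_wmb() and advances prod_tail and ring->producer for the whole batch,
so the ordering cost is paid once per batch instead of once per
completion. A sketch of the intended pattern follows;
example_flush_batch() and its parameters are illustrative names, not
part of the patch.

/* Sketch only: batched use of the lazy producer API from xsk_queue.h. */
static void example_flush_batch(struct xsk_queue *cq, u64 *addrs, u32 n)
{
	u32 i, staged = 0;

	for (i = 0; i < n; i++) {
		/* Stages the entry at prod_head; it is not yet visible
		 * to the consumer, and no barrier is issued per entry.
		 */
		if (xskq_produce_addr_lazy(cq, addrs[i]))
			break;	/* completion ring full */
		staged++;
	}

	/* One smp_wmb() and one producer-pointer store publish the
	 * whole batch at once.
	 */
	xskq_produce_flush_addr_n(cq, staged);
}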