author		Kevin Laatz <kevin.laatz@intel.com>	2019-08-26 22:25:22 -0400
committer	Daniel Borkmann <daniel@iogearbox.net>	2019-08-30 19:08:26 -0400
commit		c05cd3645814724bdeb32a2b4d953b12bdea5f8c (patch)
tree		9b22aa9bc0d6109c85ed4f0658fca09193618707
parent		b35a2d3e895600d8a1273b44f68128491faab3b8 (diff)
xsk: add support to allow unaligned chunk placement
Currently, addresses are chunk size aligned. This means we are very
restricted in terms of where we can place a chunk within the umem. For
example, if we have a chunk size of 2k, then our chunks can only be placed
at 0, 2k, 4k, 6k, 8k... and so on (i.e. every 2k starting from 0).

This patch introduces the ability to use unaligned chunks. With these
changes, we are no longer bound to having to place chunks at a 2k (or
whatever your chunk size is) interval.

Since we are no longer dealing with aligned chunks, they can now cross page
boundaries. Checks for page contiguity have been added in order to keep
track of which pages are followed by a physically contiguous page.

Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
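For reference, in unaligned mode the 64-bit descriptor address is split into a 48-bit base address (lower bits) and a 16-bit offset (upper bits), mirroring the masks added to include/uapi/linux/if_xdp.h below. The following is a minimal standalone C sketch of that encoding; the two macro values come from this patch, while the helper names and demo values are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the masks added to include/uapi/linux/if_xdp.h in this patch. */
#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
#define XSK_UNALIGNED_BUF_ADDR_MASK \
	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)

/* Lower 48 bits: base address of the buffer within the umem. */
static uint64_t extract_addr(uint64_t addr)
{
	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
}

/* Upper 16 bits: offset (e.g. headroom) applied on top of the base. */
static uint64_t extract_offset(uint64_t addr)
{
	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
}

int main(void)
{
	uint64_t base = 0x3f00;		/* illustrative, not chunk aligned */
	uint64_t offset = 256;		/* illustrative headroom */
	uint64_t addr = base | (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);

	printf("base=%#llx offset=%llu resolved=%#llx\n",
	       (unsigned long long)extract_addr(addr),
	       (unsigned long long)extract_offset(addr),
	       (unsigned long long)(extract_addr(addr) + extract_offset(addr)));
	return 0;
}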
-rw-r--r--  include/net/xdp_sock.h       | 75
-rw-r--r--  include/uapi/linux/if_xdp.h  |  9
-rw-r--r--  net/xdp/xdp_umem.c           | 19
-rw-r--r--  net/xdp/xsk.c                | 94
-rw-r--r--  net/xdp/xsk_diag.c           |  2
-rw-r--r--  net/xdp/xsk_queue.h          | 70
6 files changed, 233 insertions(+), 36 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index f023b9940d64..c9398ce7960f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -16,6 +16,13 @@
 struct net_device;
 struct xsk_queue;
 
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
+
 struct xdp_umem_page {
 	void *addr;
 	dma_addr_t dma;
@@ -27,8 +34,12 @@ struct xdp_umem_fq_reuse {
 	u64 handles[];
 };
 
-/* Flags for the umem flags field. */
-#define XDP_UMEM_USES_NEED_WAKEUP (1 << 0)
+/* Flags for the umem flags field.
+ *
+ * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
+ * flags. See inlude/uapi/include/linux/if_xdp.h.
+ */
+#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
 
 struct xdp_umem {
 	struct xsk_queue *fq;
@@ -124,14 +135,36 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
 
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+	return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
-	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
+	unsigned long page_addr;
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
+
+	return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
 }
 
 static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
 {
-	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+	addr = xsk_umem_add_offset_to_addr(addr);
+
+	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
 }
 
 /* Reuse-queue aware version of FILL queue helpers */
@@ -172,6 +205,19 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
 
 	rq->handles[rq->length++] = addr;
 }
+
+/* Handle the offset appropriately depending on aligned or unaligned mode.
+ * For unaligned mode, we store the offset in the upper 16-bits of the address.
+ * For aligned mode, we simply add the offset to the address.
+ */
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
+					 u64 offset)
+{
+	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+	else
+		return address + offset;
+}
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
@@ -241,6 +287,21 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
 	return NULL;
 }
 
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+	return 0;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+	return 0;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+	return 0;
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
 	return NULL;
@@ -290,6 +351,12 @@ static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 	return false;
 }
 
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
+					 u64 offset)
+{
+	return 0;
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 62b80d57b72a..be328c59389d 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -26,6 +26,9 @@
  */
 #define XDP_USE_NEED_WAKEUP (1 << 3)
 
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
+
 struct sockaddr_xdp {
 	__u16 sxdp_family;
 	__u16 sxdp_flags;
@@ -66,6 +69,7 @@ struct xdp_umem_reg {
 	__u64 len; /* Length of packet data area */
 	__u32 chunk_size;
 	__u32 headroom;
+	__u32 flags;
 };
 
 struct xdp_statistics {
@@ -87,6 +91,11 @@ struct xdp_options {
 #define XDP_UMEM_PGOFF_FILL_RING	0x100000000ULL
 #define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000ULL
 
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
 /* Rx/Tx descriptor */
 struct xdp_desc {
 	__u64 addr;
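With the new flags field above, an application can opt in to unaligned chunks when registering its umem. The following is a hedged userspace sketch, assuming the system's if_xdp.h already carries these uapi additions; the buffer size, chunk size, and fallback defines are illustrative only:

#include <linux/if_xdp.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_XDP
#define AF_XDP 44			/* assumption: older headers may lack it */
#endif
#ifndef SOL_XDP
#define SOL_XDP 283			/* assumption: older headers may lack it */
#endif
#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)	/* assumption: pre-patch headers lack it */
#endif

int main(void)
{
	size_t umem_len = 1 << 20;	/* 1 MiB packet buffer area (illustrative) */
	void *buf;
	int fd;

	fd = socket(AF_XDP, SOCK_RAW, 0);
	if (fd < 0) {
		perror("socket(AF_XDP)");
		return 1;
	}

	/* The umem must start on a page boundary. */
	buf = mmap(NULL, umem_len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	struct xdp_umem_reg mr = {
		.addr = (unsigned long)buf,
		.len = umem_len,
		.chunk_size = 2048,	/* need not be a power of two in unaligned mode */
		.headroom = 0,
		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,	/* opt in to unaligned chunks */
	};

	if (setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr))) {
		perror("setsockopt(XDP_UMEM_REG)");
		return 1;
	}

	printf("umem registered with unaligned chunk support\n");
	close(fd);
	return 0;
}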
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 2d65779282a1..e997b263a0dd 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -340,6 +340,7 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
 
 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
+	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 	unsigned int chunks, chunks_per_page;
 	u64 addr = mr->addr, size = mr->len;
@@ -355,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 		return -EINVAL;
 	}
 
-	if (!is_power_of_2(chunk_size))
+	if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
+			XDP_UMEM_USES_NEED_WAKEUP))
+		return -EINVAL;
+
+	if (!unaligned_chunks && !is_power_of_2(chunk_size))
 		return -EINVAL;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -372,9 +377,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (chunks == 0)
 		return -EINVAL;
 
-	chunks_per_page = PAGE_SIZE / chunk_size;
-	if (chunks < chunks_per_page || chunks % chunks_per_page)
-		return -EINVAL;
+	if (!unaligned_chunks) {
+		chunks_per_page = PAGE_SIZE / chunk_size;
+		if (chunks < chunks_per_page || chunks % chunks_per_page)
+			return -EINVAL;
+	}
 
 	headroom = ALIGN(headroom, 64);
 
@@ -383,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 		return -EINVAL;
 
 	umem->address = (unsigned long)addr;
-	umem->chunk_mask = ~((u64)chunk_size - 1);
+	umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+					    : ~((u64)chunk_size - 1);
 	umem->size = size;
 	umem->headroom = headroom;
 	umem->chunk_size_nohr = chunk_size - headroom;
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
 	umem->user = NULL;
+	umem->flags = mr->flags;
 	INIT_LIST_HEAD(&umem->xsk_list);
 	spin_lock_init(&umem->xsk_list_lock);
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ee4428a892fa..187fd157fcff 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);
 
 u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
 {
-	return xskq_peek_addr(umem->fq, addr);
+	return xskq_peek_addr(umem->fq, addr, umem);
 }
 EXPORT_SYMBOL(xsk_umem_peek_addr);
 
@@ -115,21 +115,43 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 }
 EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
 
+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
+ * each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+			     u32 len, u32 metalen)
+{
+	void *to_buf = xdp_umem_get_data(umem, addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+		void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+		u64 page_start = addr & ~(PAGE_SIZE - 1);
+		u64 first_len = PAGE_SIZE - (addr - page_start);
+
+		memcpy(to_buf, from_buf, first_len + metalen);
+		memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+		return;
+	}
+
+	memcpy(to_buf, from_buf, len + metalen);
+}
+
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	void *to_buf, *from_buf;
+	u64 offset = xs->umem->headroom;
+	u64 addr, memcpy_addr;
+	void *from_buf;
 	u32 metalen;
-	u64 addr;
 	int err;
 
-	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+	if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
 	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
 
-	addr += xs->umem->headroom;
-
 	if (unlikely(xdp_data_meta_unsupported(xdp))) {
 		from_buf = xdp->data;
 		metalen = 0;
@@ -138,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 		metalen = xdp->data - xdp->data_meta;
 	}
 
-	to_buf = xdp_umem_get_data(xs->umem, addr);
-	memcpy(to_buf, from_buf, len + metalen);
-	addr += metalen;
+	memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
+	__xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+
+	offset += metalen;
+	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (!err) {
 		xskq_discard_addr(xs->umem->fq);
@@ -185,6 +209,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 metalen = xdp->data - xdp->data_meta;
 	u32 len = xdp->data_end - xdp->data;
+	u64 offset = xs->umem->headroom;
 	void *buffer;
 	u64 addr;
 	int err;
@@ -196,17 +221,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 		goto out_unlock;
 	}
 
-	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+	if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
 	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		err = -ENOSPC;
 		goto out_drop;
 	}
 
-	addr += xs->umem->headroom;
-
+	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
 	buffer = xdp_umem_get_data(xs->umem, addr);
 	memcpy(buffer, xdp->data_meta, len + metalen);
-	addr += metalen;
+
+	addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (err)
 		goto out_drop;
@@ -250,7 +275,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-		if (!xskq_peek_desc(xs->tx, desc))
+		if (!xskq_peek_desc(xs->tx, desc, umem))
 			continue;
 
 		if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -304,7 +329,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 	if (xs->queue_id >= xs->dev->real_num_tx_queues)
 		goto out;
 
-	while (xskq_peek_desc(xs->tx, &desc)) {
+	while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
 		char *buffer;
 		u64 addr;
 		u32 len;
@@ -333,7 +358,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 		skb->dev = xs->dev;
 		skb->priority = sk->sk_priority;
 		skb->mark = sk->sk_mark;
-		skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
 		skb->destructor = xsk_destruct_skb;
 
 		err = dev_direct_xmit(skb, xs->queue_id);
@@ -526,6 +551,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
 	return sock;
 }
 
+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check
+ * For all other modes we use addr (kernel virtual address)
+ * Store the result in the low bits of addr.
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+	struct xdp_umem_page *pgs = umem->pages;
+	int i, is_contig;
+
+	for (i = 0; i < umem->npgs - 1; i++) {
+		is_contig = (flags & XDP_ZEROCOPY) ?
+			(pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
+			(pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
+		pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
+	}
+}
+
 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -616,6 +659,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
 			goto out_unlock;
+
+		xsk_check_page_contiguity(xs->umem, flags);
 	}
 
 	xs->dev = dev;
@@ -636,6 +681,13 @@ out_release:
 	return err;
 }
 
+struct xdp_umem_reg_v1 {
+	__u64 addr; /* Start of packet data area */
+	__u64 len; /* Length of packet data area */
+	__u32 chunk_size;
+	__u32 headroom;
+};
+
 static int xsk_setsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, unsigned int optlen)
 {
@@ -673,10 +725,16 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 	}
 	case XDP_UMEM_REG:
 	{
-		struct xdp_umem_reg mr;
+		size_t mr_size = sizeof(struct xdp_umem_reg);
+		struct xdp_umem_reg mr = {};
 		struct xdp_umem *umem;
 
-		if (copy_from_user(&mr, optval, sizeof(mr)))
+		if (optlen < sizeof(struct xdp_umem_reg_v1))
+			return -EINVAL;
+		else if (optlen < sizeof(mr))
+			mr_size = sizeof(struct xdp_umem_reg_v1);
+
+		if (copy_from_user(&mr, optval, mr_size))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..9986a759fe06 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
 	du.id = umem->id;
 	du.size = umem->size;
 	du.num_pages = umem->npgs;
-	du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+	du.chunk_size = umem->chunk_size_nohr + umem->headroom;
 	du.headroom = umem->headroom;
 	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
 	du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index dd9e985c2461..eddae4688862 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -134,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
 
 /* UMEM queue */
 
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+					      u64 length)
+{
+	bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+	bool next_pg_contig =
+		(unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
+			XSK_NEXT_PG_CONTIG_MASK;
+
+	return cross_pg && !next_pg_contig;
+}
+
 static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
 	if (addr >= q->size) {
@@ -144,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 	return true;
 }
 
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+						u64 length,
+						struct xdp_umem *umem)
+{
+	u64 base_addr = xsk_umem_extract_addr(addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (base_addr >= q->size || addr >= q->size ||
+	    xskq_crosses_non_contig_pg(umem, addr, length)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+				      struct xdp_umem *umem)
 {
 	while (q->cons_tail != q->cons_head) {
 		struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
 		*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+		if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+			if (xskq_is_valid_addr_unaligned(q, *addr,
+							 umem->chunk_size_nohr,
+							 umem))
+				return addr;
+			goto out;
+		}
+
 		if (xskq_is_valid_addr(q, *addr))
 			return addr;
 
+out:
 		q->cons_tail++;
 	}
 
 	return NULL;
 }
 
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+				  struct xdp_umem *umem)
 {
 	if (q->cons_tail == q->cons_head) {
 		smp_mb(); /* D, matches A */
@@ -171,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
 		smp_rmb();
 	}
 
-	return xskq_validate_addr(q, addr);
+	return xskq_validate_addr(q, addr, umem);
 }
 
 static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -230,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
 
 /* Rx/Tx queue */
 
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+				      struct xdp_umem *umem)
 {
+	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+		if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+			return false;
+
+		if (d->len > umem->chunk_size_nohr || d->options) {
+			q->invalid_descs++;
+			return false;
+		}
+
+		return true;
+	}
+
 	if (!xskq_is_valid_addr(q, d->addr))
 		return false;
 
@@ -245,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
 }
 
 static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
-						  struct xdp_desc *desc)
+						  struct xdp_desc *desc,
+						  struct xdp_umem *umem)
 {
 	while (q->cons_tail != q->cons_head) {
 		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
 		*desc = READ_ONCE(ring->desc[idx]);
-		if (xskq_is_valid_desc(q, desc))
+		if (xskq_is_valid_desc(q, desc, umem))
 			return desc;
 
 		q->cons_tail++;
@@ -262,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
 }
 
 static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
-					      struct xdp_desc *desc)
+					      struct xdp_desc *desc,
+					      struct xdp_umem *umem)
 {
 	if (q->cons_tail == q->cons_head) {
 		smp_mb(); /* D, matches A */
@@ -273,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
 		smp_rmb(); /* C, matches B */
 	}
 
-	return xskq_validate_desc(q, desc);
+	return xskq_validate_desc(q, desc, umem);
 }
 
 static inline void xskq_discard_desc(struct xsk_queue *q)