author     Björn Töpel <bjorn.topel@intel.com>    2019-09-04 07:49:12 -0400
committer  Daniel Borkmann <daniel@iogearbox.net> 2019-09-05 08:11:52 -0400
commit     42fddcc7c64b723a867c7b2f5f7505e244212f13 (patch)
tree       8c99744f0ca7b013f3cec497f68a3f214a501eed /net/xdp
parent     9764f4b301c3e7eb3b75eec85b73cad449cdbb0d (diff)
xsk: use state member for socket synchronization
Before the state variable was introduced by Ilya, the dev member was used to determine whether the socket was bound or not. However, when dev was read, proper SMP barriers and READ_ONCE were missing. To address the missing barriers and READ_ONCE, we start using the state variable as the point of synchronization. The state member reads/writes are paired with proper SMP barriers, and from this it follows that the members published under it do not need READ_ONCE when used in conjunction with the state check.

In all syscalls and in the xsk_rcv path we check whether state is XSK_BOUND. If it is, we issue an SMP read barrier, which implies that dev, umem, and all rings are correctly set up. Note that no READ_ONCE is needed for these members when they are used after state has been observed as XSK_BOUND (plus the read barrier).

To summarize: the struct xdp_sock members dev, queue_id, umem, fq, cq, tx, rx, and state were read lock-less, with incorrect barriers and missing {READ,WRITE}_ONCE. Now, umem, fq, cq, tx, rx, and state are read lock-less. When these members are updated, WRITE_ONCE is used. When they are read, READ_ONCE is only used when the read is outside the control mutex (e.g. in mmap) or not synchronized with the state member (XSK_BOUND plus smp_rmb()).

Note that dev and queue_id need neither WRITE_ONCE nor READ_ONCE, due to the introduced state synchronization (XSK_BOUND plus smp_rmb()).

Introducing the state check also fixes a race, found by syzkaller, in xsk_poll() where a stale umem could be accessed.

Suggested-by: Hillf Danton <hdanton@sina.com>
Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
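[Editor's illustration] A minimal userspace sketch of the publish/observe pattern this patch adopts, using C11 release/acquire atomics in place of the kernel's smp_wmb()+WRITE_ONCE() and READ_ONCE()+smp_rmb() pairs. All names here (fake_sock, fake_bind, fake_is_bound) are hypothetical stand-ins, not kernel code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum sock_state { STATE_READY, STATE_BOUND, STATE_UNBOUND };

struct fake_sock {
	const char *dev;		/* plain members, published via state */
	unsigned int queue_id;
	_Atomic enum sock_state state;
};

/* Writer side (bind): initialize all members first, then publish the
 * state with release ordering -- the analogue of smp_wmb() followed by
 * WRITE_ONCE(xs->state, XSK_BOUND). */
static void fake_bind(struct fake_sock *xs, const char *dev, unsigned int qid)
{
	xs->dev = dev;
	xs->queue_id = qid;
	atomic_store_explicit(&xs->state, STATE_BOUND, memory_order_release);
}

/* Reader side (data path): observe the state with acquire ordering --
 * the analogue of READ_ONCE(xs->state) followed by smp_rmb(). If this
 * returns true, dev and queue_id are guaranteed fully initialized. */
static bool fake_is_bound(struct fake_sock *xs)
{
	return atomic_load_explicit(&xs->state, memory_order_acquire)
	       == STATE_BOUND;
}

int main(void)
{
	struct fake_sock xs = { .state = STATE_READY };

	fake_bind(&xs, "eth0", 0);
	if (fake_is_bound(&xs))
		printf("bound to %s queue %u\n", xs.dev, xs.queue_id);
	return 0;
}

The pairing is what lets the reader dereference dev and queue_id only after observing the bound state, so those members can stay plain fields -- exactly why the patch needs no {READ,WRITE}_ONCE for dev and queue_id.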
Diffstat (limited to 'net/xdp')
-rw-r--r--  net/xdp/xsk.c  54
1 file changed, 39 insertions(+), 15 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 8c9056f06989..c2f1af3b6a7c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -186,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	return err;
 }
 
+static bool xsk_is_bound(struct xdp_sock *xs)
+{
+	if (READ_ONCE(xs->state) == XSK_BOUND) {
+		/* Matches smp_wmb() in bind(). */
+		smp_rmb();
+		return true;
+	}
+	return false;
+}
+
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 len;
 
+	if (!xsk_is_bound(xs))
+		return -EINVAL;
+
 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
@@ -387,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 
-	if (unlikely(!xs->dev))
+	if (unlikely(!xsk_is_bound(xs)))
 		return -ENXIO;
 	if (unlikely(!(xs->dev->flags & IFF_UP)))
 		return -ENETDOWN;
@@ -403,10 +416,15 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
 			     struct poll_table_struct *wait)
 {
 	unsigned int mask = datagram_poll(file, sock, wait);
-	struct sock *sk = sock->sk;
-	struct xdp_sock *xs = xdp_sk(sk);
-	struct net_device *dev = xs->dev;
-	struct xdp_umem *umem = xs->umem;
+	struct xdp_sock *xs = xdp_sk(sock->sk);
+	struct net_device *dev;
+	struct xdp_umem *umem;
+
+	if (unlikely(!xsk_is_bound(xs)))
+		return mask;
+
+	dev = xs->dev;
+	umem = xs->umem;
 
 	if (umem->need_wakeup)
 		dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
@@ -442,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
 {
 	struct net_device *dev = xs->dev;
 
-	if (!dev || xs->state != XSK_BOUND)
+	if (xs->state != XSK_BOUND)
 		return;
-
-	xs->state = XSK_UNBOUND;
+	WRITE_ONCE(xs->state, XSK_UNBOUND);
 
 	/* Wait for driver to stop using the xdp socket. */
 	xdp_del_sk_umem(xs->umem, xs);
@@ -520,7 +537,9 @@ static int xsk_release(struct socket *sock)
 	local_bh_enable();
 
 	xsk_delete_from_maps(xs);
+	mutex_lock(&xs->mutex);
 	xsk_unbind_dev(xs);
+	mutex_unlock(&xs->mutex);
 
 	xskq_destroy(xs->rx);
 	xskq_destroy(xs->tx);
@@ -632,12 +651,12 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	}
 
 	umem_xs = xdp_sk(sock->sk);
-	if (!umem_xs->umem) {
-		/* No umem to inherit. */
+	if (!xsk_is_bound(umem_xs)) {
 		err = -EBADF;
 		sockfd_put(sock);
 		goto out_unlock;
-	} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
+	}
+	if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
 		err = -EINVAL;
 		sockfd_put(sock);
 		goto out_unlock;
@@ -671,10 +690,15 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
-	if (err)
+	if (err) {
 		dev_put(dev);
-	else
-		xs->state = XSK_BOUND;
+	} else {
+		/* Matches smp_rmb() in bind() for shared umem
+		 * sockets, and xsk_is_bound().
+		 */
+		smp_wmb();
+		WRITE_ONCE(xs->state, XSK_BOUND);
+	}
 out_release:
 	mutex_unlock(&xs->mutex);
 	rtnl_unlock();
@@ -927,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
 	unsigned long pfn;
 	struct page *qpg;
 
-	if (xs->state != XSK_READY)
+	if (READ_ONCE(xs->state) != XSK_READY)
 		return -EBUSY;
 
 	if (offset == XDP_PGOFF_RX_RING) {